# thard_alignment.nim
  1. discard """
  2. disabled: "arm64"
  3. cmd: "nim c --mm:arc -u:nimPreviewNonVarDestructor $file"
  4. output: "y"
  5. """
  6. # TODO: fixme: investigate why it failed with non-var destructors
  7. {.passC: "-march=native".}
  8. proc isAlignedCheck(p: pointer, alignment: int) =
  9. doAssert (cast[uint](p) and uint(alignment - 1)) == 0
  10. proc isAlignedCheck[T](p: ref T, alignment: int) =
  11. isAlignedCheck(cast[pointer](p), alignment)
  12. type
  13. m256d {.importc: "__m256d", header: "immintrin.h".} = object
  14. proc set1(x: float): m256d {.importc: "_mm256_set1_pd", header: "immintrin.h".}
  15. func `+`(a,b: m256d): m256d {.importc: "_mm256_add_pd", header: "immintrin.h".}
  16. proc `$`(a: m256d): string =
  17. result = $(cast[ptr float](a.addr)[])
  18. var res: seq[seq[m256d]]
  19. for _ in 1..1000:
  20. var x = newSeq[m256d](1)
  21. x[0] = set1(1.0) # test if operation causes segfault
  22. isAlignedCheck(x[0].addr, alignof(m256d))
  23. res.add x
  24. var res2: seq[m256d]
  25. for i in 1..10000:
  26. res2.setLen(res2.len + 1) # check if realloc works
  27. isAlignedCheck(res2[0].addr, alignof(m256d))
  28. proc lambdaGen(a, b: float, z: ref m256d) : auto =
  29. var x1 = new(m256d)
  30. var x2 = new(m256d)
  31. isAlignedCheck(x1, alignof(m256d))
  32. isAlignedCheck(x2, alignof(m256d))
  33. x1[] = set1(2.0 + a)
  34. x2[] = set1(-23.0 - b)
  35. let capturingLambda = proc(x: ref m256d): ref m256d =
  36. var cc = new(m256d)
  37. var bb = new(m256d)
  38. isAlignedCheck(x1, alignof(m256d))
  39. isAlignedCheck(x2, alignof(m256d))
  40. isAlignedCheck(cc, alignof(m256d))
  41. isAlignedCheck(bb, alignof(m256d))
  42. isAlignedCheck(z, alignof(m256d))
  43. cc[] = x1[] + x1[] + z[]
  44. bb[] = x2[] + set1(12.5) + z[]
  45. result = new(m256d)
  46. isAlignedCheck(result, alignof(m256d))
  47. result[] = cc[] + bb[] + x[]
  48. return capturingLambda
  49. var xx = new(m256d)
  50. xx[] = set1(10)
  51. isAlignedCheck(xx, alignOf(m256d))
  52. let f1 = lambdaGen(2.0 , 2.221, xx)
  53. let f2 = lambdaGen(-1.226 , 3.5, xx)
  54. isAlignedCheck(f1(xx), alignOf(m256d))
  55. isAlignedCheck(f2(xx), alignOf(m256d))
  56. #-----------------------------------------------------------------------------
  57. type
  58. MyAligned = object of RootObj
  59. a{.align: 128.}: float
  60. var f: MyAligned
  61. isAlignedCheck(f.addr, MyAligned.alignOf)
  62. var fref = new(MyAligned)
  63. isAlignedCheck(fref, MyAligned.alignOf)
  64. var fs: seq[MyAligned]
  65. var fr: seq[RootRef]
  66. for i in 0..1000:
  67. fs.add MyAligned()
  68. isAlignedCheck(fs[^1].addr, MyAligned.alignOf)
  69. fs[^1].a = i.float
  70. fr.add new(MyAligned)
  71. isAlignedCheck(fr[^1], MyAligned.alignOf)
  72. ((ref MyAligned)fr[^1])[].a = i.float
  73. for i in 0..1000:
  74. doAssert(fs[i].a == i.float)
  75. doAssert(((ref MyAligned)fr[i]).a == i.float)
  76. proc lambdaTest2(a: MyAligned, z: ref MyAligned): auto =
  77. var x1: MyAligned
  78. x1.a = a.a + z.a
  79. var x2: MyAligned
  80. x2.a = a.a - z.a
  81. let capturingLambda = proc(x: MyAligned): MyAligned =
  82. var cc: MyAligned
  83. var bb: MyAligned
  84. isAlignedCheck(x1.addr, MyAligned.alignOf)
  85. isAlignedCheck(x2.addr, MyAligned.alignOf)
  86. isAlignedCheck(cc.addr, MyAligned.alignOf)
  87. isAlignedCheck(bb.addr, MyAligned.alignOf)
  88. isAlignedCheck(z, MyAligned.alignOf)
  89. cc.a = x1.a + x1.a + z.a
  90. bb.a = x2.a - z.a
  91. isAlignedCheck(result.addr, MyAligned.alignOf)
  92. result.a = cc.a + bb.a + x2.a
  93. return capturingLambda
  94. let q1 = lambdaTest2(MyAligned(a: 1.0), (ref MyAligned)(a: 2.0))
  95. let q2 = lambdaTest2(MyAligned( a: -1.0), (ref MyAligned)(a: -2.0))
  96. isAlignedCheck(rawEnv(q1), MyAligned.alignOf)
  97. isAlignedCheck(rawEnv(q2), MyAligned.alignOf)
  98. discard q1(MyAligned(a: 1.0))
  99. discard q2(MyAligned(a: -1.0))
  100. #-----------------------------------------------------------------------------
  101. block:
  102. var s: seq[seq[MyAligned]]
  103. for len in 0..128:
  104. s.add newSeq[MyAligned](len)
  105. for i in 0..<len:
  106. s[^1][i] = MyAligned(a: 1.0)
  107. if len > 0:
  108. isAlignedCheck(s[^1][0].addr, MyAligned.alignOf)
  109. echo "y"