# thard_alignment.nim (3.7 KB)
  1. discard """
  2. disabled: "arm64"
  3. cmd: "nim c --gc:arc $file"
  4. output: "y"
  5. """
  6. {.passC: "-march=native".}
  7. proc isAlignedCheck(p: pointer, alignment: int) =
  8. doAssert (cast[uint](p) and uint(alignment - 1)) == 0
  9. proc isAlignedCheck[T](p: ref T, alignment: int) =
  10. isAlignedCheck(cast[pointer](p), alignment)
  11. type
  12. m256d {.importc: "__m256d", header: "immintrin.h".} = object
  13. proc set1(x: float): m256d {.importc: "_mm256_set1_pd", header: "immintrin.h".}
  14. func `+`(a,b: m256d): m256d {.importc: "_mm256_add_pd", header: "immintrin.h".}
  15. proc `$`(a: m256d): string =
  16. result = $(cast[ptr float](a.unsafeAddr)[])
  17. var res: seq[seq[m256d]]
  18. for _ in 1..1000:
  19. var x = newSeq[m256d](1)
  20. x[0] = set1(1.0) # test if operation causes segfault
  21. isAlignedCheck(x[0].addr, alignof(m256d))
  22. res.add x
  23. var res2: seq[m256d]
  24. for i in 1..10000:
  25. res2.setLen(res2.len + 1) # check if realloc works
  26. isAlignedCheck(res2[0].addr, alignof(m256d))
  27. proc lambdaGen(a, b: float, z: ref m256d) : auto =
  28. var x1 = new(m256d)
  29. var x2 = new(m256d)
  30. isAlignedCheck(x1, alignof(m256d))
  31. isAlignedCheck(x2, alignof(m256d))
  32. x1[] = set1(2.0 + a)
  33. x2[] = set1(-23.0 - b)
  34. let capturingLambda = proc(x: ref m256d): ref m256d =
  35. var cc = new(m256d)
  36. var bb = new(m256d)
  37. isAlignedCheck(x1, alignof(m256d))
  38. isAlignedCheck(x2, alignof(m256d))
  39. isAlignedCheck(cc, alignof(m256d))
  40. isAlignedCheck(bb, alignof(m256d))
  41. isAlignedCheck(z, alignof(m256d))
  42. cc[] = x1[] + x1[] + z[]
  43. bb[] = x2[] + set1(12.5) + z[]
  44. result = new(m256d)
  45. isAlignedCheck(result, alignof(m256d))
  46. result[] = cc[] + bb[] + x[]
  47. return capturingLambda
  48. var xx = new(m256d)
  49. xx[] = set1(10)
  50. isAlignedCheck(xx, alignOf(m256d))
  51. let f1 = lambdaGen(2.0 , 2.221, xx)
  52. let f2 = lambdaGen(-1.226 , 3.5, xx)
  53. isAlignedCheck(f1(xx), alignOf(m256d))
  54. isAlignedCheck(f2(xx), alignOf(m256d))
#-----------------------------------------------------------------------------
  56. type
  57. MyAligned = object of RootObj
  58. a{.align: 128.}: float
  59. var f: MyAligned
  60. isAlignedCheck(f.addr, MyAligned.alignOf)
  61. var fref = new(MyAligned)
  62. isAlignedCheck(fref, MyAligned.alignOf)
  63. var fs: seq[MyAligned]
  64. var fr: seq[RootRef]
  65. for i in 0..1000:
  66. fs.add MyAligned()
  67. isAlignedCheck(fs[^1].addr, MyAligned.alignOf)
  68. fs[^1].a = i.float
  69. fr.add new(MyAligned)
  70. isAlignedCheck(fr[^1], MyAligned.alignOf)
  71. ((ref MyAligned)fr[^1])[].a = i.float
  72. for i in 0..1000:
  73. doAssert(fs[i].a == i.float)
  74. doAssert(((ref MyAligned)fr[i]).a == i.float)
  75. proc lambdaTest2(a: MyAligned, z: ref MyAligned): auto =
  76. var x1: MyAligned
  77. x1.a = a.a + z.a
  78. var x2: MyAligned
  79. x2.a = a.a - z.a
  80. let capturingLambda = proc(x: MyAligned): MyAligned =
  81. var cc: MyAligned
  82. var bb: MyAligned
  83. isAlignedCheck(x1.addr, MyAligned.alignOf)
  84. isAlignedCheck(x2.addr, MyAligned.alignOf)
  85. isAlignedCheck(cc.addr, MyAligned.alignOf)
  86. isAlignedCheck(bb.addr, MyAligned.alignOf)
  87. isAlignedCheck(z, MyAligned.alignOf)
  88. cc.a = x1.a + x1.a + z.a
  89. bb.a = x2.a - z.a
  90. isAlignedCheck(result.addr, MyAligned.alignOf)
  91. result.a = cc.a + bb.a + x2.a
  92. return capturingLambda
  93. let q1 = lambdaTest2(MyAligned(a: 1.0), (ref MyAligned)(a: 2.0))
  94. let q2 = lambdaTest2(MyAligned( a: -1.0), (ref MyAligned)(a: -2.0))
  95. isAlignedCheck(rawEnv(q1), MyAligned.alignOf)
  96. isAlignedCheck(rawEnv(q2), MyAligned.alignOf)
  97. discard q1(MyAligned(a: 1.0))
  98. discard q2(MyAligned(a: -1.0))
#-----------------------------------------------------------------------------
  100. block:
  101. var s: seq[seq[MyAligned]]
  102. for len in 0..128:
  103. s.add newSeq[MyAligned](len)
  104. for i in 0..<len:
  105. s[^1][i] = MyAligned(a: 1.0)
  106. if len > 0:
  107. isAlignedCheck(s[^1][0].addr, MyAligned.alignOf)
  108. echo "y"