sysatomics.nim 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2015 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  when defined(nimPreviewSlimSystem):
    {.deprecated: "use `std/atomics` instead".}

  # Atomic operations for Nim.
  # Stack traces and profiler instrumentation are disabled for the
  # declarations that follow.
  {.push stackTrace: off, profiler: off.}

  const
    # True when compiling with threads enabled and not for NimScript.
    hasThreadSupport = compileOption("threads") and not defined(nimscript)
    # Compilers handled through the GCC-style `__atomic_*` builtins.
    someGcc = defined(gcc) or defined(llvm_gcc) or defined(clang) or
      defined(nintendoswitch)
    # Compilers handled through the MSVC-style intrinsics.
    someVcc = defined(vcc) or defined(clang_cl)

  type
    AtomType* = SomeNumber | pointer | ptr | char | bool
      ## Type class listing the types accepted by the atomic procs.
  20. when someGcc:
  type
    AtomMemModel* = distinct cint
      ## Memory-order selector handed to the `atomic*` procs.

  var
    ATOMIC_RELAXED* {.importc: "__ATOMIC_RELAXED", nodecl.}: AtomMemModel
      ## No barriers or synchronization.
    ATOMIC_CONSUME* {.importc: "__ATOMIC_CONSUME", nodecl.}: AtomMemModel
      ## Data dependency only, for both the barrier and the
      ## synchronization with another thread.
    ATOMIC_ACQUIRE* {.importc: "__ATOMIC_ACQUIRE", nodecl.}: AtomMemModel
      ## Barrier to hoisting of code; synchronizes with release
      ## (or stronger) semantic stores from another thread.
    ATOMIC_RELEASE* {.importc: "__ATOMIC_RELEASE", nodecl.}: AtomMemModel
      ## Barrier to sinking of code; synchronizes with acquire
      ## (or stronger) semantic loads from another thread.
    ATOMIC_ACQ_REL* {.importc: "__ATOMIC_ACQ_REL", nodecl.}: AtomMemModel
      ## Full barrier in both directions; synchronizes with acquire
      ## loads and release stores in another thread.
    ATOMIC_SEQ_CST* {.importc: "__ATOMIC_SEQ_CST", nodecl.}: AtomMemModel
      ## Full barrier in both directions; synchronizes with acquire
      ## loads and release stores in all threads.
  proc atomicLoadN*[T: AtomType](p: ptr T; mem: AtomMemModel): T
    {.importc: "__atomic_load_n", nodecl.}
    ## Atomic load: returns the contents found at `p`.
    ##
    ## Memory models listed for this operation: `ATOMIC_RELAXED`,
    ## `ATOMIC_SEQ_CST`, `ATOMIC_ACQUIRE`, `ATOMIC_CONSUME`.
  proc atomicLoad*[T: AtomType](p, ret: ptr T; mem: AtomMemModel)
    {.importc: "__atomic_load", nodecl.}
    ## Generic form of the atomic load: the contents at `p` are delivered
    ## through `ret` instead of being returned.
  proc atomicStoreN*[T: AtomType](p: ptr T; val: T; mem: AtomMemModel)
    {.importc: "__atomic_store_n", nodecl.}
    ## Atomic store: writes `val` at `p`.
    ##
    ## Memory models listed for this operation: `ATOMIC_RELAXED`,
    ## `ATOMIC_SEQ_CST` and `ATOMIC_RELEASE`.
  proc atomicStore*[T: AtomType](p, val: ptr T; mem: AtomMemModel)
    {.importc: "__atomic_store", nodecl.}
    ## Generic form of the atomic store: the value that `val` points to is
    ## stored at `p`.
  proc atomicExchangeN*[T: AtomType](p: ptr T; val: T; mem: AtomMemModel): T
    {.importc: "__atomic_exchange_n", nodecl.}
    ## Atomic exchange: writes `val` at `p` and returns what was stored at
    ## `p` before.
    ##
    ## Memory models listed for this operation: `ATOMIC_RELAXED`,
    ## `ATOMIC_SEQ_CST`, `ATOMIC_ACQUIRE`, `ATOMIC_RELEASE`, `ATOMIC_ACQ_REL`.
  proc atomicExchange*[T: AtomType](p, val, ret: ptr T; mem: AtomMemModel)
    {.importc: "__atomic_exchange", nodecl.}
    ## Generic form of the atomic exchange: the contents of `val` are stored
    ## at `p`, and the original value at `p` is copied into `ret`.
  proc atomicCompareExchangeN*[T: AtomType](
      p, expected: ptr T; desired: T; weak: bool;
      success_memmodel: AtomMemModel; failure_memmodel: AtomMemModel): bool
    {.importc: "__atomic_compare_exchange_n", nodecl.}
    ## Atomic compare-and-exchange.
    ##
    ## The contents at `p` are compared with the contents at `expected`:
    ## * equal: `desired` is written at `p` and `true` is returned; the
    ##   execution is considered to conform to `success_memmodel`, for which
    ##   any memory model may be used;
    ## * not equal: the current contents at `p` are written into `expected`
    ##   and `false` is returned; the execution is considered to conform to
    ##   `failure_memmodel`, which cannot be `__ATOMIC_RELEASE` nor
    ##   `__ATOMIC_ACQ_REL` and cannot be stronger than `success_memmodel`.
    ##
    ## `weak` selects the weak variation when `true` and the strong one when
    ## `false`. Many targets only offer the strong variation and ignore the
    ## parameter; when in doubt, use the strong variation.
  proc atomicCompareExchange*[T: AtomType](
      p, expected, desired: ptr T; weak: bool;
      success_memmodel: AtomMemModel; failure_memmodel: AtomMemModel): bool
    {.importc: "__atomic_compare_exchange", nodecl.}
    ## Generic version of the atomic compare-and-exchange. Virtually
    ## identical to `atomicCompareExchangeN`, except that the desired value
    ## is passed through a pointer as well.
  # Operate-then-fetch family: each proc performs its operation on the value
  # at `p` and returns the NEW value. All memory models are valid.
  proc atomicAddFetch*[T: AtomType](p: ptr T; val: T; mem: AtomMemModel): T
    {.importc: "__atomic_add_fetch", nodecl.}
    ## Binding of `__atomic_add_fetch`; returns the new value.

  proc atomicSubFetch*[T: AtomType](p: ptr T; val: T; mem: AtomMemModel): T
    {.importc: "__atomic_sub_fetch", nodecl.}
    ## Binding of `__atomic_sub_fetch`; returns the new value.

  proc atomicOrFetch*[T: AtomType](p: ptr T; val: T; mem: AtomMemModel): T
    {.importc: "__atomic_or_fetch", nodecl.}
    ## Binding of `__atomic_or_fetch`; returns the new value.

  proc atomicAndFetch*[T: AtomType](p: ptr T; val: T; mem: AtomMemModel): T
    {.importc: "__atomic_and_fetch", nodecl.}
    ## Binding of `__atomic_and_fetch`; returns the new value.

  proc atomicXorFetch*[T: AtomType](p: ptr T; val: T; mem: AtomMemModel): T
    {.importc: "__atomic_xor_fetch", nodecl.}
    ## Binding of `__atomic_xor_fetch`; returns the new value.

  proc atomicNandFetch*[T: AtomType](p: ptr T; val: T; mem: AtomMemModel): T
    {.importc: "__atomic_nand_fetch", nodecl.}
    ## Binding of `__atomic_nand_fetch`; returns the new value.
  # Fetch-then-operate family: each proc performs its operation on the value
  # at `p` and returns the OLD value. All memory models are valid.
  proc atomicFetchAdd*[T: AtomType](p: ptr T; val: T; mem: AtomMemModel): T
    {.importc: "__atomic_fetch_add", nodecl.}
    ## Binding of `__atomic_fetch_add`; returns the old value.

  proc atomicFetchSub*[T: AtomType](p: ptr T; val: T; mem: AtomMemModel): T
    {.importc: "__atomic_fetch_sub", nodecl.}
    ## Binding of `__atomic_fetch_sub`; returns the old value.

  proc atomicFetchOr*[T: AtomType](p: ptr T; val: T; mem: AtomMemModel): T
    {.importc: "__atomic_fetch_or", nodecl.}
    ## Binding of `__atomic_fetch_or`; returns the old value.

  proc atomicFetchAnd*[T: AtomType](p: ptr T; val: T; mem: AtomMemModel): T
    {.importc: "__atomic_fetch_and", nodecl.}
    ## Binding of `__atomic_fetch_and`; returns the old value.

  proc atomicFetchXor*[T: AtomType](p: ptr T; val: T; mem: AtomMemModel): T
    {.importc: "__atomic_fetch_xor", nodecl.}
    ## Binding of `__atomic_fetch_xor`; returns the old value.

  proc atomicFetchNand*[T: AtomType](p: ptr T; val: T; mem: AtomMemModel): T
    {.importc: "__atomic_fetch_nand", nodecl.}
    ## Binding of `__atomic_fetch_nand`; returns the old value.
  proc atomicTestAndSet*(p: pointer; mem: AtomMemModel): bool
    {.importc: "__atomic_test_and_set", nodecl.}
    ## Atomic test-and-set on the byte at `p`.
    ##
    ## The byte is set to some implementation defined nonzero "set" value.
    ## Returns `true` if and only if the previous contents were "set".
    ## All memory models are valid.
  proc atomicClear*(p: pointer; mem: AtomMemModel)
    {.importc: "__atomic_clear", nodecl.}
    ## Atomic clear at `p`: after the operation the location contains 0.
    ##
    ## Memory models listed for this operation: `ATOMIC_RELAXED`,
    ## `ATOMIC_SEQ_CST`, `ATOMIC_RELEASE`.
  proc atomicThreadFence*(mem: AtomMemModel)
    {.importc: "__atomic_thread_fence", nodecl.}
    ## Synchronization fence between threads, based on the memory model
    ## given in `mem`. All memory orders are valid.
  proc atomicSignalFence*(mem: AtomMemModel)
    {.importc: "__atomic_signal_fence", nodecl.}
    ## Synchronization fence between a thread and signal handlers based in
    ## the same thread. All memory orders are valid.
  proc atomicAlwaysLockFree*(size: int; p: pointer): bool
    {.importc: "__atomic_always_lock_free", nodecl.}
    ## Returns `true` if objects of `size` bytes always generate lock free
    ## atomic instructions for the target architecture.
    ##
    ## `size` must resolve to a compile-time constant, and the result also
    ## resolves to a compile-time constant.
    ##
    ## `p` is an optional pointer to the object that may be used to
    ## determine alignment. A value of 0 indicates typical alignment should
    ## be used. The compiler may also ignore this parameter.
  proc atomicIsLockFree*(size: int; p: pointer): bool
    {.importc: "__atomic_is_lock_free", nodecl.}
    ## Returns `true` if objects of `size` bytes always generate lock free
    ## atomic instructions for the target architecture. If that is not
    ## known, a call is made to a runtime routine named
    ## `__atomic_is_lock_free`.
    ##
    ## `p` is an optional pointer to the object that may be used to
    ## determine alignment. A value of 0 indicates typical alignment should
    ## be used. The compiler may also ignore this parameter.
  template fence*() =
    ## Thread fence using the `ATOMIC_SEQ_CST` memory model.
    atomicThreadFence(ATOMIC_SEQ_CST)
  149. elif someVcc:
  type
    AtomMemModel* = distinct cint
      ## Memory-order selector handed to the `atomic*` procs.

  const
    # Numbered 0..5 in the order relaxed, consume, acquire, release,
    # acquire-release, sequentially consistent.
    ATOMIC_RELAXED* = AtomMemModel(0)
    ATOMIC_CONSUME* = AtomMemModel(1)
    ATOMIC_ACQUIRE* = AtomMemModel(2)
    ATOMIC_RELEASE* = AtomMemModel(3)
    ATOMIC_ACQ_REL* = AtomMemModel(4)
    ATOMIC_SEQ_CST* = AtomMemModel(5)

  # Equality borrowed from `cint`; not exported.
  proc `==`(x1, x2: AtomMemModel): bool {.borrow.}
  # Barrier intrinsics taken from <intrin.h>.
  # NOTE(review): these look like compiler-level barriers rather than
  # hardware fences - confirm against the MSVC intrinsic documentation.
  proc readBarrier()
    {.importc: "_ReadBarrier", header: "<intrin.h>".}
    ## Binding of the `_ReadBarrier` intrinsic.

  proc writeBarrier()
    {.importc: "_WriteBarrier", header: "<intrin.h>".}
    ## Binding of the `_WriteBarrier` intrinsic.

  proc fence*()
    {.importc: "_ReadWriteBarrier", header: "<intrin.h>".}
    ## Binding of the `_ReadWriteBarrier` intrinsic.
  # Private bindings of the MSVC interlocked intrinsics from <intrin.h>.
  # The C++ backend needs explicit `static_cast`s on the pointer argument,
  # so it uses `importcpp` patterns; the C backend imports the plain names.
  when defined(cpp):
    proc interlockedCompareExchange64(
        p: pointer; exchange: int64; comparand: int64): int64 {.
      importcpp: "_InterlockedCompareExchange64(static_cast<NI64 volatile *>(#), #, #)",
      header: "<intrin.h>".}
    proc interlockedCompareExchange32(
        p: pointer; exchange: int32; comparand: int32): int32 {.
      importcpp: "_InterlockedCompareExchange(static_cast<long volatile *>(#), #, #)",
      header: "<intrin.h>".}
    proc interlockedCompareExchange8(
        p: pointer; exchange: byte; comparand: byte): byte {.
      importcpp: "_InterlockedCompareExchange8(static_cast<char volatile *>(#), #, #)",
      header: "<intrin.h>".}
    proc interlockedExchange8(location: pointer; desired: int8): int8 {.
      importcpp: "_InterlockedExchange8(static_cast<NI8 volatile *>(#), #)",
      header: "<intrin.h>".}
    proc interlockedExchange16(location: pointer; desired: int16): int16 {.
      importcpp: "_InterlockedExchange16(static_cast<NI16 volatile *>(#), #)",
      header: "<intrin.h>".}
    proc interlockedExchange32(location: pointer; desired: int32): int32 {.
      importcpp: "_InterlockedExchange(static_cast<long volatile *>(#), #)",
      header: "<intrin.h>".}
    proc interlockedExchange64(location: pointer; desired: int64): int64 {.
      importcpp: "_InterlockedExchange64(static_cast<NI64 volatile *>(#), #)",
      header: "<intrin.h>".}
  else:
    proc interlockedCompareExchange64(
        p: pointer; exchange: int64; comparand: int64): int64 {.
      importc: "_InterlockedCompareExchange64", header: "<intrin.h>".}
    proc interlockedCompareExchange32(
        p: pointer; exchange: int32; comparand: int32): int32 {.
      importc: "_InterlockedCompareExchange", header: "<intrin.h>".}
    proc interlockedCompareExchange8(
        p: pointer; exchange: byte; comparand: byte): byte {.
      importc: "_InterlockedCompareExchange8", header: "<intrin.h>".}
    proc interlockedExchange8(location: pointer; desired: int8): int8 {.
      importc: "_InterlockedExchange8", header: "<intrin.h>".}
    proc interlockedExchange16(location: pointer; desired: int16): int16 {.
      importc: "_InterlockedExchange16", header: "<intrin.h>".}
    proc interlockedExchange32(location: pointer; desired: int32): int32 {.
      importc: "_InterlockedExchange", header: "<intrin.h>".}
    proc interlockedExchange64(location: pointer; desired: int64): int64 {.
      importc: "_InterlockedExchange64", header: "<intrin.h>".}
  template barrier(mem: AtomMemModel) =
    ## Expands, at compile time, to the barrier call chosen for `mem`:
    ## `readBarrier()` for consume, `writeBarrier()` for acquire, `fence()`
    ## for release, acquire-release and sequentially consistent, and
    ## nothing for relaxed.
    when mem == ATOMIC_CONSUME:
      readBarrier()
    elif mem == ATOMIC_ACQUIRE:
      writeBarrier()
    elif mem == ATOMIC_RELEASE or mem == ATOMIC_ACQ_REL or
        mem == ATOMIC_SEQ_CST:
      fence()
    else:
      discard
  proc atomicStoreN*[T: AtomType](p: ptr T, val: T, mem: static[AtomMemModel]) =
    ## Stores `val` at `p`: first the barrier selected by `mem` is issued,
    ## then the value is written with a plain assignment.
    ##
    ## `mem` has to be known at compile time (`static`).
    # NOTE(review): ordering here relies solely on the intrinsic chosen by
    # `barrier`; confirm it is strong enough for ATOMIC_SEQ_CST stores.
    barrier(mem)
    p[] = val
  proc atomicLoadN*[T: AtomType](p: ptr T, mem: static[AtomMemModel]): T =
    ## Loads and returns the value at `p`: the value is read with a plain
    ## dereference, then the barrier selected by `mem` is issued.
    ##
    ## `mem` has to be known at compile time (`static`).
    result = p[]
    barrier(mem)
  proc atomicCompareExchangeN*[T: ptr](p, expected: ptr T, desired: T,
      weak: bool, success_memmodel: AtomMemModel,
      failure_memmodel: AtomMemModel): bool =
    ## Atomic compare-and-exchange for pointer-sized values.
    ##
    ## Compares the contents at `p` with the contents at `expected`. If they
    ## are equal, `desired` is written at `p` and `true` is returned.
    ## Otherwise the value observed at `p` is written into `expected[]` and
    ## `false` is returned.
    ##
    ## `weak`, `success_memmodel` and `failure_memmodel` are accepted for
    ## signature compatibility only; this implementation does not use them.
    # The comparand is the value `expected` points to, not the address held
    # in `expected` itself.
    when sizeof(T) == 8:
      let wanted = cast[int64](expected[])
      let seen = interlockedCompareExchange64(p, cast[int64](desired), wanted)
    elif sizeof(T) == 4:
      let wanted = cast[int32](expected[])
      let seen = interlockedCompareExchange32(p, cast[int32](desired), wanted)
    else:
      {.error: "invalid CAS instruction".}
    result = seen == wanted
    if not result:
      # Report the value that was actually found so the caller can retry.
      expected[] = cast[T](seen)
  proc atomicExchangeN*[T: ptr](p: ptr T; val: T; mem: AtomMemModel): T =
    ## Atomic exchange for pointer values: stores `val` at `p` through the
    ## interlocked-exchange intrinsic matching `sizeof(T)` and returns the
    ## value that intrinsic hands back. `mem` is not used.
    when sizeof(T) == 8:
      let previous = interlockedExchange64(p, cast[int64](val))
      result = cast[T](previous)
    elif sizeof(T) == 4:
      let previous = interlockedExchange32(p, cast[int32](val))
      result = cast[T](previous)
  # `addAndFetch` binds the interlocked exchange-add intrinsic that matches
  # the width of `int`. The C++ backend goes through an `importcpp` pattern
  # with explicit casts; the C backend imports the intrinsic by name.
  when sizeof(int) == 8:
    when defined(cpp):
      proc addAndFetch*(p: ptr int; val: int): int {.
        importcpp: "_InterlockedExchangeAdd64(static_cast<NI volatile *>(#), #)",
        header: "<intrin.h>".}
    else:
      proc addAndFetch*(p: ptr int; val: int): int {.
        importc: "_InterlockedExchangeAdd64",
        header: "<intrin.h>".}
  else:
    when defined(cpp):
      proc addAndFetch*(p: ptr int; val: int): int {.
        importcpp: "_InterlockedExchangeAdd(reinterpret_cast<long volatile *>(#), static_cast<long>(#))",
        header: "<intrin.h>".}
    else:
      proc addAndFetch*(p: ptr int; val: int): int {.
        importc: "_InterlockedExchangeAdd",
        header: "<intrin.h>".}
  226. else:
  proc addAndFetch*(p: ptr int; val: int): int {.inline.} =
    ## Plain (non-atomic) fallback: adds `val` to the integer at `p` and
    ## returns the updated value.
    p[] += val
    p[]
  proc atomicInc*(memLoc: var int, x: int = 1): int
      {.inline, discardable, raises: [], tags: [].} =
    ## Atomically increments the integer by some `x`. It returns the new value.
    ##
    ## Without thread support, or with a compiler that is neither GCC-like
    ## nor VCC-like, this is an ordinary increment.
    when hasThreadSupport and someGcc:
      result = atomicAddFetch(addr memLoc, x, ATOMIC_SEQ_CST)
    elif hasThreadSupport and someVcc:
      # The delta is added to the value handed back by `addAndFetch` to
      # obtain the new value.
      result = addAndFetch(addr memLoc, x) + x
    else:
      memLoc += x
      result = memLoc
  proc atomicDec*(memLoc: var int, x: int = 1): int
      {.inline, discardable, raises: [], tags: [].} =
    ## Atomically decrements the integer by some `x`. It returns the new value.
    ##
    ## Without thread support, or with a compiler that is neither GCC-like
    ## nor VCC-like, this is an ordinary decrement.
    when hasThreadSupport and someGcc:
      when declared(atomicSubFetch):
        result = atomicSubFetch(addr memLoc, x, ATOMIC_SEQ_CST)
      else:
        # No subtract builtin declared: add the negated amount instead.
        result = atomicAddFetch(addr memLoc, -x, ATOMIC_SEQ_CST)
    elif hasThreadSupport and someVcc:
      # The delta is subtracted from the value handed back by `addAndFetch`
      # to obtain the new value.
      result = addAndFetch(addr memLoc, -x) - x
    else:
      memLoc -= x
      result = memLoc
  253. when someVcc:
  proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool =
    ## Compare-and-swap through the interlocked compare-exchange intrinsic
    ## matching `sizeof(T)` (8, 4 or 1 bytes). Returns `true` when the value
    ## handed back by the intrinsic equals `oldValue`. Any other size is
    ## rejected at compile time.
    when sizeof(T) == 8:
      let comparand = cast[int64](oldValue)
      result = interlockedCompareExchange64(
        p, cast[int64](newValue), comparand) == comparand
    elif sizeof(T) == 4:
      let comparand = cast[int32](oldValue)
      result = interlockedCompareExchange32(
        p, cast[int32](newValue), comparand) == comparand
    elif sizeof(T) == 1:
      let comparand = cast[byte](oldValue)
      result = interlockedCompareExchange8(
        p, cast[byte](newValue), comparand) == comparand
    else:
      {.error: "invalid CAS instruction".}
  266. elif defined(tcc):
  # Emits a static C helper `__tcc_cas(ptr, oldVal, newVal)` built on inline
  # assembly: a `lock`-prefixed `cmpxchg` on `*ptr` with `oldVal` in the
  # accumulator register ("a" constraint) and `newVal` as the replacement,
  # returning the flag captured by `sete`.
  when defined(amd64):
    # amd64 variant: uses the `cmpxchgq` form of the instruction.
    # NOTE(review): the C prototype is declared with `int` while the Nim
    # side (`tcc_cas`) passes Nim `int` values - confirm the operand widths
    # agree on this target.
    {.emit:"""
static int __tcc_cas(int *ptr, int oldVal, int newVal)
{
    unsigned char ret;
    __asm__ __volatile__ (
            " lock\n"
            " cmpxchgq %2,%1\n"
            " sete %0\n"
            : "=q" (ret), "=m" (*ptr)
            : "r" (newVal), "m" (*ptr), "a" (oldVal)
            : "memory");
    return ret;
}
""".}
  else:
    # Every other target: uses the `cmpxchgl` form of the instruction.
    #assert sizeof(int) == 4
    {.emit:"""
static int __tcc_cas(int *ptr, int oldVal, int newVal)
{
    unsigned char ret;
    __asm__ __volatile__ (
            " lock\n"
            " cmpxchgl %2,%1\n"
            " sete %0\n"
            : "=q" (ret), "=m" (*ptr)
            : "r" (newVal), "m" (*ptr), "a" (oldVal)
            : "memory");
    return ret;
}
""".}
  proc tcc_cas(p: ptr int; oldValue, newValue: int): bool {.
    importc: "__tcc_cas", nodecl.}
    ## Nim-side declaration of the emitted C helper `__tcc_cas`.

  proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool =
    ## Compare-and-swap for tcc: the location and both values are cast to
    ## `int` and passed on to `tcc_cas`.
    let
      target = cast[ptr int](p)
      expected = cast[int](oldValue)
      replacement = cast[int](newValue)
    tcc_cas(target, expected, replacement)
  302. elif declared(atomicCompareExchangeN):
  proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool =
    ## Compare-and-swap built on `atomicCompareExchangeN`, using the strong
    ## variation (`weak = false`) and `ATOMIC_SEQ_CST` for both the success
    ## and the failure memory model.
    # The builtin takes the expected value by address, so work on a local
    # copy of `oldValue`.
    var expected = oldValue
    result = atomicCompareExchangeN(p, addr expected, newValue, false,
                                    ATOMIC_SEQ_CST, ATOMIC_SEQ_CST)
  305. else:
  # this is valid for GCC and Intel C++
  proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool {.
    importc: "__sync_bool_compare_and_swap",
    nodecl.}
    ## Compare-and-swap bound directly to the
    ## `__sync_bool_compare_and_swap` builtin.
  # XXX is this valid for 'int'?
  # `cpuRelax` is picked per compiler and CPU; the first matching branch
  # wins. When no branch matches, no `cpuRelax` is declared.
  when someVcc and (defined(x86) or defined(amd64)):
    proc cpuRelax* {.importc: "YieldProcessor", header: "<windows.h>".}
      ## Binding of `YieldProcessor` from `<windows.h>`.
  elif (someGcc or defined(bcc)) and (defined(x86) or defined(amd64)):
    proc cpuRelax* {.inline.} =
      ## Emits a `pause` instruction with a "memory" clobber.
      {.emit: """asm volatile("pause" ::: "memory");""".}
  elif someGcc or defined(tcc):
    proc cpuRelax* {.inline.} =
      ## Emits an empty asm statement with a "memory" clobber.
      {.emit: """asm volatile("" ::: "memory");""".}
  elif defined(icl):
    proc cpuRelax* {.importc: "_mm_pause", header: "xmmintrin.h".}
      ## Binding of `_mm_pause` from `xmmintrin.h`.
  elif false:
    # Disabled branch (`elif false`): a sleep-based variant, never compiled.
    from std/os import sleep

    proc cpuRelax* {.inline.} =
      os.sleep(1)
  when hasThreadSupport and not declared(fence):
    # XXX fixme
    proc fence*() {.inline.} =
      ## Stand-in used when no `fence` has been declared so far and threads
      ## are enabled: performs a `cas` on a local dummy and discards the
      ## outcome.
      var scratch = false
      discard cas(addr scratch, false, true)
  # Closes the `{.push stackTrace:off, profiler:off.}` issued near the top
  # of this file.
  {.pop.}