bitops.nim 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2017 Nim Authors
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## This module implements a series of low level methods for bit manipulation.
  10. ##
  11. ## By default, compiler intrinsics are used where possible to improve performance
  12. ## on supported compilers: `GCC`, `LLVM_GCC`, `CLANG`, `VCC`, `ICC`.
  13. ##
  14. ## The module will fallback to pure nim procs in case the backend is not supported.
  15. ## You can also use the flag `noIntrinsicsBitOpts` to disable compiler intrinsics.
  16. ##
  17. ## This module is also compatible with other backends: `JavaScript`, `NimScript`
  18. ## as well as the `compiletime VM`.
  19. ##
  20. ## As a result of using optimized functions/intrinsics, some functions can return
  21. ## undefined results if the input is invalid. You can use the flag `noUndefinedBitOpts`
  22. ## to force predictable behaviour for all input, causing a small performance hit.
  23. ##
  24. ## At this time only `fastLog2`, `firstSetBit`, `countLeadingZeroBits` and `countTrailingZeroBits`
  25. ## may return undefined and/or platform dependent values if given invalid input.
  26. import macros
  27. import std/private/since
  28. from std/private/bitops_utils import forwardImpl, castToUnsigned
func bitnot*[T: SomeInteger](x: T): T {.magic: "BitnotI".}
  ## Computes the `bitwise complement` of the integer `x`.
  ## Implemented by the compiler magic `BitnotI`; has no Nim body.
# Internal two-argument implementations backing the variadic `bitand`,
# `bitor` and `bitxor` macros below; compiler magics with no Nim body.
func internalBitand[T: SomeInteger](x, y: T): T {.magic: "BitandI".}
func internalBitor[T: SomeInteger](x, y: T): T {.magic: "BitorI".}
func internalBitxor[T: SomeInteger](x, y: T): T {.magic: "BitxorI".}
  34. macro bitand*[T: SomeInteger](x, y: T; z: varargs[T]): T =
  35. ## Computes the `bitwise and` of all arguments collectively.
  36. let fn = bindSym("internalBitand")
  37. result = newCall(fn, x, y)
  38. for extra in z:
  39. result = newCall(fn, result, extra)
  40. macro bitor*[T: SomeInteger](x, y: T; z: varargs[T]): T =
  41. ## Computes the `bitwise or` of all arguments collectively.
  42. let fn = bindSym("internalBitor")
  43. result = newCall(fn, x, y)
  44. for extra in z:
  45. result = newCall(fn, result, extra)
  46. macro bitxor*[T: SomeInteger](x, y: T; z: varargs[T]): T =
  47. ## Computes the `bitwise xor` of all arguments collectively.
  48. let fn = bindSym("internalBitxor")
  49. result = newCall(fn, x, y)
  50. for extra in z:
  51. result = newCall(fn, result, extra)
type BitsRange*[T] = range[0..sizeof(T)*8-1]
  ## A range with all bit positions for type `T`
  ## (e.g. `0..7` for `uint8`, `0..63` for `int64`).
template typeMasked[T: SomeInteger](x: T): T =
  ## Truncates `x` to the bit width of `T` on the JS backend, where shifts
  ## are evaluated in a wider numeric representation; a no-op elsewhere.
  when defined(js):
    x and ((0xffffffff_ffffffff'u shr (64 - sizeof(T) * 8)))
  else:
    x
  59. func bitsliced*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  60. ## Returns an extracted (and shifted) slice of bits from `v`.
  61. runnableExamples:
  62. doAssert 0b10111.bitsliced(2 .. 4) == 0b101
  63. doAssert 0b11100.bitsliced(0 .. 2) == 0b100
  64. doAssert 0b11100.bitsliced(0 ..< 3) == 0b100
  65. let
  66. upmost = sizeof(T) * 8 - 1
  67. uv = v.castToUnsigned
  68. ((uv shl (upmost - slice.b)).typeMasked shr (upmost - slice.b + slice.a)).T
  69. proc bitslice*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  70. ## Mutates `v` into an extracted (and shifted) slice of bits from `v`.
  71. runnableExamples:
  72. var x = 0b101110
  73. x.bitslice(2 .. 4)
  74. doAssert x == 0b011
  75. let
  76. upmost = sizeof(T) * 8 - 1
  77. uv = v.castToUnsigned
  78. v = ((uv shl (upmost - slice.b)).typeMasked shr (upmost - slice.b + slice.a)).T
func toMask*[T: SomeInteger](slice: Slice[int]): T {.inline, since: (1, 3).} =
  ## Creates a bitmask based on a slice of bits.
  runnableExamples:
    doAssert toMask[int32](1 .. 3) == 0b1110'i32
    doAssert toMask[int32](0 .. 3) == 0b1111'i32
  let
    upmost = sizeof(T) * 8 - 1
    bitmask = bitnot(0.T).castToUnsigned
  # Start from all ones, shift left then right so that exactly the bits
  # slice.a .. slice.b remain set.
  ((bitmask shl (upmost - slice.b + slice.a)).typeMasked shr (upmost - slice.b)).T
  88. proc masked*[T: SomeInteger](v, mask :T): T {.inline, since: (1, 3).} =
  89. ## Returns `v`, with only the `1` bits from `mask` matching those of
  90. ## `v` set to 1.
  91. ##
  92. ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation.
  93. runnableExamples:
  94. let v = 0b0000_0011'u8
  95. doAssert v.masked(0b0000_1010'u8) == 0b0000_0010'u8
  96. bitand(v, mask)
  97. func masked*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  98. ## Returns `v`, with only the `1` bits in the range of `slice`
  99. ## matching those of `v` set to 1.
  100. ##
  101. ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation.
  102. runnableExamples:
  103. let v = 0b0000_1011'u8
  104. doAssert v.masked(1 .. 3) == 0b0000_1010'u8
  105. bitand(v, toMask[T](slice))
  106. proc mask*[T: SomeInteger](v: var T; mask: T) {.inline, since: (1, 3).} =
  107. ## Mutates `v`, with only the `1` bits from `mask` matching those of
  108. ## `v` set to 1.
  109. ##
  110. ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation.
  111. runnableExamples:
  112. var v = 0b0000_0011'u8
  113. v.mask(0b0000_1010'u8)
  114. doAssert v == 0b0000_0010'u8
  115. v = bitand(v, mask)
  116. proc mask*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  117. ## Mutates `v`, with only the `1` bits in the range of `slice`
  118. ## matching those of `v` set to 1.
  119. ##
  120. ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation.
  121. runnableExamples:
  122. var v = 0b0000_1011'u8
  123. v.mask(1 .. 3)
  124. doAssert v == 0b0000_1010'u8
  125. v = bitand(v, toMask[T](slice))
  126. func setMasked*[T: SomeInteger](v, mask :T): T {.inline, since: (1, 3).} =
  127. ## Returns `v`, with all the `1` bits from `mask` set to 1.
  128. ##
  129. ## Effectively maps to a `bitor <#bitor.m,T,T,varargs[T]>`_ operation.
  130. runnableExamples:
  131. let v = 0b0000_0011'u8
  132. doAssert v.setMasked(0b0000_1010'u8) == 0b0000_1011'u8
  133. bitor(v, mask)
  134. func setMasked*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  135. ## Returns `v`, with all the `1` bits in the range of `slice` set to 1.
  136. ##
  137. ## Effectively maps to a `bitor <#bitor.m,T,T,varargs[T]>`_ operation.
  138. runnableExamples:
  139. let v = 0b0000_0011'u8
  140. doAssert v.setMasked(2 .. 3) == 0b0000_1111'u8
  141. bitor(v, toMask[T](slice))
  142. proc setMask*[T: SomeInteger](v: var T; mask: T) {.inline.} =
  143. ## Mutates `v`, with all the `1` bits from `mask` set to 1.
  144. ##
  145. ## Effectively maps to a `bitor <#bitor.m,T,T,varargs[T]>`_ operation.
  146. runnableExamples:
  147. var v = 0b0000_0011'u8
  148. v.setMask(0b0000_1010'u8)
  149. doAssert v == 0b0000_1011'u8
  150. v = bitor(v, mask)
  151. proc setMask*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  152. ## Mutates `v`, with all the `1` bits in the range of `slice` set to 1.
  153. ##
  154. ## Effectively maps to a `bitor <#bitor.m,T,T,varargs[T]>`_ operation.
  155. runnableExamples:
  156. var v = 0b0000_0011'u8
  157. v.setMask(2 .. 3)
  158. doAssert v == 0b0000_1111'u8
  159. v = bitor(v, toMask[T](slice))
  160. func clearMasked*[T: SomeInteger](v, mask :T): T {.inline, since: (1, 3).} =
  161. ## Returns `v`, with all the `1` bits from `mask` set to 0.
  162. ##
  163. ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation
  164. ## with an *inverted mask*.
  165. runnableExamples:
  166. let v = 0b0000_0011'u8
  167. doAssert v.clearMasked(0b0000_1010'u8) == 0b0000_0001'u8
  168. bitand(v, bitnot(mask))
  169. func clearMasked*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  170. ## Returns `v`, with all the `1` bits in the range of `slice` set to 0.
  171. ##
  172. ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation
  173. ## with an *inverted mask*.
  174. runnableExamples:
  175. let v = 0b0000_0011'u8
  176. doAssert v.clearMasked(1 .. 3) == 0b0000_0001'u8
  177. bitand(v, bitnot(toMask[T](slice)))
  178. proc clearMask*[T: SomeInteger](v: var T; mask: T) {.inline.} =
  179. ## Mutates `v`, with all the `1` bits from `mask` set to 0.
  180. ##
  181. ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation
  182. ## with an *inverted mask*.
  183. runnableExamples:
  184. var v = 0b0000_0011'u8
  185. v.clearMask(0b0000_1010'u8)
  186. doAssert v == 0b0000_0001'u8
  187. v = bitand(v, bitnot(mask))
  188. proc clearMask*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  189. ## Mutates `v`, with all the `1` bits in the range of `slice` set to 0.
  190. ##
  191. ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation
  192. ## with an *inverted mask*.
  193. runnableExamples:
  194. var v = 0b0000_0011'u8
  195. v.clearMask(1 .. 3)
  196. doAssert v == 0b0000_0001'u8
  197. v = bitand(v, bitnot(toMask[T](slice)))
  198. func flipMasked*[T: SomeInteger](v, mask :T): T {.inline, since: (1, 3).} =
  199. ## Returns `v`, with all the `1` bits from `mask` flipped.
  200. ##
  201. ## Effectively maps to a `bitxor <#bitxor.m,T,T,varargs[T]>`_ operation.
  202. runnableExamples:
  203. let v = 0b0000_0011'u8
  204. doAssert v.flipMasked(0b0000_1010'u8) == 0b0000_1001'u8
  205. bitxor(v, mask)
  206. func flipMasked*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  207. ## Returns `v`, with all the `1` bits in the range of `slice` flipped.
  208. ##
  209. ## Effectively maps to a `bitxor <#bitxor.m,T,T,varargs[T]>`_ operation.
  210. runnableExamples:
  211. let v = 0b0000_0011'u8
  212. doAssert v.flipMasked(1 .. 3) == 0b0000_1101'u8
  213. bitxor(v, toMask[T](slice))
  214. proc flipMask*[T: SomeInteger](v: var T; mask: T) {.inline.} =
  215. ## Mutates `v`, with all the `1` bits from `mask` flipped.
  216. ##
  217. ## Effectively maps to a `bitxor <#bitxor.m,T,T,varargs[T]>`_ operation.
  218. runnableExamples:
  219. var v = 0b0000_0011'u8
  220. v.flipMask(0b0000_1010'u8)
  221. doAssert v == 0b0000_1001'u8
  222. v = bitxor(v, mask)
  223. proc flipMask*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  224. ## Mutates `v`, with all the `1` bits in the range of `slice` flipped.
  225. ##
  226. ## Effectively maps to a `bitxor <#bitxor.m,T,T,varargs[T]>`_ operation.
  227. runnableExamples:
  228. var v = 0b0000_0011'u8
  229. v.flipMask(1 .. 3)
  230. doAssert v == 0b0000_1101'u8
  231. v = bitxor(v, toMask[T](slice))
  232. proc setBit*[T: SomeInteger](v: var T; bit: BitsRange[T]) {.inline.} =
  233. ## Mutates `v`, with the bit at position `bit` set to 1.
  234. runnableExamples:
  235. var v = 0b0000_0011'u8
  236. v.setBit(5'u8)
  237. doAssert v == 0b0010_0011'u8
  238. v.setMask(1.T shl bit)
  239. proc clearBit*[T: SomeInteger](v: var T; bit: BitsRange[T]) {.inline.} =
  240. ## Mutates `v`, with the bit at position `bit` set to 0.
  241. runnableExamples:
  242. var v = 0b0000_0011'u8
  243. v.clearBit(1'u8)
  244. doAssert v == 0b0000_0001'u8
  245. v.clearMask(1.T shl bit)
  246. proc flipBit*[T: SomeInteger](v: var T; bit: BitsRange[T]) {.inline.} =
  247. ## Mutates `v`, with the bit at position `bit` flipped.
  248. runnableExamples:
  249. var v = 0b0000_0011'u8
  250. v.flipBit(1'u8)
  251. doAssert v == 0b0000_0001'u8
  252. v = 0b0000_0011'u8
  253. v.flipBit(2'u8)
  254. doAssert v == 0b0000_0111'u8
  255. v.flipMask(1.T shl bit)
  256. macro setBits*(v: typed; bits: varargs[typed]): untyped =
  257. ## Mutates `v`, with the bits at positions `bits` set to 1.
  258. runnableExamples:
  259. var v = 0b0000_0011'u8
  260. v.setBits(3, 5, 7)
  261. doAssert v == 0b1010_1011'u8
  262. bits.expectKind(nnkBracket)
  263. result = newStmtList()
  264. for bit in bits:
  265. result.add newCall("setBit", v, bit)
  266. macro clearBits*(v: typed; bits: varargs[typed]): untyped =
  267. ## Mutates `v`, with the bits at positions `bits` set to 0.
  268. runnableExamples:
  269. var v = 0b1111_1111'u8
  270. v.clearBits(1, 3, 5, 7)
  271. doAssert v == 0b0101_0101'u8
  272. bits.expectKind(nnkBracket)
  273. result = newStmtList()
  274. for bit in bits:
  275. result.add newCall("clearBit", v, bit)
  276. macro flipBits*(v: typed; bits: varargs[typed]): untyped =
  277. ## Mutates `v`, with the bits at positions `bits` set to 0.
  278. runnableExamples:
  279. var v = 0b0000_1111'u8
  280. v.flipBits(1, 3, 5, 7)
  281. doAssert v == 0b1010_0101'u8
  282. bits.expectKind(nnkBracket)
  283. result = newStmtList()
  284. for bit in bits:
  285. result.add newCall("flipBit", v, bit)
  286. proc testBit*[T: SomeInteger](v: T; bit: BitsRange[T]): bool {.inline.} =
  287. ## Returns true if the bit in `v` at positions `bit` is set to 1.
  288. runnableExamples:
  289. let v = 0b0000_1111'u8
  290. doAssert v.testBit(0)
  291. doAssert not v.testBit(7)
  292. let mask = 1.T shl bit
  293. return (v and mask) == mask
# #### Pure Nim version ####
func firstSetBitNim(x: uint32): int {.inline.} =
  ## Returns the 1-based index of the least significant set bit of x, or if x is zero, returns zero.
  # https://graphics.stanford.edu/%7Eseander/bithacks.html#ZerosOnRightMultLookup
  const lookup: array[32, uint8] = [0'u8, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15,
    25, 17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9]
  let v = x.uint32
  # `v and k` isolates the lowest set bit (two's-complement trick: v and -v).
  let k = not v + 1 # get two's complement # cast[uint32](-cast[int32](v))
  # De Bruijn multiply maps the isolated bit to a unique 5-bit table index.
  result = 1 + lookup[uint32((v and k) * 0x077CB531'u32) shr 27].int
func firstSetBitNim(x: uint64): int {.inline.} =
  ## Returns the 1-based index of the least significant set bit of x, or if x is zero, returns zero.
  # https://graphics.stanford.edu/%7Eseander/bithacks.html#ZerosOnRightMultLookup
  # Splits the 64-bit value into halves and reuses the 32-bit version:
  # if the low half is all zero, scan the high half and add 32.
  let v = uint64(x)
  var k = uint32(v and 0xFFFFFFFF'u32)
  if k == 0:
    k = uint32(v shr 32'u32) and 0xFFFFFFFF'u32
    result = 32
  else:
    result = 0
  result += firstSetBitNim(k)
func fastlog2Nim(x: uint32): int {.inline.} =
  ## Quickly find the log base 2 of a 32-bit or less integer.
  # https://graphics.stanford.edu/%7Eseander/bithacks.html#IntegerLogDeBruijn
  # https://stackoverflow.com/questions/11376288/fast-computing-of-log2-for-64-bit-integers
  const lookup: array[32, uint8] = [0'u8, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18,
    22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31]
  var v = x.uint32
  # Smear the highest set bit downwards so v becomes 2^(log2+1) - 1.
  v = v or v shr 1 # first round down to one less than a power of 2
  v = v or v shr 2
  v = v or v shr 4
  v = v or v shr 8
  v = v or v shr 16
  # De Bruijn multiply maps the smeared value to a unique 5-bit table index.
  result = lookup[uint32(v * 0x07C4ACDD'u32) shr 27].int
func fastlog2Nim(x: uint64): int {.inline.} =
  ## Quickly find the log base 2 of a 64-bit integer.
  # https://graphics.stanford.edu/%7Eseander/bithacks.html#IntegerLogDeBruijn
  # https://stackoverflow.com/questions/11376288/fast-computing-of-log2-for-64-bit-integers
  const lookup: array[64, uint8] = [0'u8, 58, 1, 59, 47, 53, 2, 60, 39, 48, 27, 54,
    33, 42, 3, 61, 51, 37, 40, 49, 18, 28, 20, 55, 30, 34, 11, 43, 14, 22, 4, 62,
    57, 46, 52, 38, 26, 32, 41, 50, 36, 17, 19, 29, 10, 13, 21, 56, 45, 25, 31,
    35, 16, 9, 12, 44, 24, 15, 8, 23, 7, 6, 5, 63]
  var v = x.uint64
  # Smear the highest set bit downwards so v becomes 2^(log2+1) - 1.
  v = v or v shr 1 # first round down to one less than a power of 2
  v = v or v shr 2
  v = v or v shr 4
  v = v or v shr 8
  v = v or v shr 16
  v = v or v shr 32
  # De Bruijn multiply maps the smeared value to a unique 6-bit table index.
  result = lookup[(v * 0x03F6EAF2CD271461'u64) shr 58].int
import system/countbits_impl

# Rotation intrinsics are only used on x86 targets with compilers known to
# provide them; `icl` is excluded in C++ mode. `useBuiltins` comes from
# system/countbits_impl and honours -d:noIntrinsicsBitOpts.
const useBuiltinsRotate = (defined(amd64) or defined(i386)) and
                          (defined(gcc) or defined(clang) or defined(vcc) or
                           (defined(icl) and not defined(cpp))) and useBuiltins
template parityImpl[T](value: T): int =
  ## Computes bit parity (1 if the number of set bits is odd, else 0) by
  ## folding the word onto itself with xor until 4 bits remain.
  # formula id from: https://graphics.stanford.edu/%7Eseander/bithacks.html#ParityParallel
  var v = value
  when sizeof(T) == 8:
    v = v xor (v shr 32)
  when sizeof(T) >= 4:
    v = v xor (v shr 16)
  when sizeof(T) >= 2:
    v = v xor (v shr 8)
  v = v xor (v shr 4)
  v = v and 0xf
  # 0x6996 encodes the parity of every 4-bit value as a 16-entry bit table.
  ((0x6996'u shr v) and 1).int
# FFI declarations for the compiler intrinsics used by the procs below.
# Exactly one branch is active per backend; `useGCC_builtins` etc. come
# from system/countbits_impl.
when useGCC_builtins:
  # Returns the bit parity in value
  proc builtin_parity(x: cuint): cint {.importc: "__builtin_parity", cdecl.}
  proc builtin_parityll(x: culonglong): cint {.importc: "__builtin_parityll", cdecl.}
  # Returns one plus the index of the least significant 1-bit of x, or if x is zero, returns zero.
  proc builtin_ffs(x: cint): cint {.importc: "__builtin_ffs", cdecl.}
  proc builtin_ffsll(x: clonglong): cint {.importc: "__builtin_ffsll", cdecl.}
  # Returns the number of leading 0-bits in x, starting at the most significant bit position. If x is 0, the result is undefined.
  proc builtin_clz(x: cuint): cint {.importc: "__builtin_clz", cdecl.}
  proc builtin_clzll(x: culonglong): cint {.importc: "__builtin_clzll", cdecl.}
  # Returns the number of trailing 0-bits in x, starting at the least significant bit position. If x is 0, the result is undefined.
  proc builtin_ctz(x: cuint): cint {.importc: "__builtin_ctz", cdecl.}
  proc builtin_ctzll(x: culonglong): cint {.importc: "__builtin_ctzll", cdecl.}
elif useVCC_builtins:
  # Search the mask data from most significant bit (MSB) to least significant bit (LSB) for a set bit (1).
  func bitScanReverse(index: ptr culong, mask: culong): uint8 {.
      importc: "_BitScanReverse", header: "<intrin.h>".}
  func bitScanReverse64(index: ptr culong, mask: uint64): uint8 {.
      importc: "_BitScanReverse64", header: "<intrin.h>".}
  # Search the mask data from least significant bit (LSB) to the most significant bit (MSB) for a set bit (1).
  func bitScanForward(index: ptr culong, mask: culong): uint8 {.
      importc: "_BitScanForward", header: "<intrin.h>".}
  func bitScanForward64(index: ptr culong, mask: uint64): uint8 {.
      importc: "_BitScanForward64", header: "<intrin.h>".}
  template vcc_scan_impl(fnc: untyped; v: untyped): int =
    ## Runs a VCC bit-scan intrinsic and yields the index it found.
    ## NOTE(review): the nonzero "found" return of `fnc` is deliberately
    ## discarded; callers guard the zero-input case themselves.
    var index: culong
    discard fnc(index.addr, v)
    index.int
elif useICC_builtins:
  # Returns the number of trailing 0-bits in x, starting at the least significant bit position. If x is 0, the result is undefined.
  func bitScanForward(p: ptr uint32, b: uint32): uint8 {.
      importc: "_BitScanForward", header: "<immintrin.h>".}
  func bitScanForward64(p: ptr uint32, b: uint64): uint8 {.
      importc: "_BitScanForward64", header: "<immintrin.h>".}
  # Returns the number of leading 0-bits in x, starting at the most significant bit position. If x is 0, the result is undefined.
  func bitScanReverse(p: ptr uint32, b: uint32): uint8 {.
      importc: "_BitScanReverse", header: "<immintrin.h>".}
  func bitScanReverse64(p: ptr uint32, b: uint64): uint8 {.
      importc: "_BitScanReverse64", header: "<immintrin.h>".}
  template icc_scan_impl(fnc: untyped; v: untyped): int =
    ## Runs an ICC bit-scan intrinsic and yields the index it found.
    var index: uint32
    discard fnc(index.addr, v)
    index.int
  402. func countSetBits*(x: SomeInteger): int {.inline.} =
  403. ## Counts the set bits in an integer (also called `Hamming weight`:idx:).
  404. runnableExamples:
  405. doAssert countSetBits(0b0000_0011'u8) == 2
  406. doAssert countSetBits(0b1010_1010'u8) == 4
  407. result = countSetBitsImpl(x)
  408. func popcount*(x: SomeInteger): int {.inline.} =
  409. ## Alias for `countSetBits <#countSetBits,SomeInteger>`_ (Hamming weight).
  410. result = countSetBits(x)
func parityBits*(x: SomeInteger): int {.inline.} =
  ## Calculate the bit parity in an integer. If the number of 1-bits
  ## is odd, the parity is 1, otherwise 0.
  runnableExamples:
    doAssert parityBits(0b0000_0000'u8) == 0
    doAssert parityBits(0b0101_0001'u8) == 1
    doAssert parityBits(0b0110_1001'u8) == 0
    doAssert parityBits(0b0111_1111'u8) == 1
  # Can be used a base if creating ASM version.
  # https://stackoverflow.com/questions/21617970/how-to-check-if-value-has-even-parity-of-bits-or-odd
  let x = x.castToUnsigned
  when nimvm:
    # Compile-time VM path: always the pure-Nim fold.
    result = forwardImpl(parityImpl, x)
  else:
    when useGCC_builtins:
      when sizeof(x) <= 4: result = builtin_parity(x.uint32).int
      else: result = builtin_parityll(x.uint64).int
    else:
      when sizeof(x) <= 4: result = parityImpl(x.uint32)
      else: result = parityImpl(x.uint64)
func firstSetBit*(x: SomeInteger): int {.inline.} =
  ## Returns the 1-based index of the least significant set bit of `x`.
  ## If `x` is zero, when `noUndefinedBitOpts` is set, the result is 0,
  ## otherwise the result is undefined.
  runnableExamples:
    doAssert firstSetBit(0b0000_0001'u8) == 1
    doAssert firstSetBit(0b0000_0010'u8) == 2
    doAssert firstSetBit(0b0000_0100'u8) == 3
    doAssert firstSetBit(0b0000_1000'u8) == 4
    doAssert firstSetBit(0b0000_1111'u8) == 1
  # GCC builtin 'builtin_ffs' already handle zero input.
  let x = x.castToUnsigned
  when nimvm:
    # Compile-time VM path: the builtin intrinsics are unavailable, so the
    # zero guard must be applied here explicitly.
    when noUndefined:
      if x == 0:
        return 0
    result = forwardImpl(firstSetBitNim, x)
  else:
    # Zero guard only needed when no builtin handles zero for us.
    when noUndefined and not useGCC_builtins:
      if x == 0:
        return 0
    when useGCC_builtins:
      when sizeof(x) <= 4: result = builtin_ffs(cast[cint](x.cuint)).int
      else: result = builtin_ffsll(cast[clonglong](x.culonglong)).int
    elif useVCC_builtins:
      when sizeof(x) <= 4:
        result = 1 + vcc_scan_impl(bitScanForward, x.culong)
      elif arch64:
        result = 1 + vcc_scan_impl(bitScanForward64, x.uint64)
      else:
        # 64-bit scan intrinsic unavailable on 32-bit targets.
        result = firstSetBitNim(x.uint64)
    elif useICC_builtins:
      when sizeof(x) <= 4:
        result = 1 + icc_scan_impl(bitScanForward, x.uint32)
      elif arch64:
        result = 1 + icc_scan_impl(bitScanForward64, x.uint64)
      else:
        result = firstSetBitNim(x.uint64)
    else:
      when sizeof(x) <= 4: result = firstSetBitNim(x.uint32)
      else: result = firstSetBitNim(x.uint64)
func fastLog2*(x: SomeInteger): int {.inline.} =
  ## Quickly find the log base 2 of an integer.
  ## If `x` is zero, when `noUndefinedBitOpts` is set, the result is -1,
  ## otherwise the result is undefined.
  runnableExamples:
    doAssert fastLog2(0b0000_0001'u8) == 0
    doAssert fastLog2(0b0000_0010'u8) == 1
    doAssert fastLog2(0b0000_0100'u8) == 2
    doAssert fastLog2(0b0000_1000'u8) == 3
    doAssert fastLog2(0b0000_1111'u8) == 3
  let x = x.castToUnsigned
  when noUndefined:
    if x == 0:
      return -1
  when nimvm:
    # Compile-time VM path: always the pure-Nim De Bruijn version.
    result = forwardImpl(fastlog2Nim, x)
  else:
    when useGCC_builtins:
      # log2(x) == (bit width - 1) - clz(x) for nonzero x.
      when sizeof(x) <= 4: result = 31 - builtin_clz(x.uint32).int
      else: result = 63 - builtin_clzll(x.uint64).int
    elif useVCC_builtins:
      when sizeof(x) <= 4:
        result = vcc_scan_impl(bitScanReverse, x.culong)
      elif arch64:
        result = vcc_scan_impl(bitScanReverse64, x.uint64)
      else:
        # 64-bit scan intrinsic unavailable on 32-bit targets.
        result = fastlog2Nim(x.uint64)
    elif useICC_builtins:
      when sizeof(x) <= 4:
        result = icc_scan_impl(bitScanReverse, x.uint32)
      elif arch64:
        result = icc_scan_impl(bitScanReverse64, x.uint64)
      else:
        result = fastlog2Nim(x.uint64)
    else:
      when sizeof(x) <= 4: result = fastlog2Nim(x.uint32)
      else: result = fastlog2Nim(x.uint64)
func countLeadingZeroBits*(x: SomeInteger): int {.inline.} =
  ## Returns the number of leading zero bits in an integer.
  ## If `x` is zero, when `noUndefinedBitOpts` is set, the result is 0,
  ## otherwise the result is undefined.
  ##
  ## **See also:**
  ## * `countTrailingZeroBits proc <#countTrailingZeroBits,SomeInteger>`_
  runnableExamples:
    doAssert countLeadingZeroBits(0b0000_0001'u8) == 7
    doAssert countLeadingZeroBits(0b0000_0010'u8) == 6
    doAssert countLeadingZeroBits(0b0000_0100'u8) == 5
    doAssert countLeadingZeroBits(0b0000_1000'u8) == 4
    doAssert countLeadingZeroBits(0b0000_1111'u8) == 4
  let x = x.castToUnsigned
  when noUndefined:
    if x == 0:
      return 0
  when nimvm:
    # clz(x) == (bit width - 1) - log2(x) for nonzero x.
    result = sizeof(x)*8 - 1 - forwardImpl(fastlog2Nim, x)
  else:
    when useGCC_builtins:
      # builtin_clz counts over 32 bits; subtract the widening for
      # narrower types so the count matches sizeof(x).
      when sizeof(x) <= 4: result = builtin_clz(x.uint32).int - (32 - sizeof(x)*8)
      else: result = builtin_clzll(x.uint64).int
    else:
      when sizeof(x) <= 4: result = sizeof(x)*8 - 1 - fastlog2Nim(x.uint32)
      else: result = sizeof(x)*8 - 1 - fastlog2Nim(x.uint64)
func countTrailingZeroBits*(x: SomeInteger): int {.inline.} =
  ## Returns the number of trailing zeros in an integer.
  ## If `x` is zero, when `noUndefinedBitOpts` is set, the result is 0,
  ## otherwise the result is undefined.
  ##
  ## **See also:**
  ## * `countLeadingZeroBits proc <#countLeadingZeroBits,SomeInteger>`_
  runnableExamples:
    doAssert countTrailingZeroBits(0b0000_0001'u8) == 0
    doAssert countTrailingZeroBits(0b0000_0010'u8) == 1
    doAssert countTrailingZeroBits(0b0000_0100'u8) == 2
    doAssert countTrailingZeroBits(0b0000_1000'u8) == 3
    doAssert countTrailingZeroBits(0b0000_1111'u8) == 0
  let x = x.castToUnsigned
  when noUndefined:
    if x == 0:
      return 0
  when nimvm:
    # ctz(x) == firstSetBit(x) - 1 for nonzero x.
    result = firstSetBit(x) - 1
  else:
    when useGCC_builtins:
      when sizeof(x) <= 4: result = builtin_ctz(x.uint32).int
      else: result = builtin_ctzll(x.uint64).int
    else:
      result = firstSetBit(x) - 1
  560. when useBuiltinsRotate:
  561. when defined(gcc):
  562. # GCC was tested until version 4.8.1 and intrinsics were present. Not tested
  563. # in previous versions.
  564. func builtin_rotl8(value: uint8, shift: cint): uint8
  565. {.importc: "__rolb", header: "<x86intrin.h>".}
  566. func builtin_rotl16(value: cushort, shift: cint): cushort
  567. {.importc: "__rolw", header: "<x86intrin.h>".}
  568. func builtin_rotl32(value: cuint, shift: cint): cuint
  569. {.importc: "__rold", header: "<x86intrin.h>".}
  570. when defined(amd64):
  571. func builtin_rotl64(value: culonglong, shift: cint): culonglong
  572. {.importc: "__rolq", header: "<x86intrin.h>".}
  573. func builtin_rotr8(value: uint8, shift: cint): uint8
  574. {.importc: "__rorb", header: "<x86intrin.h>".}
  575. func builtin_rotr16(value: cushort, shift: cint): cushort
  576. {.importc: "__rorw", header: "<x86intrin.h>".}
  577. func builtin_rotr32(value: cuint, shift: cint): cuint
  578. {.importc: "__rord", header: "<x86intrin.h>".}
  579. when defined(amd64):
  580. func builtin_rotr64(value: culonglong, shift: cint): culonglong
  581. {.importc: "__rorq", header: "<x86intrin.h>".}
  582. elif defined(clang):
  583. # In CLANG, builtins have been present since version 8.0.0 and intrinsics
  584. # since version 9.0.0. This implementation chose the builtins, as they have
  585. # been around for longer.
  586. # https://releases.llvm.org/8.0.0/tools/clang/docs/ReleaseNotes.html#non-comprehensive-list-of-changes-in-this-release
  587. # https://releases.llvm.org/8.0.0/tools/clang/docs/LanguageExtensions.html#builtin-rotateleft
  588. # source for correct declarations: https://github.com/llvm/llvm-project/blob/main/clang/include/clang/Basic/Builtins.def
  589. func builtin_rotl8(value: uint8, shift: uint8): uint8
  590. {.importc: "__builtin_rotateleft8", nodecl.}
  591. func builtin_rotl16(value: cushort, shift: cushort): cushort
  592. {.importc: "__builtin_rotateleft16", nodecl.}
  593. func builtin_rotl32(value: cuint, shift: cuint): cuint
  594. {.importc: "__builtin_rotateleft32", nodecl.}
  595. when defined(amd64):
  596. func builtin_rotl64(value: culonglong, shift: culonglong): culonglong
  597. {.importc: "__builtin_rotateleft64", nodecl.}
  598. func builtin_rotr8(value: uint8, shift: uint8): uint8
  599. {.importc: "__builtin_rotateright8", nodecl.}
  600. func builtin_rotr16(value: cushort, shift: cushort): cushort
  601. {.importc: "__builtin_rotateright16", nodecl.}
  602. func builtin_rotr32(value: cuint, shift: cuint): cuint
  603. {.importc: "__builtin_rotateright32", nodecl.}
  604. when defined(amd64):
  605. # shift is unsigned, refs https://github.com/llvm-mirror/clang/commit/892de415b7fde609dafc4e6c1643b7eaa0150a4d
  606. func builtin_rotr64(value: culonglong, shift: culonglong): culonglong
  607. {.importc: "__builtin_rotateright64", nodecl.}
  608. elif defined(vcc):
  609. # Tested on Microsoft (R) C/C++ Optimizing Compiler 19.28.29335 x64 and x86.
  610. # Not tested in previous versions.
  611. # https://docs.microsoft.com/en-us/cpp/intrinsics/rotl8-rotl16?view=msvc-160
  612. # https://docs.microsoft.com/en-us/cpp/intrinsics/rotr8-rotr16?view=msvc-160
  613. # https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/rotl-rotl64-rotr-rotr64?view=msvc-160
  614. func builtin_rotl8(value: uint8, shift: uint8): uint8
  615. {.importc: "_rotl8", header: "<intrin.h>".}
  616. func builtin_rotl16(value: cushort, shift: uint8): cushort
  617. {.importc: "_rotl16", header: "<intrin.h>".}
  618. func builtin_rotl32(value: cuint, shift: cint): cuint
  619. {.importc: "_rotl", header: "<stdlib.h>".}
  620. when defined(amd64):
  621. func builtin_rotl64(value: culonglong, shift: cint): culonglong
  622. {.importc: "_rotl64", header: "<stdlib.h>".}
  623. func builtin_rotr8(value: uint8, shift: uint8): uint8
  624. {.importc: "_rotr8", header: "<intrin.h>".}
  625. func builtin_rotr16(value: cushort, shift: uint8): cushort
  626. {.importc: "_rotr16", header: "<intrin.h>".}
  627. func builtin_rotr32(value: cuint, shift: cint): cuint
  628. {.importc: "_rotr", header: "<stdlib.h>".}
  629. when defined(amd64):
  630. func builtin_rotr64(value: culonglong, shift: cint): culonglong
  631. {.importc: "_rotr64", header: "<stdlib.h>".}
  632. elif defined(icl):
  633. # Tested on Intel(R) C++ Intel(R) 64 Compiler Classic Version 2021.1.2 Build
  634. # 20201208_000000 x64 and x86. Not tested in previous versions.
  635. func builtin_rotl8(value: uint8, shift: cint): uint8
  636. {.importc: "__rolb", header: "<immintrin.h>".}
  637. func builtin_rotl16(value: cushort, shift: cint): cushort
  638. {.importc: "__rolw", header: "<immintrin.h>".}
  639. func builtin_rotl32(value: cuint, shift: cint): cuint
  640. {.importc: "__rold", header: "<immintrin.h>".}
  641. when defined(amd64):
  642. func builtin_rotl64(value: culonglong, shift: cint): culonglong
  643. {.importc: "__rolq", header: "<immintrin.h>".}
  644. func builtin_rotr8(value: uint8, shift: cint): uint8
  645. {.importc: "__rorb", header: "<immintrin.h>".}
  646. func builtin_rotr16(value: cushort, shift: cint): cushort
  647. {.importc: "__rorw", header: "<immintrin.h>".}
  648. func builtin_rotr32(value: cuint, shift: cint): cuint
  649. {.importc: "__rord", header: "<immintrin.h>".}
  650. when defined(amd64):
  651. func builtin_rotr64(value: culonglong, shift: cint): culonglong
  652. {.importc: "__rorq", header: "<immintrin.h>".}
  653. func rotl[T: SomeUnsignedInt](value: T, rot: int32): T {.inline.} =
  654. ## Left-rotate bits in a `value`.
  655. # https://stackoverflow.com/a/776523
  656. const mask = 8 * sizeof(value) - 1
  657. let rot = rot and mask
  658. (value shl rot) or (value shr ((-rot) and mask))
  659. func rotr[T: SomeUnsignedInt](value: T, rot: int32): T {.inline.} =
  660. ## Right-rotate bits in a `value`.
  661. const mask = 8 * sizeof(value) - 1
  662. let rot = rot and mask
  663. (value shr rot) or (value shl ((-rot) and mask))
func shiftTypeTo(size: static int, shift: int): auto {.inline.} =
  ## Returns the `shift` for the rotation according to the compiler and the
  ## `size`.
  ##
  ## Each C compiler's rotate intrinsic expects a specific integer type
  ## for the shift operand (see the `builtin_rotl*`/`builtin_rotr*`
  ## import declarations above); this converts `shift` to the type
  ## matching the current compiler and a value of `size` bytes.
  when (defined(vcc) and (size in [4, 8])) or defined(gcc) or defined(icl):
    # MSVC `_rotl`/`_rotl64` and all GCC/ICC intrinsics take a C `int`.
    cint(shift)
  elif (defined(vcc) and (size in [1, 2])) or (defined(clang) and size == 1):
    # MSVC `_rotl8`/`_rotl16` and clang's 8-bit builtin take `unsigned char`.
    uint8(shift)
  elif defined(clang):
    # clang's wider builtins take a shift of the same width as the value.
    when size == 2:
      cushort(shift)
    elif size == 4:
      cuint(shift)
    elif size == 8:
      culonglong(shift)
  678. func rotateLeftBits*[T: SomeUnsignedInt](value: T, shift: range[0..(sizeof(T) * 8)]): T {.inline.} =
  679. ## Left-rotate bits in a `value`.
  680. runnableExamples:
  681. doAssert rotateLeftBits(0b0110_1001'u8, 4) == 0b1001_0110'u8
  682. doAssert rotateLeftBits(0b00111100_11000011'u16, 8) ==
  683. 0b11000011_00111100'u16
  684. doAssert rotateLeftBits(0b0000111111110000_1111000000001111'u32, 16) ==
  685. 0b1111000000001111_0000111111110000'u32
  686. doAssert rotateLeftBits(0b00000000111111111111111100000000_11111111000000000000000011111111'u64, 32) ==
  687. 0b11111111000000000000000011111111_00000000111111111111111100000000'u64
  688. when nimvm:
  689. rotl(value, shift.int32)
  690. else:
  691. when useBuiltinsRotate:
  692. const size = sizeof(T)
  693. when size == 1:
  694. builtin_rotl8(value.uint8, shiftTypeTo(size, shift)).T
  695. elif size == 2:
  696. builtin_rotl16(value.cushort, shiftTypeTo(size, shift)).T
  697. elif size == 4:
  698. builtin_rotl32(value.cuint, shiftTypeTo(size, shift)).T
  699. elif size == 8 and arch64:
  700. builtin_rotl64(value.culonglong, shiftTypeTo(size, shift)).T
  701. else:
  702. rotl(value, shift.int32)
  703. else:
  704. rotl(value, shift.int32)
  705. func rotateRightBits*[T: SomeUnsignedInt](value: T, shift: range[0..(sizeof(T) * 8)]): T {.inline.} =
  706. ## Right-rotate bits in a `value`.
  707. runnableExamples:
  708. doAssert rotateRightBits(0b0110_1001'u8, 4) == 0b1001_0110'u8
  709. doAssert rotateRightBits(0b00111100_11000011'u16, 8) ==
  710. 0b11000011_00111100'u16
  711. doAssert rotateRightBits(0b0000111111110000_1111000000001111'u32, 16) ==
  712. 0b1111000000001111_0000111111110000'u32
  713. doAssert rotateRightBits(0b00000000111111111111111100000000_11111111000000000000000011111111'u64, 32) ==
  714. 0b11111111000000000000000011111111_00000000111111111111111100000000'u64
  715. when nimvm:
  716. rotr(value, shift.int32)
  717. else:
  718. when useBuiltinsRotate:
  719. const size = sizeof(T)
  720. when size == 1:
  721. builtin_rotr8(value.uint8, shiftTypeTo(size, shift)).T
  722. elif size == 2:
  723. builtin_rotr16(value.cushort, shiftTypeTo(size, shift)).T
  724. elif size == 4:
  725. builtin_rotr32(value.cuint, shiftTypeTo(size, shift)).T
  726. elif size == 8 and arch64:
  727. builtin_rotr64(value.culonglong, shiftTypeTo(size, shift)).T
  728. else:
  729. rotr(value, shift.int32)
  730. else:
  731. rotr(value, shift.int32)
  732. func repeatBits[T: SomeUnsignedInt](x: SomeUnsignedInt; retType: type[T]): T =
  733. result = x
  734. var i = 1
  735. while i != (sizeof(T) div sizeof(x)):
  736. result = (result shl (sizeof(x)*8*i)) or result
  737. i *= 2
  738. func reverseBits*[T: SomeUnsignedInt](x: T): T =
  739. ## Return the bit reversal of x.
  740. runnableExamples:
  741. doAssert reverseBits(0b10100100'u8) == 0b00100101'u8
  742. doAssert reverseBits(0xdd'u8) == 0xbb'u8
  743. doAssert reverseBits(0xddbb'u16) == 0xddbb'u16
  744. doAssert reverseBits(0xdeadbeef'u32) == 0xf77db57b'u32
  745. template repeat(x: SomeUnsignedInt): T = repeatBits(x, T)
  746. result = x
  747. result =
  748. ((repeat(0x55u8) and result) shl 1) or
  749. ((repeat(0xaau8) and result) shr 1)
  750. result =
  751. ((repeat(0x33u8) and result) shl 2) or
  752. ((repeat(0xccu8) and result) shr 2)
  753. when sizeof(T) == 1:
  754. result = (result shl 4) or (result shr 4)
  755. when sizeof(T) >= 2:
  756. result =
  757. ((repeat(0x0fu8) and result) shl 4) or
  758. ((repeat(0xf0u8) and result) shr 4)
  759. when sizeof(T) == 2:
  760. result = (result shl 8) or (result shr 8)
  761. when sizeof(T) >= 4:
  762. result =
  763. ((repeat(0x00ffu16) and result) shl 8) or
  764. ((repeat(0xff00u16) and result) shr 8)
  765. when sizeof(T) == 4:
  766. result = (result shl 16) or (result shr 16)
  767. when sizeof(T) == 8:
  768. result =
  769. ((repeat(0x0000ffffu32) and result) shl 16) or
  770. ((repeat(0xffff0000u32) and result) shr 16)
  771. result = (result shl 32) or (result shr 32)