bitops.nim 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886
#
#
#            Nim's Runtime Library
#        (c) Copyright 2017 Nim Authors
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## This module implements a series of low level methods for bit manipulation.
##
## By default, compiler intrinsics are used where possible to improve performance
## on supported compilers: `GCC`, `LLVM_GCC`, `CLANG`, `VCC`, `ICC`.
##
## The module will fall back to pure Nim procs in case the backend is not supported.
## You can also use the flag `noIntrinsicsBitOpts` to disable compiler intrinsics.
##
## This module is also compatible with other backends: `JavaScript`, `NimScript`
## as well as the `compiletime VM`.
##
## As a result of using optimized functions/intrinsics, some functions can return
## undefined results if the input is invalid. You can use the flag `noUndefinedBitOpts`
## to force predictable behaviour for all input, causing a small performance hit.
##
## At this time only `fastLog2`, `firstSetBit`, `countLeadingZeroBits` and `countTrailingZeroBits`
## may return undefined and/or platform dependent values if given invalid input.
  26. import macros
  27. import std/private/since
  28. from std/private/bitops_utils import forwardImpl, toUnsigned
func bitnot*[T: SomeInteger](x: T): T {.magic: "BitnotI".}
  ## Computes the `bitwise complement` of the integer `x`:
  ## every bit of the result is the inverse of the corresponding bit of `x`.
# Single-pair implementations backing the variadic `bitand`, `bitor` and
# `bitxor` macros below. Compiler magics; intentionally not exported.
func internalBitand[T: SomeInteger](x, y: T): T {.magic: "BitandI".}
func internalBitor[T: SomeInteger](x, y: T): T {.magic: "BitorI".}
func internalBitxor[T: SomeInteger](x, y: T): T {.magic: "BitxorI".}
  34. macro bitand*[T: SomeInteger](x, y: T; z: varargs[T]): T =
  35. ## Computes the `bitwise and` of all arguments collectively.
  36. let fn = bindSym("internalBitand")
  37. result = newCall(fn, x, y)
  38. for extra in z:
  39. result = newCall(fn, result, extra)
  40. macro bitor*[T: SomeInteger](x, y: T; z: varargs[T]): T =
  41. ## Computes the `bitwise or` of all arguments collectively.
  42. let fn = bindSym("internalBitor")
  43. result = newCall(fn, x, y)
  44. for extra in z:
  45. result = newCall(fn, result, extra)
  46. macro bitxor*[T: SomeInteger](x, y: T; z: varargs[T]): T =
  47. ## Computes the `bitwise xor` of all arguments collectively.
  48. let fn = bindSym("internalBitxor")
  49. result = newCall(fn, x, y)
  50. for extra in z:
  51. result = newCall(fn, result, extra)
type BitsRange*[T] = range[0..sizeof(T)*8-1]
  ## A range with all bit positions for type `T`,
  ## i.e. `0 .. sizeof(T)*8 - 1` (e.g. `0 .. 7` for `uint8`).
  54. func bitsliced*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  55. ## Returns an extracted (and shifted) slice of bits from `v`.
  56. runnableExamples:
  57. doAssert 0b10111.bitsliced(2 .. 4) == 0b101
  58. doAssert 0b11100.bitsliced(0 .. 2) == 0b100
  59. doAssert 0b11100.bitsliced(0 ..< 3) == 0b100
  60. let
  61. upmost = sizeof(T) * 8 - 1
  62. uv = when v is SomeUnsignedInt: v else: v.toUnsigned
  63. (uv shl (upmost - slice.b) shr (upmost - slice.b + slice.a)).T
  64. proc bitslice*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  65. ## Mutates `v` into an extracted (and shifted) slice of bits from `v`.
  66. runnableExamples:
  67. var x = 0b101110
  68. x.bitslice(2 .. 4)
  69. doAssert x == 0b011
  70. let
  71. upmost = sizeof(T) * 8 - 1
  72. uv = when v is SomeUnsignedInt: v else: v.toUnsigned
  73. v = (uv shl (upmost - slice.b) shr (upmost - slice.b + slice.a)).T
  74. func toMask*[T: SomeInteger](slice: Slice[int]): T {.inline, since: (1, 3).} =
  75. ## Creates a bitmask based on a slice of bits.
  76. runnableExamples:
  77. doAssert toMask[int32](1 .. 3) == 0b1110'i32
  78. doAssert toMask[int32](0 .. 3) == 0b1111'i32
  79. let
  80. upmost = sizeof(T) * 8 - 1
  81. bitmask = when T is SomeUnsignedInt:
  82. bitnot(0.T)
  83. else:
  84. bitnot(0.T).toUnsigned
  85. (bitmask shl (upmost - slice.b + slice.a) shr (upmost - slice.b)).T
  86. proc masked*[T: SomeInteger](v, mask :T): T {.inline, since: (1, 3).} =
  87. ## Returns `v`, with only the `1` bits from `mask` matching those of
  88. ## `v` set to 1.
  89. ##
  90. ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation.
  91. runnableExamples:
  92. let v = 0b0000_0011'u8
  93. doAssert v.masked(0b0000_1010'u8) == 0b0000_0010'u8
  94. bitand(v, mask)
  95. func masked*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  96. ## Returns `v`, with only the `1` bits in the range of `slice`
  97. ## matching those of `v` set to 1.
  98. ##
  99. ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation.
  100. runnableExamples:
  101. let v = 0b0000_1011'u8
  102. doAssert v.masked(1 .. 3) == 0b0000_1010'u8
  103. bitand(v, toMask[T](slice))
  104. proc mask*[T: SomeInteger](v: var T; mask: T) {.inline, since: (1, 3).} =
  105. ## Mutates `v`, with only the `1` bits from `mask` matching those of
  106. ## `v` set to 1.
  107. ##
  108. ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation.
  109. runnableExamples:
  110. var v = 0b0000_0011'u8
  111. v.mask(0b0000_1010'u8)
  112. doAssert v == 0b0000_0010'u8
  113. v = bitand(v, mask)
  114. proc mask*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  115. ## Mutates `v`, with only the `1` bits in the range of `slice`
  116. ## matching those of `v` set to 1.
  117. ##
  118. ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation.
  119. runnableExamples:
  120. var v = 0b0000_1011'u8
  121. v.mask(1 .. 3)
  122. doAssert v == 0b0000_1010'u8
  123. v = bitand(v, toMask[T](slice))
  124. func setMasked*[T: SomeInteger](v, mask :T): T {.inline, since: (1, 3).} =
  125. ## Returns `v`, with all the `1` bits from `mask` set to 1.
  126. ##
  127. ## Effectively maps to a `bitor <#bitor.m,T,T,varargs[T]>`_ operation.
  128. runnableExamples:
  129. let v = 0b0000_0011'u8
  130. doAssert v.setMasked(0b0000_1010'u8) == 0b0000_1011'u8
  131. bitor(v, mask)
  132. func setMasked*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  133. ## Returns `v`, with all the `1` bits in the range of `slice` set to 1.
  134. ##
  135. ## Effectively maps to a `bitor <#bitor.m,T,T,varargs[T]>`_ operation.
  136. runnableExamples:
  137. let v = 0b0000_0011'u8
  138. doAssert v.setMasked(2 .. 3) == 0b0000_1111'u8
  139. bitor(v, toMask[T](slice))
  140. proc setMask*[T: SomeInteger](v: var T; mask: T) {.inline.} =
  141. ## Mutates `v`, with all the `1` bits from `mask` set to 1.
  142. ##
  143. ## Effectively maps to a `bitor <#bitor.m,T,T,varargs[T]>`_ operation.
  144. runnableExamples:
  145. var v = 0b0000_0011'u8
  146. v.setMask(0b0000_1010'u8)
  147. doAssert v == 0b0000_1011'u8
  148. v = bitor(v, mask)
  149. proc setMask*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  150. ## Mutates `v`, with all the `1` bits in the range of `slice` set to 1.
  151. ##
  152. ## Effectively maps to a `bitor <#bitor.m,T,T,varargs[T]>`_ operation.
  153. runnableExamples:
  154. var v = 0b0000_0011'u8
  155. v.setMask(2 .. 3)
  156. doAssert v == 0b0000_1111'u8
  157. v = bitor(v, toMask[T](slice))
  158. func clearMasked*[T: SomeInteger](v, mask :T): T {.inline, since: (1, 3).} =
  159. ## Returns `v`, with all the `1` bits from `mask` set to 0.
  160. ##
  161. ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation
  162. ## with an *inverted mask*.
  163. runnableExamples:
  164. let v = 0b0000_0011'u8
  165. doAssert v.clearMasked(0b0000_1010'u8) == 0b0000_0001'u8
  166. bitand(v, bitnot(mask))
  167. func clearMasked*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  168. ## Returns `v`, with all the `1` bits in the range of `slice` set to 0.
  169. ##
  170. ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation
  171. ## with an *inverted mask*.
  172. runnableExamples:
  173. let v = 0b0000_0011'u8
  174. doAssert v.clearMasked(1 .. 3) == 0b0000_0001'u8
  175. bitand(v, bitnot(toMask[T](slice)))
  176. proc clearMask*[T: SomeInteger](v: var T; mask: T) {.inline.} =
  177. ## Mutates `v`, with all the `1` bits from `mask` set to 0.
  178. ##
  179. ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation
  180. ## with an *inverted mask*.
  181. runnableExamples:
  182. var v = 0b0000_0011'u8
  183. v.clearMask(0b0000_1010'u8)
  184. doAssert v == 0b0000_0001'u8
  185. v = bitand(v, bitnot(mask))
  186. proc clearMask*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  187. ## Mutates `v`, with all the `1` bits in the range of `slice` set to 0.
  188. ##
  189. ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation
  190. ## with an *inverted mask*.
  191. runnableExamples:
  192. var v = 0b0000_0011'u8
  193. v.clearMask(1 .. 3)
  194. doAssert v == 0b0000_0001'u8
  195. v = bitand(v, bitnot(toMask[T](slice)))
  196. func flipMasked*[T: SomeInteger](v, mask :T): T {.inline, since: (1, 3).} =
  197. ## Returns `v`, with all the `1` bits from `mask` flipped.
  198. ##
  199. ## Effectively maps to a `bitxor <#bitxor.m,T,T,varargs[T]>`_ operation.
  200. runnableExamples:
  201. let v = 0b0000_0011'u8
  202. doAssert v.flipMasked(0b0000_1010'u8) == 0b0000_1001'u8
  203. bitxor(v, mask)
  204. func flipMasked*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  205. ## Returns `v`, with all the `1` bits in the range of `slice` flipped.
  206. ##
  207. ## Effectively maps to a `bitxor <#bitxor.m,T,T,varargs[T]>`_ operation.
  208. runnableExamples:
  209. let v = 0b0000_0011'u8
  210. doAssert v.flipMasked(1 .. 3) == 0b0000_1101'u8
  211. bitxor(v, toMask[T](slice))
  212. proc flipMask*[T: SomeInteger](v: var T; mask: T) {.inline.} =
  213. ## Mutates `v`, with all the `1` bits from `mask` flipped.
  214. ##
  215. ## Effectively maps to a `bitxor <#bitxor.m,T,T,varargs[T]>`_ operation.
  216. runnableExamples:
  217. var v = 0b0000_0011'u8
  218. v.flipMask(0b0000_1010'u8)
  219. doAssert v == 0b0000_1001'u8
  220. v = bitxor(v, mask)
  221. proc flipMask*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  222. ## Mutates `v`, with all the `1` bits in the range of `slice` flipped.
  223. ##
  224. ## Effectively maps to a `bitxor <#bitxor.m,T,T,varargs[T]>`_ operation.
  225. runnableExamples:
  226. var v = 0b0000_0011'u8
  227. v.flipMask(1 .. 3)
  228. doAssert v == 0b0000_1101'u8
  229. v = bitxor(v, toMask[T](slice))
  230. proc setBit*[T: SomeInteger](v: var T; bit: BitsRange[T]) {.inline.} =
  231. ## Mutates `v`, with the bit at position `bit` set to 1.
  232. runnableExamples:
  233. var v = 0b0000_0011'u8
  234. v.setBit(5'u8)
  235. doAssert v == 0b0010_0011'u8
  236. v.setMask(1.T shl bit)
  237. proc clearBit*[T: SomeInteger](v: var T; bit: BitsRange[T]) {.inline.} =
  238. ## Mutates `v`, with the bit at position `bit` set to 0.
  239. runnableExamples:
  240. var v = 0b0000_0011'u8
  241. v.clearBit(1'u8)
  242. doAssert v == 0b0000_0001'u8
  243. v.clearMask(1.T shl bit)
  244. proc flipBit*[T: SomeInteger](v: var T; bit: BitsRange[T]) {.inline.} =
  245. ## Mutates `v`, with the bit at position `bit` flipped.
  246. runnableExamples:
  247. var v = 0b0000_0011'u8
  248. v.flipBit(1'u8)
  249. doAssert v == 0b0000_0001'u8
  250. v = 0b0000_0011'u8
  251. v.flipBit(2'u8)
  252. doAssert v == 0b0000_0111'u8
  253. v.flipMask(1.T shl bit)
  254. macro setBits*(v: typed; bits: varargs[typed]): untyped =
  255. ## Mutates `v`, with the bits at positions `bits` set to 1.
  256. runnableExamples:
  257. var v = 0b0000_0011'u8
  258. v.setBits(3, 5, 7)
  259. doAssert v == 0b1010_1011'u8
  260. bits.expectKind(nnkBracket)
  261. result = newStmtList()
  262. for bit in bits:
  263. result.add newCall("setBit", v, bit)
  264. macro clearBits*(v: typed; bits: varargs[typed]): untyped =
  265. ## Mutates `v`, with the bits at positions `bits` set to 0.
  266. runnableExamples:
  267. var v = 0b1111_1111'u8
  268. v.clearBits(1, 3, 5, 7)
  269. doAssert v == 0b0101_0101'u8
  270. bits.expectKind(nnkBracket)
  271. result = newStmtList()
  272. for bit in bits:
  273. result.add newCall("clearBit", v, bit)
  274. macro flipBits*(v: typed; bits: varargs[typed]): untyped =
  275. ## Mutates `v`, with the bits at positions `bits` set to 0.
  276. runnableExamples:
  277. var v = 0b0000_1111'u8
  278. v.flipBits(1, 3, 5, 7)
  279. doAssert v == 0b1010_0101'u8
  280. bits.expectKind(nnkBracket)
  281. result = newStmtList()
  282. for bit in bits:
  283. result.add newCall("flipBit", v, bit)
  284. proc testBit*[T: SomeInteger](v: T; bit: BitsRange[T]): bool {.inline.} =
  285. ## Returns true if the bit in `v` at positions `bit` is set to 1.
  286. runnableExamples:
  287. let v = 0b0000_1111'u8
  288. doAssert v.testBit(0)
  289. doAssert not v.testBit(7)
  290. let mask = 1.T shl bit
  291. return (v and mask) == mask
  292. # #### Pure Nim version ####
func firstSetBitNim(x: uint32): int {.inline.} =
  ## Returns the 1-based index of the least significant set bit of x, or if x is zero, returns zero.
  # NOTE(review): for x == 0 this actually computes 1 (lookup[0] == 0, plus 1);
  # the public wrappers guard zero input before calling when `noUndefined` is set.
  # De Bruijn multiplication trick:
  # https://graphics.stanford.edu/%7Eseander/bithacks.html#ZerosOnRightMultLookup
  const lookup: array[32, uint8] = [0'u8, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15,
    25, 17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9]
  let v = x.uint32
  let k = not v + 1 # get two's complement # cast[uint32](-cast[int32](v))
  # `v and k` isolates the lowest set bit; multiplying by the De Bruijn
  # constant places a unique 5-bit pattern in the top bits, used as the index.
  result = 1 + lookup[uint32((v and k) * 0x077CB531'u32) shr 27].int
func firstSetBitNim(x: uint64): int {.inline.} =
  ## Returns the 1-based index of the least significant set bit of x, or if x is zero, returns zero.
  # https://graphics.stanford.edu/%7Eseander/bithacks.html#ZerosOnRightMultLookup
  # Split into 32-bit halves: scan the low word first; only if it is empty,
  # scan the high word and add the 32-bit offset.
  # NOTE(review): for x == 0 this computes 33 (32 + the u32 helper's 1);
  # the public wrappers guard zero input before calling when `noUndefined` is set.
  let v = uint64(x)
  var k = uint32(v and 0xFFFFFFFF'u32)
  if k == 0:
    k = uint32(v shr 32'u32) and 0xFFFFFFFF'u32
    result = 32
  else:
    result = 0
  result += firstSetBitNim(k)
func fastlog2Nim(x: uint32): int {.inline.} =
  ## Quickly find the log base 2 of a 32-bit or less integer.
  # De Bruijn-based integer log2:
  # https://graphics.stanford.edu/%7Eseander/bithacks.html#IntegerLogDeBruijn
  # https://stackoverflow.com/questions/11376288/fast-computing-of-log2-for-64-bit-integers
  const lookup: array[32, uint8] = [0'u8, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18,
    22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31]
  var v = x.uint32
  # Smear the highest set bit into every lower position, so `v` becomes
  # one less than the next power of two.
  v = v or v shr 1 # first round down to one less than a power of 2
  v = v or v shr 2
  v = v or v shr 4
  v = v or v shr 8
  v = v or v shr 16
  # The De Bruijn multiply maps each such value to a unique top-5-bit index.
  result = lookup[uint32(v * 0x07C4ACDD'u32) shr 27].int
func fastlog2Nim(x: uint64): int {.inline.} =
  ## Quickly find the log base 2 of a 64-bit integer.
  # De Bruijn-based integer log2, 64-bit variant:
  # https://graphics.stanford.edu/%7Eseander/bithacks.html#IntegerLogDeBruijn
  # https://stackoverflow.com/questions/11376288/fast-computing-of-log2-for-64-bit-integers
  const lookup: array[64, uint8] = [0'u8, 58, 1, 59, 47, 53, 2, 60, 39, 48, 27, 54,
    33, 42, 3, 61, 51, 37, 40, 49, 18, 28, 20, 55, 30, 34, 11, 43, 14, 22, 4, 62,
    57, 46, 52, 38, 26, 32, 41, 50, 36, 17, 19, 29, 10, 13, 21, 56, 45, 25, 31,
    35, 16, 9, 12, 44, 24, 15, 8, 23, 7, 6, 5, 63]
  var v = x.uint64
  # Smear the highest set bit into every lower position, so `v` becomes
  # one less than the next power of two.
  v = v or v shr 1 # first round down to one less than a power of 2
  v = v or v shr 2
  v = v or v shr 4
  v = v or v shr 8
  v = v or v shr 16
  v = v or v shr 32
  # The De Bruijn multiply maps each such value to a unique top-6-bit index.
  result = lookup[(v * 0x03F6EAF2CD271461'u64) shr 58].int
  341. import system/countbits_impl
# Use compiler rotate intrinsics only on x86/amd64 with a compiler known to
# provide them (GCC, Clang, MSVC, or classic ICC when not compiling via C++),
# and only when intrinsics are enabled at all (`useBuiltins` from
# system/countbits_impl).
const useBuiltinsRotate = (defined(amd64) or defined(i386)) and
  (defined(gcc) or defined(clang) or defined(vcc) or
   (defined(icl) and not defined(cpp))) and useBuiltins
template parityImpl[T](value: T): int =
  # Portable XOR-fold parity: repeatedly fold the upper half of the word onto
  # the lower half so the bottom 4 bits end up carrying the parity of the
  # whole value; then use the constant 0x6996 as a 16-entry parity table.
  # formula id from: https://graphics.stanford.edu/%7Eseander/bithacks.html#ParityParallel
  var v = value
  when sizeof(T) == 8:
    v = v xor (v shr 32)
  when sizeof(T) >= 4:
    v = v xor (v shr 16)
  when sizeof(T) >= 2:
    v = v xor (v shr 8)
  v = v xor (v shr 4)
  v = v and 0xf
  ((0x6996'u shr v) and 1).int
when useGCC_builtins:
  # GCC/Clang bit-manipulation builtins.
  # Returns the bit parity in value
  proc builtin_parity(x: cuint): cint {.importc: "__builtin_parity", cdecl.}
  proc builtin_parityll(x: culonglong): cint {.importc: "__builtin_parityll", cdecl.}
  # Returns one plus the index of the least significant 1-bit of x, or if x is zero, returns zero.
  proc builtin_ffs(x: cint): cint {.importc: "__builtin_ffs", cdecl.}
  proc builtin_ffsll(x: clonglong): cint {.importc: "__builtin_ffsll", cdecl.}
  # Returns the number of leading 0-bits in x, starting at the most significant bit position. If x is 0, the result is undefined.
  proc builtin_clz(x: cuint): cint {.importc: "__builtin_clz", cdecl.}
  proc builtin_clzll(x: culonglong): cint {.importc: "__builtin_clzll", cdecl.}
  # Returns the number of trailing 0-bits in x, starting at the least significant bit position. If x is 0, the result is undefined.
  proc builtin_ctz(x: cuint): cint {.importc: "__builtin_ctz", cdecl.}
  proc builtin_ctzll(x: culonglong): cint {.importc: "__builtin_ctzll", cdecl.}
elif useVCC_builtins:
  # MSVC `_BitScan*` intrinsics: they write the found bit index through the
  # pointer argument and return nonzero iff any set bit was found.
  # Search the mask data from most significant bit (MSB) to least significant bit (LSB) for a set bit (1).
  func bitScanReverse(index: ptr culong, mask: culong): uint8 {.
      importc: "_BitScanReverse", header: "<intrin.h>".}
  func bitScanReverse64(index: ptr culong, mask: uint64): uint8 {.
      importc: "_BitScanReverse64", header: "<intrin.h>".}
  # Search the mask data from least significant bit (LSB) to the most significant bit (MSB) for a set bit (1).
  func bitScanForward(index: ptr culong, mask: culong): uint8 {.
      importc: "_BitScanForward", header: "<intrin.h>".}
  func bitScanForward64(index: ptr culong, mask: uint64): uint8 {.
      importc: "_BitScanForward64", header: "<intrin.h>".}
  template vcc_scan_impl(fnc: untyped; v: untyped): int =
    # The found/not-found return value is discarded; callers rule out zero
    # input beforehand (or accept an undefined result).
    var index: culong
    discard fnc(index.addr, v)
    index.int
elif useICC_builtins:
  # Classic Intel compiler variants of the same scan intrinsics.
  # Returns the number of trailing 0-bits in x, starting at the least significant bit position. If x is 0, the result is undefined.
  func bitScanForward(p: ptr uint32, b: uint32): uint8 {.
      importc: "_BitScanForward", header: "<immintrin.h>".}
  func bitScanForward64(p: ptr uint32, b: uint64): uint8 {.
      importc: "_BitScanForward64", header: "<immintrin.h>".}
  # Returns the number of leading 0-bits in x, starting at the most significant bit position. If x is 0, the result is undefined.
  func bitScanReverse(p: ptr uint32, b: uint32): uint8 {.
      importc: "_BitScanReverse", header: "<immintrin.h>".}
  func bitScanReverse64(p: ptr uint32, b: uint64): uint8 {.
      importc: "_BitScanReverse64", header: "<immintrin.h>".}
  template icc_scan_impl(fnc: untyped; v: untyped): int =
    # Same convention as the VCC variant above.
    var index: uint32
    discard fnc(index.addr, v)
    index.int
  400. func countSetBits*(x: SomeInteger): int {.inline.} =
  401. ## Counts the set bits in an integer (also called `Hamming weight`:idx:).
  402. runnableExamples:
  403. doAssert countSetBits(0b0000_0011'u8) == 2
  404. doAssert countSetBits(0b1010_1010'u8) == 4
  405. result = countSetBitsImpl(x)
  406. func popcount*(x: SomeInteger): int {.inline.} =
  407. ## Alias for `countSetBits <#countSetBits,SomeInteger>`_ (Hamming weight).
  408. result = countSetBits(x)
func parityBits*(x: SomeInteger): int {.inline.} =
  ## Calculate the bit parity in an integer. If the number of 1-bits
  ## is odd, the parity is 1, otherwise 0.
  runnableExamples:
    doAssert parityBits(0b0000_0000'u8) == 0
    doAssert parityBits(0b0101_0001'u8) == 1
    doAssert parityBits(0b0110_1001'u8) == 0
    doAssert parityBits(0b0111_1111'u8) == 1
  # Can be used as a base if creating an ASM version.
  # https://stackoverflow.com/questions/21617970/how-to-check-if-value-has-even-parity-of-bits-or-odd
  # Parity is unchanged by reinterpreting the bits, so signed input is simply
  # viewed as unsigned.
  when x is SomeSignedInt:
    let x = x.toUnsigned
  when nimvm:
    # The VM cannot call C intrinsics; always use the portable template.
    result = forwardImpl(parityImpl, x)
  else:
    when useGCC_builtins:
      when sizeof(x) <= 4: result = builtin_parity(x.uint32).int
      else: result = builtin_parityll(x.uint64).int
    else:
      when sizeof(x) <= 4: result = parityImpl(x.uint32)
      else: result = parityImpl(x.uint64)
func firstSetBit*(x: SomeInteger): int {.inline.} =
  ## Returns the 1-based index of the least significant set bit of `x`.
  ## If `x` is zero, when `noUndefinedBitOpts` is set, the result is 0,
  ## otherwise the result is undefined.
  runnableExamples:
    doAssert firstSetBit(0b0000_0001'u8) == 1
    doAssert firstSetBit(0b0000_0010'u8) == 2
    doAssert firstSetBit(0b0000_0100'u8) == 3
    doAssert firstSetBit(0b0000_1000'u8) == 4
    doAssert firstSetBit(0b0000_1111'u8) == 1
  # GCC builtin 'builtin_ffs' already handles zero input.
  when x is SomeSignedInt:
    let x = x.toUnsigned
  when nimvm:
    # VM path: guard zero (the pure helper does not handle it), then use the
    # portable De Bruijn implementation.
    when noUndefined:
      if x == 0:
        return 0
    result = forwardImpl(firstSetBitNim, x)
  else:
    # The zero guard is skipped for GCC builtins because ffs/ffsll are
    # defined to return 0 for zero input.
    when noUndefined and not useGCC_builtins:
      if x == 0:
        return 0
    when useGCC_builtins:
      when sizeof(x) <= 4: result = builtin_ffs(cast[cint](x.cuint)).int
      else: result = builtin_ffsll(cast[clonglong](x.culonglong)).int
    elif useVCC_builtins:
      # _BitScanForward* return a 0-based index, hence the `1 +`.
      when sizeof(x) <= 4:
        result = 1 + vcc_scan_impl(bitScanForward, x.culong)
      elif arch64:
        result = 1 + vcc_scan_impl(bitScanForward64, x.uint64)
      else:
        # 64-bit value on a 32-bit target: no 64-bit scan intrinsic.
        result = firstSetBitNim(x.uint64)
    elif useICC_builtins:
      when sizeof(x) <= 4:
        result = 1 + icc_scan_impl(bitScanForward, x.uint32)
      elif arch64:
        result = 1 + icc_scan_impl(bitScanForward64, x.uint64)
      else:
        result = firstSetBitNim(x.uint64)
    else:
      # Pure Nim fallback.
      when sizeof(x) <= 4: result = firstSetBitNim(x.uint32)
      else: result = firstSetBitNim(x.uint64)
func fastLog2*(x: SomeInteger): int {.inline.} =
  ## Quickly find the log base 2 of an integer.
  ## If `x` is zero, when `noUndefinedBitOpts` is set, the result is -1,
  ## otherwise the result is undefined.
  runnableExamples:
    doAssert fastLog2(0b0000_0001'u8) == 0
    doAssert fastLog2(0b0000_0010'u8) == 1
    doAssert fastLog2(0b0000_0100'u8) == 2
    doAssert fastLog2(0b0000_1000'u8) == 3
    doAssert fastLog2(0b0000_1111'u8) == 3
  when x is SomeSignedInt:
    let x = x.toUnsigned
  when noUndefined:
    # None of the backends below define a result for zero input.
    if x == 0:
      return -1
  when nimvm:
    # VM path: portable De Bruijn implementation.
    result = forwardImpl(fastlog2Nim, x)
  else:
    when useGCC_builtins:
      # log2(x) == (word size - 1) - clz(x).
      when sizeof(x) <= 4: result = 31 - builtin_clz(x.uint32).int
      else: result = 63 - builtin_clzll(x.uint64).int
    elif useVCC_builtins:
      # _BitScanReverse* directly yield the index of the highest set bit.
      when sizeof(x) <= 4:
        result = vcc_scan_impl(bitScanReverse, x.culong)
      elif arch64:
        result = vcc_scan_impl(bitScanReverse64, x.uint64)
      else:
        # 64-bit value on a 32-bit target: no 64-bit scan intrinsic.
        result = fastlog2Nim(x.uint64)
    elif useICC_builtins:
      when sizeof(x) <= 4:
        result = icc_scan_impl(bitScanReverse, x.uint32)
      elif arch64:
        result = icc_scan_impl(bitScanReverse64, x.uint64)
      else:
        result = fastlog2Nim(x.uint64)
    else:
      # Pure Nim fallback.
      when sizeof(x) <= 4: result = fastlog2Nim(x.uint32)
      else: result = fastlog2Nim(x.uint64)
func countLeadingZeroBits*(x: SomeInteger): int {.inline.} =
  ## Returns the number of leading zero bits in an integer.
  ## If `x` is zero, when `noUndefinedBitOpts` is set, the result is 0,
  ## otherwise the result is undefined.
  ##
  ## **See also:**
  ## * `countTrailingZeroBits proc <#countTrailingZeroBits,SomeInteger>`_
  runnableExamples:
    doAssert countLeadingZeroBits(0b0000_0001'u8) == 7
    doAssert countLeadingZeroBits(0b0000_0010'u8) == 6
    doAssert countLeadingZeroBits(0b0000_0100'u8) == 5
    doAssert countLeadingZeroBits(0b0000_1000'u8) == 4
    doAssert countLeadingZeroBits(0b0000_1111'u8) == 4
  when x is SomeSignedInt:
    let x = x.toUnsigned
  when noUndefined:
    # Both the intrinsic and the log2 fallback are undefined for zero.
    if x == 0:
      return 0
  when nimvm:
    # clz(x) == (word size - 1) - log2(x).
    result = sizeof(x)*8 - 1 - forwardImpl(fastlog2Nim, x)
  else:
    when useGCC_builtins:
      # builtin_clz counts against a 32-bit word; subtract the padding for
      # narrower types so the count is relative to sizeof(x).
      when sizeof(x) <= 4: result = builtin_clz(x.uint32).int - (32 - sizeof(x)*8)
      else: result = builtin_clzll(x.uint64).int
    else:
      when sizeof(x) <= 4: result = sizeof(x)*8 - 1 - fastlog2Nim(x.uint32)
      else: result = sizeof(x)*8 - 1 - fastlog2Nim(x.uint64)
func countTrailingZeroBits*(x: SomeInteger): int {.inline.} =
  ## Returns the number of trailing zeros in an integer.
  ## If `x` is zero, when `noUndefinedBitOpts` is set, the result is 0,
  ## otherwise the result is undefined.
  ##
  ## **See also:**
  ## * `countLeadingZeroBits proc <#countLeadingZeroBits,SomeInteger>`_
  runnableExamples:
    doAssert countTrailingZeroBits(0b0000_0001'u8) == 0
    doAssert countTrailingZeroBits(0b0000_0010'u8) == 1
    doAssert countTrailingZeroBits(0b0000_0100'u8) == 2
    doAssert countTrailingZeroBits(0b0000_1000'u8) == 3
    doAssert countTrailingZeroBits(0b0000_1111'u8) == 0
  when x is SomeSignedInt:
    let x = x.toUnsigned
  when noUndefined:
    if x == 0:
      return 0
  when nimvm:
    # ctz(x) == firstSetBit(x) - 1 (firstSetBit is 1-based).
    result = firstSetBit(x) - 1
  else:
    when useGCC_builtins:
      when sizeof(x) <= 4: result = builtin_ctz(x.uint32).int
      else: result = builtin_ctzll(x.uint64).int
    else:
      result = firstSetBit(x) - 1
  563. when useBuiltinsRotate:
  564. when defined(gcc):
  565. # GCC was tested until version 4.8.1 and intrinsics were present. Not tested
  566. # in previous versions.
  567. func builtin_rotl8(value: uint8, shift: cint): uint8
  568. {.importc: "__rolb", header: "<x86intrin.h>".}
  569. func builtin_rotl16(value: cushort, shift: cint): cushort
  570. {.importc: "__rolw", header: "<x86intrin.h>".}
  571. func builtin_rotl32(value: cuint, shift: cint): cuint
  572. {.importc: "__rold", header: "<x86intrin.h>".}
  573. when defined(amd64):
  574. func builtin_rotl64(value: culonglong, shift: cint): culonglong
  575. {.importc: "__rolq", header: "<x86intrin.h>".}
  576. func builtin_rotr8(value: uint8, shift: cint): uint8
  577. {.importc: "__rorb", header: "<x86intrin.h>".}
  578. func builtin_rotr16(value: cushort, shift: cint): cushort
  579. {.importc: "__rorw", header: "<x86intrin.h>".}
  580. func builtin_rotr32(value: cuint, shift: cint): cuint
  581. {.importc: "__rord", header: "<x86intrin.h>".}
  582. when defined(amd64):
  583. func builtin_rotr64(value: culonglong, shift: cint): culonglong
  584. {.importc: "__rorq", header: "<x86intrin.h>".}
  585. elif defined(clang):
  586. # In CLANG, builtins have been present since version 8.0.0 and intrinsics
  587. # since version 9.0.0. This implementation chose the builtins, as they have
  588. # been around for longer.
  589. # https://releases.llvm.org/8.0.0/tools/clang/docs/ReleaseNotes.html#non-comprehensive-list-of-changes-in-this-release
  590. # https://releases.llvm.org/8.0.0/tools/clang/docs/LanguageExtensions.html#builtin-rotateleft
  591. # source for correct declarations: https://github.com/llvm/llvm-project/blob/main/clang/include/clang/Basic/Builtins.def
  592. func builtin_rotl8(value: uint8, shift: uint8): uint8
  593. {.importc: "__builtin_rotateleft8", nodecl.}
  594. func builtin_rotl16(value: cushort, shift: cushort): cushort
  595. {.importc: "__builtin_rotateleft16", nodecl.}
  596. func builtin_rotl32(value: cuint, shift: cuint): cuint
  597. {.importc: "__builtin_rotateleft32", nodecl.}
  598. when defined(amd64):
  599. func builtin_rotl64(value: culonglong, shift: culonglong): culonglong
  600. {.importc: "__builtin_rotateleft64", nodecl.}
  601. func builtin_rotr8(value: uint8, shift: uint8): uint8
  602. {.importc: "__builtin_rotateright8", nodecl.}
  603. func builtin_rotr16(value: cushort, shift: cushort): cushort
  604. {.importc: "__builtin_rotateright16", nodecl.}
  605. func builtin_rotr32(value: cuint, shift: cuint): cuint
  606. {.importc: "__builtin_rotateright32", nodecl.}
  607. when defined(amd64):
  608. # shift is unsigned, refs https://github.com/llvm-mirror/clang/commit/892de415b7fde609dafc4e6c1643b7eaa0150a4d
  609. func builtin_rotr64(value: culonglong, shift: culonglong): culonglong
  610. {.importc: "__builtin_rotateright64", nodecl.}
elif defined(vcc):
  # Tested on Microsoft (R) C/C++ Optimizing Compiler 19.28.29335 x64 and x86.
  # Not tested in previous versions.
  # https://docs.microsoft.com/en-us/cpp/intrinsics/rotl8-rotl16?view=msvc-160
  # https://docs.microsoft.com/en-us/cpp/intrinsics/rotr8-rotr16?view=msvc-160
  # https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/rotl-rotl64-rotr-rotr64?view=msvc-160
  # Note the mixed shift types: the 8/16-bit intrinsics take `unsigned char`
  # (uint8) while the 32/64-bit ones take `int` (cint); the 8/16-bit forms
  # live in <intrin.h>, the 32/64-bit forms in <stdlib.h>.
  func builtin_rotl8(value: uint8, shift: uint8): uint8
    {.importc: "_rotl8", header: "<intrin.h>".}
  func builtin_rotl16(value: cushort, shift: uint8): cushort
    {.importc: "_rotl16", header: "<intrin.h>".}
  func builtin_rotl32(value: cuint, shift: cint): cuint
    {.importc: "_rotl", header: "<stdlib.h>".}
  when defined(amd64):
    # The 64-bit intrinsic is only declared on 64-bit targets.
    func builtin_rotl64(value: culonglong, shift: cint): culonglong
      {.importc: "_rotl64", header: "<stdlib.h>".}
  func builtin_rotr8(value: uint8, shift: uint8): uint8
    {.importc: "_rotr8", header: "<intrin.h>".}
  func builtin_rotr16(value: cushort, shift: uint8): cushort
    {.importc: "_rotr16", header: "<intrin.h>".}
  func builtin_rotr32(value: cuint, shift: cint): cuint
    {.importc: "_rotr", header: "<stdlib.h>".}
  when defined(amd64):
    func builtin_rotr64(value: culonglong, shift: cint): culonglong
      {.importc: "_rotr64", header: "<stdlib.h>".}
elif defined(icl):
  # Tested on Intel(R) C++ Intel(R) 64 Compiler Classic Version 2021.1.2 Build
  # 20201208_000000 x64 and x86. Not tested in previous versions.
  # All Intel rotate intrinsics take the shift as a plain `int` (cint) and are
  # declared in <immintrin.h>.
  func builtin_rotl8(value: uint8, shift: cint): uint8
    {.importc: "__rolb", header: "<immintrin.h>".}
  func builtin_rotl16(value: cushort, shift: cint): cushort
    {.importc: "__rolw", header: "<immintrin.h>".}
  func builtin_rotl32(value: cuint, shift: cint): cuint
    {.importc: "__rold", header: "<immintrin.h>".}
  when defined(amd64):
    # The 64-bit intrinsic is only declared on 64-bit targets.
    func builtin_rotl64(value: culonglong, shift: cint): culonglong
      {.importc: "__rolq", header: "<immintrin.h>".}
  func builtin_rotr8(value: uint8, shift: cint): uint8
    {.importc: "__rorb", header: "<immintrin.h>".}
  func builtin_rotr16(value: cushort, shift: cint): cushort
    {.importc: "__rorw", header: "<immintrin.h>".}
  func builtin_rotr32(value: cuint, shift: cint): cuint
    {.importc: "__rord", header: "<immintrin.h>".}
  when defined(amd64):
    func builtin_rotr64(value: culonglong, shift: cint): culonglong
      {.importc: "__rorq", header: "<immintrin.h>".}
  656. func rotl[T: SomeUnsignedInt](value: T, rot: int32): T {.inline.} =
  657. ## Left-rotate bits in a `value`.
  658. # https://stackoverflow.com/a/776523
  659. const mask = 8 * sizeof(value) - 1
  660. let rot = rot and mask
  661. (value shl rot) or (value shr ((-rot) and mask))
  662. func rotr[T: SomeUnsignedInt](value: T, rot: int32): T {.inline.} =
  663. ## Right-rotate bits in a `value`.
  664. const mask = 8 * sizeof(value) - 1
  665. let rot = rot and mask
  666. (value shr rot) or (value shl ((-rot) and mask))
func shiftTypeTo(size: static int, shift: int): auto {.inline.} =
  ## Returns the `shift` for the rotation according to the compiler and the
  ## `size`.
  ##
  ## Each branch converts `shift` to the exact parameter type declared by the
  ## corresponding compiler builtin/intrinsic above (cint for MSVC 32/64-bit,
  ## ICL and — presumably — GCC; uint8 for MSVC 8/16-bit and Clang 8-bit;
  ## value-sized unsigned types for the remaining Clang builtins).
  when (defined(vcc) and (size in [4, 8])) or defined(gcc) or defined(icl):
    cint(shift)
  elif (defined(vcc) and (size in [1, 2])) or (defined(clang) and size == 1):
    uint8(shift)
  elif defined(clang):
    # Clang's 16/32/64-bit builtins take the shift in the same unsigned type
    # as the rotated value.
    when size == 2:
      cushort(shift)
    elif size == 4:
      cuint(shift)
    elif size == 8:
      culonglong(shift)
func rotateLeftBits*[T: SomeUnsignedInt](value: T, shift: range[0..(sizeof(T) * 8)]): T {.inline.} =
  ## Left-rotate bits in a `value`.
  ##
  ## `shift` must be within `0..sizeof(T)*8` (enforced by the range type);
  ## a full-width shift is equivalent to no rotation.
  runnableExamples:
    doAssert rotateLeftBits(0b0110_1001'u8, 4) == 0b1001_0110'u8
    doAssert rotateLeftBits(0b00111100_11000011'u16, 8) ==
      0b11000011_00111100'u16
    doAssert rotateLeftBits(0b0000111111110000_1111000000001111'u32, 16) ==
      0b1111000000001111_0000111111110000'u32
    doAssert rotateLeftBits(0b00000000111111111111111100000000_11111111000000000000000011111111'u64, 32) ==
      0b11111111000000000000000011111111_00000000111111111111111100000000'u64
  when nimvm:
    # The compile-time VM cannot call the imported C builtins; use the
    # pure-Nim rotation instead.
    rotl(value, shift.int32)
  else:
    when useBuiltinsRotate:
      # Dispatch on the value size to the builtin declared for the current
      # compiler; `shiftTypeTo` supplies the shift in that builtin's type.
      const size = sizeof(T)
      when size == 1:
        builtin_rotl8(value.uint8, shiftTypeTo(size, shift)).T
      elif size == 2:
        builtin_rotl16(value.cushort, shiftTypeTo(size, shift)).T
      elif size == 4:
        builtin_rotl32(value.cuint, shiftTypeTo(size, shift)).T
      elif size == 8 and arch64:
        # The 64-bit builtins are only declared on 64-bit targets.
        builtin_rotl64(value.culonglong, shiftTypeTo(size, shift)).T
      else:
        # e.g. a 64-bit rotation on a 32-bit target: no builtin is bound.
        rotl(value, shift.int32)
    else:
      rotl(value, shift.int32)
func rotateRightBits*[T: SomeUnsignedInt](value: T, shift: range[0..(sizeof(T) * 8)]): T {.inline.} =
  ## Right-rotate bits in a `value`.
  ##
  ## `shift` must be within `0..sizeof(T)*8` (enforced by the range type);
  ## a full-width shift is equivalent to no rotation.
  runnableExamples:
    doAssert rotateRightBits(0b0110_1001'u8, 4) == 0b1001_0110'u8
    doAssert rotateRightBits(0b00111100_11000011'u16, 8) ==
      0b11000011_00111100'u16
    doAssert rotateRightBits(0b0000111111110000_1111000000001111'u32, 16) ==
      0b1111000000001111_0000111111110000'u32
    doAssert rotateRightBits(0b00000000111111111111111100000000_11111111000000000000000011111111'u64, 32) ==
      0b11111111000000000000000011111111_00000000111111111111111100000000'u64
  when nimvm:
    # The compile-time VM cannot call the imported C builtins; use the
    # pure-Nim rotation instead.
    rotr(value, shift.int32)
  else:
    when useBuiltinsRotate:
      # Dispatch on the value size to the builtin declared for the current
      # compiler; `shiftTypeTo` supplies the shift in that builtin's type.
      const size = sizeof(T)
      when size == 1:
        builtin_rotr8(value.uint8, shiftTypeTo(size, shift)).T
      elif size == 2:
        builtin_rotr16(value.cushort, shiftTypeTo(size, shift)).T
      elif size == 4:
        builtin_rotr32(value.cuint, shiftTypeTo(size, shift)).T
      elif size == 8 and arch64:
        # The 64-bit builtins are only declared on 64-bit targets.
        builtin_rotr64(value.culonglong, shiftTypeTo(size, shift)).T
      else:
        # e.g. a 64-bit rotation on a 32-bit target: no builtin is bound.
        rotr(value, shift.int32)
    else:
      rotr(value, shift.int32)
  735. func repeatBits[T: SomeUnsignedInt](x: SomeUnsignedInt; retType: type[T]): T =
  736. result = x
  737. var i = 1
  738. while i != (sizeof(T) div sizeof(x)):
  739. result = (result shl (sizeof(x)*8*i)) or result
  740. i *= 2
func reverseBits*[T: SomeUnsignedInt](x: T): T =
  ## Return the bit reversal of x.
  ##
  ## Uses the classic divide-and-conquer scheme: swap adjacent bits, then
  ## bit pairs, then nibbles, and so on up to the two halves of the value.
  runnableExamples:
    doAssert reverseBits(0b10100100'u8) == 0b00100101'u8
    doAssert reverseBits(0xdd'u8) == 0xbb'u8
    doAssert reverseBits(0xddbb'u16) == 0xddbb'u16
    doAssert reverseBits(0xdeadbeef'u32) == 0xf77db57b'u32
  # `repeat` tiles a small mask constant across the full width of T.
  template repeat(x: SomeUnsignedInt): T = repeatBits(x, T)
  result = x
  # Swap adjacent bits (mask 0101.../1010...).
  result =
    ((repeat(0x55u8) and result) shl 1) or
    ((repeat(0xaau8) and result) shr 1)
  # Swap bit pairs (mask 0011.../1100...).
  result =
    ((repeat(0x33u8) and result) shl 2) or
    ((repeat(0xccu8) and result) shr 2)
  # Swap nibbles; for 1-byte values a plain rotate by 4 suffices.
  when sizeof(T) == 1:
    result = (result shl 4) or (result shr 4)
  when sizeof(T) >= 2:
    result =
      ((repeat(0x0fu8) and result) shl 4) or
      ((repeat(0xf0u8) and result) shr 4)
  # Swap bytes; for 2-byte values a plain rotate by 8 suffices.
  when sizeof(T) == 2:
    result = (result shl 8) or (result shr 8)
  when sizeof(T) >= 4:
    result =
      ((repeat(0x00ffu16) and result) shl 8) or
      ((repeat(0xff00u16) and result) shr 8)
  # Swap 16-bit halves of a 4-byte value.
  when sizeof(T) == 4:
    result = (result shl 16) or (result shr 16)
  # For 8-byte values: swap 16-bit groups, then the two 32-bit halves.
  when sizeof(T) == 8:
    result =
      ((repeat(0x0000ffffu32) and result) shl 16) or
      ((repeat(0xffff0000u32) and result) shr 16)
    result = (result shl 32) or (result shr 32)