bitops.nim 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2017 Nim Authors
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## This module implements a series of low level methods for bit manipulation.
  10. ## By default, this module use compiler intrinsics where possible to improve performance
  11. ## on supported compilers: ``GCC``, ``LLVM_GCC``, ``CLANG``, ``VCC``, ``ICC``.
  12. ##
  13. ## The module will fallback to pure nim procs incase the backend is not supported.
  14. ## You can also use the flag `noIntrinsicsBitOpts` to disable compiler intrinsics.
  15. ##
  16. ## This module is also compatible with other backends: ``Javascript``, ``Nimscript``
  17. ## as well as the ``compiletime VM``.
  18. ##
  19. ## As a result of using optimized function/intrinsics some functions can return
  20. ## undefined results if the input is invalid. You can use the flag `noUndefinedBitOpts`
  21. ## to force predictable behaviour for all input, causing a small performance hit.
  22. ##
  23. ## At this time only `fastLog2`, `firstSetBit, `countLeadingZeroBits`, `countTrailingZeroBits`
  24. ## may return undefined and/or platform dependent value if given invalid input.
  25. import macros
  26. import std/private/since
  27. proc bitnot*[T: SomeInteger](x: T): T {.magic: "BitnotI", noSideEffect.}
  28. ## Computes the `bitwise complement` of the integer `x`.
  29. func internalBitand[T: SomeInteger](x, y: T): T {.magic: "BitandI".}
  30. func internalBitor[T: SomeInteger](x, y: T): T {.magic: "BitorI".}
  31. func internalBitxor[T: SomeInteger](x, y: T): T {.magic: "BitxorI".}
  32. macro bitand*[T: SomeInteger](x, y: T; z: varargs[T]): T =
  33. ## Computes the `bitwise and` of all arguments collectively.
  34. let fn = bindSym("internalBitand")
  35. result = newCall(fn, x, y)
  36. for extra in z:
  37. result = newCall(fn, result, extra)
  38. macro bitor*[T: SomeInteger](x, y: T; z: varargs[T]): T =
  39. ## Computes the `bitwise or` of all arguments collectively.
  40. let fn = bindSym("internalBitor")
  41. result = newCall(fn, x, y)
  42. for extra in z:
  43. result = newCall(fn, result, extra)
  44. macro bitxor*[T: SomeInteger](x, y: T; z: varargs[T]): T =
  45. ## Computes the `bitwise xor` of all arguments collectively.
  46. let fn = bindSym("internalBitxor")
  47. result = newCall(fn, x, y)
  48. for extra in z:
  49. result = newCall(fn, result, extra)
  50. const useBuiltins = not defined(noIntrinsicsBitOpts)
  51. const noUndefined = defined(noUndefinedBitOpts)
  52. const useGCC_builtins = (defined(gcc) or defined(llvm_gcc) or
  53. defined(clang)) and useBuiltins
  54. const useICC_builtins = defined(icc) and useBuiltins
  55. const useVCC_builtins = defined(vcc) and useBuiltins
  56. const arch64 = sizeof(int) == 8
  57. template toUnsigned(x: int8): uint8 = cast[uint8](x)
  58. template toUnsigned(x: int16): uint16 = cast[uint16](x)
  59. template toUnsigned(x: int32): uint32 = cast[uint32](x)
  60. template toUnsigned(x: int64): uint64 = cast[uint64](x)
  61. template toUnsigned(x: int): uint = cast[uint](x)
  62. template forwardImpl(impl, arg) {.dirty.} =
  63. when sizeof(x) <= 4:
  64. when x is SomeSignedInt:
  65. impl(cast[uint32](x.int32))
  66. else:
  67. impl(x.uint32)
  68. else:
  69. when x is SomeSignedInt:
  70. impl(cast[uint64](x.int64))
  71. else:
  72. impl(x.uint64)
  73. when defined(nimHasalignOf):
  74. type BitsRange*[T] = range[0..sizeof(T)*8-1]
  75. ## A range with all bit positions for type ``T``
  76. func bitsliced*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  77. ## Returns an extracted (and shifted) slice of bits from ``v``.
  78. runnableExamples:
  79. doAssert 0b10111.bitsliced(2 .. 4) == 0b101
  80. doAssert 0b11100.bitsliced(0 .. 2) == 0b100
  81. doAssert 0b11100.bitsliced(0 ..< 3) == 0b100
  82. let
  83. upmost = sizeof(T) * 8 - 1
  84. uv = when v is SomeUnsignedInt: v else: v.toUnsigned
  85. (uv shl (upmost - slice.b) shr (upmost - slice.b + slice.a)).T
  86. proc bitslice*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  87. ## Mutates ``v`` into an extracted (and shifted) slice of bits from ``v``.
  88. runnableExamples:
  89. var x = 0b101110
  90. x.bitslice(2 .. 4)
  91. doAssert x == 0b011
  92. let
  93. upmost = sizeof(T) * 8 - 1
  94. uv = when v is SomeUnsignedInt: v else: v.toUnsigned
  95. v = (uv shl (upmost - slice.b) shr (upmost - slice.b + slice.a)).T
  96. func toMask*[T: SomeInteger](slice: Slice[int]): T {.inline, since: (1, 3).} =
  97. ## Creates a bitmask based on a slice of bits.
  98. runnableExamples:
  99. doAssert toMask[int32](1 .. 3) == 0b1110'i32
  100. doAssert toMask[int32](0 .. 3) == 0b1111'i32
  101. let
  102. upmost = sizeof(T) * 8 - 1
  103. bitmask = when T is SomeUnsignedInt:
  104. bitnot(0.T)
  105. else:
  106. bitnot(0.T).toUnsigned
  107. (bitmask shl (upmost - slice.b + slice.a) shr (upmost - slice.b)).T
  108. proc masked*[T: SomeInteger](v, mask :T): T {.inline, since: (1, 3).} =
  109. ## Returns ``v``, with only the ``1`` bits from ``mask`` matching those of
  110. ## ``v`` set to 1.
  111. ##
  112. ## Effectively maps to a `bitand` operation.
  113. runnableExamples:
  114. var v = 0b0000_0011'u8
  115. doAssert v.masked(0b0000_1010'u8) == 0b0000_0010'u8
  116. bitand(v, mask)
  117. func masked*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  118. ## Mutates ``v``, with only the ``1`` bits in the range of ``slice``
  119. ## matching those of ``v`` set to 1.
  120. ##
  121. ## Effectively maps to a `bitand` operation.
  122. runnableExamples:
  123. var v = 0b0000_1011'u8
  124. doAssert v.masked(1 .. 3) == 0b0000_1010'u8
  125. bitand(v, toMask[T](slice))
  126. proc mask*[T: SomeInteger](v: var T; mask: T) {.inline, since: (1, 3).} =
  127. ## Mutates ``v``, with only the ``1`` bits from ``mask`` matching those of
  128. ## ``v`` set to 1.
  129. ##
  130. ## Effectively maps to a `bitand` operation.
  131. runnableExamples:
  132. var v = 0b0000_0011'u8
  133. v.mask(0b0000_1010'u8)
  134. doAssert v == 0b0000_0010'u8
  135. v = bitand(v, mask)
  136. proc mask*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  137. ## Mutates ``v``, with only the ``1`` bits in the range of ``slice``
  138. ## matching those of ``v`` set to 1.
  139. ##
  140. ## Effectively maps to a `bitand` operation.
  141. runnableExamples:
  142. var v = 0b0000_1011'u8
  143. v.mask(1 .. 3)
  144. doAssert v == 0b0000_1010'u8
  145. v = bitand(v, toMask[T](slice))
  146. func setMasked*[T: SomeInteger](v, mask :T): T {.inline, since: (1, 3).} =
  147. ## Returns ``v``, with all the ``1`` bits from ``mask`` set to 1.
  148. ##
  149. ## Effectively maps to a `bitor` operation.
  150. runnableExamples:
  151. var v = 0b0000_0011'u8
  152. doAssert v.setMasked(0b0000_1010'u8) == 0b0000_1011'u8
  153. bitor(v, mask)
  154. func setMasked*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  155. ## Returns ``v``, with all the ``1`` bits in the range of ``slice`` set to 1.
  156. ##
  157. ## Effectively maps to a `bitor` operation.
  158. runnableExamples:
  159. var v = 0b0000_0011'u8
  160. doAssert v.setMasked(2 .. 3) == 0b0000_1111'u8
  161. bitor(v, toMask[T](slice))
  162. proc setMask*[T: SomeInteger](v: var T; mask: T) {.inline.} =
  163. ## Mutates ``v``, with all the ``1`` bits from ``mask`` set to 1.
  164. ##
  165. ## Effectively maps to a `bitor` operation.
  166. runnableExamples:
  167. var v = 0b0000_0011'u8
  168. v.setMask(0b0000_1010'u8)
  169. doAssert v == 0b0000_1011'u8
  170. v = bitor(v, mask)
  171. proc setMask*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  172. ## Mutates ``v``, with all the ``1`` bits in the range of ``slice`` set to 1.
  173. ##
  174. ## Effectively maps to a `bitor` operation.
  175. runnableExamples:
  176. var v = 0b0000_0011'u8
  177. v.setMask(2 .. 3)
  178. doAssert v == 0b0000_1111'u8
  179. v = bitor(v, toMask[T](slice))
  180. func clearMasked*[T: SomeInteger](v, mask :T): T {.inline, since: (1, 3).} =
  181. ## Returns ``v``, with all the ``1`` bits from ``mask`` set to 0.
  182. ##
  183. ## Effectively maps to a `bitand` operation with an *inverted mask.*
  184. runnableExamples:
  185. var v = 0b0000_0011'u8
  186. doAssert v.clearMasked(0b0000_1010'u8) == 0b0000_0001'u8
  187. bitand(v, bitnot(mask))
  188. func clearMasked*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  189. ## Returns ``v``, with all the ``1`` bits in the range of ``slice`` set to 0.
  190. ##
  191. ## Effectively maps to a `bitand` operation with an *inverted mask.*
  192. runnableExamples:
  193. var v = 0b0000_0011'u8
  194. doAssert v.clearMasked(1 .. 3) == 0b0000_0001'u8
  195. bitand(v, bitnot(toMask[T](slice)))
  196. proc clearMask*[T: SomeInteger](v: var T; mask: T) {.inline.} =
  197. ## Mutates ``v``, with all the ``1`` bits from ``mask`` set to 0.
  198. ##
  199. ## Effectively maps to a `bitand` operation with an *inverted mask.*
  200. runnableExamples:
  201. var v = 0b0000_0011'u8
  202. v.clearMask(0b0000_1010'u8)
  203. doAssert v == 0b0000_0001'u8
  204. v = bitand(v, bitnot(mask))
  205. proc clearMask*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  206. ## Mutates ``v``, with all the ``1`` bits in the range of ``slice`` set to 0.
  207. ##
  208. ## Effectively maps to a `bitand` operation with an *inverted mask.*
  209. runnableExamples:
  210. var v = 0b0000_0011'u8
  211. v.clearMask(1 .. 3)
  212. doAssert v == 0b0000_0001'u8
  213. v = bitand(v, bitnot(toMask[T](slice)))
  214. func flipMasked*[T: SomeInteger](v, mask :T): T {.inline, since: (1, 3).} =
  215. ## Returns ``v``, with all the ``1`` bits from ``mask`` flipped.
  216. ##
  217. ## Effectively maps to a `bitxor` operation.
  218. runnableExamples:
  219. var v = 0b0000_0011'u8
  220. doAssert v.flipMasked(0b0000_1010'u8) == 0b0000_1001'u8
  221. bitxor(v, mask)
  222. func flipMasked*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  223. ## Returns ``v``, with all the ``1`` bits in the range of ``slice`` flipped.
  224. ##
  225. ## Effectively maps to a `bitxor` operation.
  226. runnableExamples:
  227. var v = 0b0000_0011'u8
  228. doAssert v.flipMasked(1 .. 3) == 0b0000_1101'u8
  229. bitxor(v, toMask[T](slice))
  230. proc flipMask*[T: SomeInteger](v: var T; mask: T) {.inline.} =
  231. ## Mutates ``v``, with all the ``1`` bits from ``mask`` flipped.
  232. ##
  233. ## Effectively maps to a `bitxor` operation.
  234. runnableExamples:
  235. var v = 0b0000_0011'u8
  236. v.flipMask(0b0000_1010'u8)
  237. doAssert v == 0b0000_1001'u8
  238. v = bitxor(v, mask)
  239. proc flipMask*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  240. ## Mutates ``v``, with all the ``1`` bits in the range of ``slice`` flipped.
  241. ##
  242. ## Effectively maps to a `bitxor` operation.
  243. runnableExamples:
  244. var v = 0b0000_0011'u8
  245. v.flipMask(1 .. 3)
  246. doAssert v == 0b0000_1101'u8
  247. v = bitxor(v, toMask[T](slice))
  248. proc setBit*[T: SomeInteger](v: var T; bit: BitsRange[T]) {.inline.} =
  249. ## Mutates ``v``, with the bit at position ``bit`` set to 1
  250. runnableExamples:
  251. var v = 0b0000_0011'u8
  252. v.setBit(5'u8)
  253. doAssert v == 0b0010_0011'u8
  254. v.setMask(1.T shl bit)
  255. proc clearBit*[T: SomeInteger](v: var T; bit: BitsRange[T]) {.inline.} =
  256. ## Mutates ``v``, with the bit at position ``bit`` set to 0
  257. runnableExamples:
  258. var v = 0b0000_0011'u8
  259. v.clearBit(1'u8)
  260. doAssert v == 0b0000_0001'u8
  261. v.clearMask(1.T shl bit)
  262. proc flipBit*[T: SomeInteger](v: var T; bit: BitsRange[T]) {.inline.} =
  263. ## Mutates ``v``, with the bit at position ``bit`` flipped
  264. runnableExamples:
  265. var v = 0b0000_0011'u8
  266. v.flipBit(1'u8)
  267. doAssert v == 0b0000_0001'u8
  268. v = 0b0000_0011'u8
  269. v.flipBit(2'u8)
  270. doAssert v == 0b0000_0111'u8
  271. v.flipMask(1.T shl bit)
  272. macro setBits*(v: typed; bits: varargs[typed]): untyped =
  273. ## Mutates ``v``, with the bits at positions ``bits`` set to 1
  274. runnableExamples:
  275. var v = 0b0000_0011'u8
  276. v.setBits(3, 5, 7)
  277. doAssert v == 0b1010_1011'u8
  278. bits.expectKind(nnkBracket)
  279. result = newStmtList()
  280. for bit in bits:
  281. result.add newCall("setBit", v, bit)
  282. macro clearBits*(v: typed; bits: varargs[typed]): untyped =
  283. ## Mutates ``v``, with the bits at positions ``bits`` set to 0
  284. runnableExamples:
  285. var v = 0b1111_1111'u8
  286. v.clearBits(1, 3, 5, 7)
  287. doAssert v == 0b0101_0101'u8
  288. bits.expectKind(nnkBracket)
  289. result = newStmtList()
  290. for bit in bits:
  291. result.add newCall("clearBit", v, bit)
  292. macro flipBits*(v: typed; bits: varargs[typed]): untyped =
  293. ## Mutates ``v``, with the bits at positions ``bits`` set to 0
  294. runnableExamples:
  295. var v = 0b0000_1111'u8
  296. v.flipBits(1, 3, 5, 7)
  297. doAssert v == 0b1010_0101'u8
  298. bits.expectKind(nnkBracket)
  299. result = newStmtList()
  300. for bit in bits:
  301. result.add newCall("flipBit", v, bit)
  302. proc testBit*[T: SomeInteger](v: T; bit: BitsRange[T]): bool {.inline.} =
  303. ## Returns true if the bit in ``v`` at positions ``bit`` is set to 1
  304. runnableExamples:
  305. var v = 0b0000_1111'u8
  306. doAssert v.testBit(0)
  307. doAssert not v.testBit(7)
  308. let mask = 1.T shl bit
  309. return (v and mask) == mask
  310. # #### Pure Nim version ####
  311. proc firstSetBitNim(x: uint32): int {.inline, noSideEffect.} =
  312. ## Returns the 1-based index of the least significant set bit of x, or if x is zero, returns zero.
  313. # https://graphics.stanford.edu/%7Eseander/bithacks.html#ZerosOnRightMultLookup
  314. const lookup: array[32, uint8] = [0'u8, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15,
  315. 25, 17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9]
  316. var v = x.uint32
  317. var k = not v + 1 # get two's complement # cast[uint32](-cast[int32](v))
  318. result = 1 + lookup[uint32((v and k) * 0x077CB531'u32) shr 27].int
  319. proc firstSetBitNim(x: uint64): int {.inline, noSideEffect.} =
  320. ## Returns the 1-based index of the least significant set bit of x, or if x is zero, returns zero.
  321. # https://graphics.stanford.edu/%7Eseander/bithacks.html#ZerosOnRightMultLookup
  322. var v = uint64(x)
  323. var k = uint32(v and 0xFFFFFFFF'u32)
  324. if k == 0:
  325. k = uint32(v shr 32'u32) and 0xFFFFFFFF'u32
  326. result = 32
  327. else:
  328. result = 0
  329. result += firstSetBitNim(k)
  330. proc fastlog2Nim(x: uint32): int {.inline, noSideEffect.} =
  331. ## Quickly find the log base 2 of a 32-bit or less integer.
  332. # https://graphics.stanford.edu/%7Eseander/bithacks.html#IntegerLogDeBruijn
  333. # https://stackoverflow.com/questions/11376288/fast-computing-of-log2-for-64-bit-integers
  334. const lookup: array[32, uint8] = [0'u8, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18,
  335. 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31]
  336. var v = x.uint32
  337. v = v or v shr 1 # first round down to one less than a power of 2
  338. v = v or v shr 2
  339. v = v or v shr 4
  340. v = v or v shr 8
  341. v = v or v shr 16
  342. result = lookup[uint32(v * 0x07C4ACDD'u32) shr 27].int
  343. proc fastlog2Nim(x: uint64): int {.inline, noSideEffect.} =
  344. ## Quickly find the log base 2 of a 64-bit integer.
  345. # https://graphics.stanford.edu/%7Eseander/bithacks.html#IntegerLogDeBruijn
  346. # https://stackoverflow.com/questions/11376288/fast-computing-of-log2-for-64-bit-integers
  347. const lookup: array[64, uint8] = [0'u8, 58, 1, 59, 47, 53, 2, 60, 39, 48, 27, 54,
  348. 33, 42, 3, 61, 51, 37, 40, 49, 18, 28, 20, 55, 30, 34, 11, 43, 14, 22, 4, 62,
  349. 57, 46, 52, 38, 26, 32, 41, 50, 36, 17, 19, 29, 10, 13, 21, 56, 45, 25, 31,
  350. 35, 16, 9, 12, 44, 24, 15, 8, 23, 7, 6, 5, 63]
  351. var v = x.uint64
  352. v = v or v shr 1 # first round down to one less than a power of 2
  353. v = v or v shr 2
  354. v = v or v shr 4
  355. v = v or v shr 8
  356. v = v or v shr 16
  357. v = v or v shr 32
  358. result = lookup[(v * 0x03F6EAF2CD271461'u64) shr 58].int
  359. # sets.nim cannot import bitops, but bitops can use include
  360. # system/sets to eliminate code duplication. sets.nim defines
  361. # countBits32 and countBits64.
  362. include system/sets
  363. template countSetBitsNim(n: uint32): int = countBits32(n)
  364. template countSetBitsNim(n: uint64): int = countBits64(n)
  365. template parityImpl[T](value: T): int =
  366. # formula id from: https://graphics.stanford.edu/%7Eseander/bithacks.html#ParityParallel
  367. var v = value
  368. when sizeof(T) == 8:
  369. v = v xor (v shr 32)
  370. when sizeof(T) >= 4:
  371. v = v xor (v shr 16)
  372. when sizeof(T) >= 2:
  373. v = v xor (v shr 8)
  374. v = v xor (v shr 4)
  375. v = v and 0xf
  376. ((0x6996'u shr v) and 1).int
  377. when useGCC_builtins:
  378. # Returns the number of set 1-bits in value.
  379. proc builtin_popcount(x: cuint): cint {.importc: "__builtin_popcount", cdecl.}
  380. proc builtin_popcountll(x: culonglong): cint {.
  381. importc: "__builtin_popcountll", cdecl.}
  382. # Returns the bit parity in value
  383. proc builtin_parity(x: cuint): cint {.importc: "__builtin_parity", cdecl.}
  384. proc builtin_parityll(x: culonglong): cint {.importc: "__builtin_parityll", cdecl.}
  385. # Returns one plus the index of the least significant 1-bit of x, or if x is zero, returns zero.
  386. proc builtin_ffs(x: cint): cint {.importc: "__builtin_ffs", cdecl.}
  387. proc builtin_ffsll(x: clonglong): cint {.importc: "__builtin_ffsll", cdecl.}
  388. # Returns the number of leading 0-bits in x, starting at the most significant bit position. If x is 0, the result is undefined.
  389. proc builtin_clz(x: cuint): cint {.importc: "__builtin_clz", cdecl.}
  390. proc builtin_clzll(x: culonglong): cint {.importc: "__builtin_clzll", cdecl.}
  391. # Returns the number of trailing 0-bits in x, starting at the least significant bit position. If x is 0, the result is undefined.
  392. proc builtin_ctz(x: cuint): cint {.importc: "__builtin_ctz", cdecl.}
  393. proc builtin_ctzll(x: culonglong): cint {.importc: "__builtin_ctzll", cdecl.}
  394. elif useVCC_builtins:
  395. # Counts the number of one bits (population count) in a 16-, 32-, or 64-byte unsigned integer.
  396. proc builtin_popcnt16(a2: uint16): uint16 {.
  397. importc: "__popcnt16"header: "<intrin.h>", noSideEffect.}
  398. proc builtin_popcnt32(a2: uint32): uint32 {.
  399. importc: "__popcnt"header: "<intrin.h>", noSideEffect.}
  400. proc builtin_popcnt64(a2: uint64): uint64 {.
  401. importc: "__popcnt64"header: "<intrin.h>", noSideEffect.}
  402. # Search the mask data from most significant bit (MSB) to least significant bit (LSB) for a set bit (1).
  403. proc bitScanReverse(index: ptr culong, mask: culong): cuchar {.
  404. importc: "_BitScanReverse", header: "<intrin.h>", noSideEffect.}
  405. proc bitScanReverse64(index: ptr culong, mask: uint64): cuchar {.
  406. importc: "_BitScanReverse64", header: "<intrin.h>", noSideEffect.}
  407. # Search the mask data from least significant bit (LSB) to the most significant bit (MSB) for a set bit (1).
  408. proc bitScanForward(index: ptr culong, mask: culong): cuchar {.
  409. importc: "_BitScanForward", header: "<intrin.h>", noSideEffect.}
  410. proc bitScanForward64(index: ptr culong, mask: uint64): cuchar {.
  411. importc: "_BitScanForward64", header: "<intrin.h>", noSideEffect.}
  412. template vcc_scan_impl(fnc: untyped; v: untyped): int =
  413. var index: culong
  414. discard fnc(index.addr, v)
  415. index.int
  416. elif useICC_builtins:
  417. # Intel compiler intrinsics: http://fulla.fnal.gov/intel/compiler_c/main_cls/intref_cls/common/intref_allia_misc.htm
  418. # see also: https://software.intel.com/en-us/node/523362
  419. # Count the number of bits set to 1 in an integer a, and return that count in dst.
  420. proc builtin_popcnt32(a: cint): cint {.
  421. importc: "_popcnt"header: "<immintrin.h>", noSideEffect.}
  422. proc builtin_popcnt64(a: uint64): cint {.
  423. importc: "_popcnt64"header: "<immintrin.h>", noSideEffect.}
  424. # Returns the number of trailing 0-bits in x, starting at the least significant bit position. If x is 0, the result is undefined.
  425. proc bitScanForward(p: ptr uint32, b: uint32): cuchar {.
  426. importc: "_BitScanForward", header: "<immintrin.h>", noSideEffect.}
  427. proc bitScanForward64(p: ptr uint32, b: uint64): cuchar {.
  428. importc: "_BitScanForward64", header: "<immintrin.h>", noSideEffect.}
  429. # Returns the number of leading 0-bits in x, starting at the most significant bit position. If x is 0, the result is undefined.
  430. proc bitScanReverse(p: ptr uint32, b: uint32): cuchar {.
  431. importc: "_BitScanReverse", header: "<immintrin.h>", noSideEffect.}
  432. proc bitScanReverse64(p: ptr uint32, b: uint64): cuchar {.
  433. importc: "_BitScanReverse64", header: "<immintrin.h>", noSideEffect.}
  434. template icc_scan_impl(fnc: untyped; v: untyped): int =
  435. var index: uint32
  436. discard fnc(index.addr, v)
  437. index.int
  438. proc countSetBits*(x: SomeInteger): int {.inline, noSideEffect.} =
  439. ## Counts the set bits in integer. (also called `Hamming weight`:idx:.)
  440. runnableExamples:
  441. doAssert countSetBits(0b0000_0011'u8) == 2
  442. doAssert countSetBits(0b1010_1010'u8) == 4
  443. # TODO: figure out if ICC support _popcnt32/_popcnt64 on platform without POPCNT.
  444. # like GCC and MSVC
  445. when x is SomeSignedInt:
  446. let x = x.toUnsigned
  447. when nimvm:
  448. result = forwardImpl(countSetBitsNim, x)
  449. else:
  450. when useGCC_builtins:
  451. when sizeof(x) <= 4: result = builtin_popcount(x.cuint).int
  452. else: result = builtin_popcountll(x.culonglong).int
  453. elif useVCC_builtins:
  454. when sizeof(x) <= 2: result = builtin_popcnt16(x.uint16).int
  455. elif sizeof(x) <= 4: result = builtin_popcnt32(x.uint32).int
  456. elif arch64: result = builtin_popcnt64(x.uint64).int
  457. else: result = builtin_popcnt32((x.uint64 and 0xFFFFFFFF'u64).uint32).int +
  458. builtin_popcnt32((x.uint64 shr 32'u64).uint32).int
  459. elif useICC_builtins:
  460. when sizeof(x) <= 4: result = builtin_popcnt32(x.cint).int
  461. elif arch64: result = builtin_popcnt64(x.uint64).int
  462. else: result = builtin_popcnt32((x.uint64 and 0xFFFFFFFF'u64).cint).int +
  463. builtin_popcnt32((x.uint64 shr 32'u64).cint).int
  464. else:
  465. when sizeof(x) <= 4: result = countSetBitsNim(x.uint32)
  466. else: result = countSetBitsNim(x.uint64)
  467. proc popcount*(x: SomeInteger): int {.inline, noSideEffect.} =
  468. ## Alias for for `countSetBits <#countSetBits,SomeInteger>`_. (Hamming weight.)
  469. result = countSetBits(x)
  470. proc parityBits*(x: SomeInteger): int {.inline, noSideEffect.} =
  471. ## Calculate the bit parity in integer. If number of 1-bit
  472. ## is odd parity is 1, otherwise 0.
  473. runnableExamples:
  474. doAssert parityBits(0b0000_0000'u8) == 0
  475. doAssert parityBits(0b0101_0001'u8) == 1
  476. doAssert parityBits(0b0110_1001'u8) == 0
  477. doAssert parityBits(0b0111_1111'u8) == 1
  478. # Can be used a base if creating ASM version.
  479. # https://stackoverflow.com/questions/21617970/how-to-check-if-value-has-even-parity-of-bits-or-odd
  480. when x is SomeSignedInt:
  481. let x = x.toUnsigned
  482. when nimvm:
  483. result = forwardImpl(parityImpl, x)
  484. else:
  485. when useGCC_builtins:
  486. when sizeof(x) <= 4: result = builtin_parity(x.uint32).int
  487. else: result = builtin_parityll(x.uint64).int
  488. else:
  489. when sizeof(x) <= 4: result = parityImpl(x.uint32)
  490. else: result = parityImpl(x.uint64)
  491. proc firstSetBit*(x: SomeInteger): int {.inline, noSideEffect.} =
  492. ## Returns the 1-based index of the least significant set bit of x.
  493. ## If `x` is zero, when ``noUndefinedBitOpts`` is set, result is 0,
  494. ## otherwise result is undefined.
  495. runnableExamples:
  496. doAssert firstSetBit(0b0000_0001'u8) == 1
  497. doAssert firstSetBit(0b0000_0010'u8) == 2
  498. doAssert firstSetBit(0b0000_0100'u8) == 3
  499. doAssert firstSetBit(0b0000_1000'u8) == 4
  500. doAssert firstSetBit(0b0000_1111'u8) == 1
  501. # GCC builtin 'builtin_ffs' already handle zero input.
  502. when x is SomeSignedInt:
  503. let x = x.toUnsigned
  504. when nimvm:
  505. when noUndefined:
  506. if x == 0:
  507. return 0
  508. result = forwardImpl(firstSetBitNim, x)
  509. else:
  510. when noUndefined and not useGCC_builtins:
  511. if x == 0:
  512. return 0
  513. when useGCC_builtins:
  514. when sizeof(x) <= 4: result = builtin_ffs(cast[cint](x.cuint)).int
  515. else: result = builtin_ffsll(cast[clonglong](x.culonglong)).int
  516. elif useVCC_builtins:
  517. when sizeof(x) <= 4:
  518. result = 1 + vcc_scan_impl(bitScanForward, x.culong)
  519. elif arch64:
  520. result = 1 + vcc_scan_impl(bitScanForward64, x.uint64)
  521. else:
  522. result = firstSetBitNim(x.uint64)
  523. elif useICC_builtins:
  524. when sizeof(x) <= 4:
  525. result = 1 + icc_scan_impl(bitScanForward, x.uint32)
  526. elif arch64:
  527. result = 1 + icc_scan_impl(bitScanForward64, x.uint64)
  528. else:
  529. result = firstSetBitNim(x.uint64)
  530. else:
  531. when sizeof(x) <= 4: result = firstSetBitNim(x.uint32)
  532. else: result = firstSetBitNim(x.uint64)
  533. proc fastLog2*(x: SomeInteger): int {.inline, noSideEffect.} =
  534. ## Quickly find the log base 2 of an integer.
  535. ## If `x` is zero, when ``noUndefinedBitOpts`` is set, result is -1,
  536. ## otherwise result is undefined.
  537. runnableExamples:
  538. doAssert fastLog2(0b0000_0001'u8) == 0
  539. doAssert fastLog2(0b0000_0010'u8) == 1
  540. doAssert fastLog2(0b0000_0100'u8) == 2
  541. doAssert fastLog2(0b0000_1000'u8) == 3
  542. doAssert fastLog2(0b0000_1111'u8) == 3
  543. when x is SomeSignedInt:
  544. let x = x.toUnsigned
  545. when noUndefined:
  546. if x == 0:
  547. return -1
  548. when nimvm:
  549. result = forwardImpl(fastlog2Nim, x)
  550. else:
  551. when useGCC_builtins:
  552. when sizeof(x) <= 4: result = 31 - builtin_clz(x.uint32).int
  553. else: result = 63 - builtin_clzll(x.uint64).int
  554. elif useVCC_builtins:
  555. when sizeof(x) <= 4:
  556. result = vcc_scan_impl(bitScanReverse, x.culong)
  557. elif arch64:
  558. result = vcc_scan_impl(bitScanReverse64, x.uint64)
  559. else:
  560. result = fastlog2Nim(x.uint64)
  561. elif useICC_builtins:
  562. when sizeof(x) <= 4:
  563. result = icc_scan_impl(bitScanReverse, x.uint32)
  564. elif arch64:
  565. result = icc_scan_impl(bitScanReverse64, x.uint64)
  566. else:
  567. result = fastlog2Nim(x.uint64)
  568. else:
  569. when sizeof(x) <= 4: result = fastlog2Nim(x.uint32)
  570. else: result = fastlog2Nim(x.uint64)
  571. proc countLeadingZeroBits*(x: SomeInteger): int {.inline, noSideEffect.} =
  572. ## Returns the number of leading zero bits in integer.
  573. ## If `x` is zero, when ``noUndefinedBitOpts`` is set, result is 0,
  574. ## otherwise result is undefined.
  575. ##
  576. ## See also:
  577. ## * `countTrailingZeroBits proc <#countTrailingZeroBits,SomeInteger>`_
  578. runnableExamples:
  579. doAssert countLeadingZeroBits(0b0000_0001'u8) == 7
  580. doAssert countLeadingZeroBits(0b0000_0010'u8) == 6
  581. doAssert countLeadingZeroBits(0b0000_0100'u8) == 5
  582. doAssert countLeadingZeroBits(0b0000_1000'u8) == 4
  583. doAssert countLeadingZeroBits(0b0000_1111'u8) == 4
  584. when x is SomeSignedInt:
  585. let x = x.toUnsigned
  586. when noUndefined:
  587. if x == 0:
  588. return 0
  589. when nimvm:
  590. result = sizeof(x)*8 - 1 - forwardImpl(fastlog2Nim, x)
  591. else:
  592. when useGCC_builtins:
  593. when sizeof(x) <= 4: result = builtin_clz(x.uint32).int - (32 - sizeof(x)*8)
  594. else: result = builtin_clzll(x.uint64).int
  595. else:
  596. when sizeof(x) <= 4: result = sizeof(x)*8 - 1 - fastlog2Nim(x.uint32)
  597. else: result = sizeof(x)*8 - 1 - fastlog2Nim(x.uint64)
  598. proc countTrailingZeroBits*(x: SomeInteger): int {.inline, noSideEffect.} =
  599. ## Returns the number of trailing zeros in integer.
  600. ## If `x` is zero, when ``noUndefinedBitOpts`` is set, result is 0,
  601. ## otherwise result is undefined.
  602. ##
  603. ## See also:
  604. ## * `countLeadingZeroBits proc <#countLeadingZeroBits,SomeInteger>`_
  605. runnableExamples:
  606. doAssert countTrailingZeroBits(0b0000_0001'u8) == 0
  607. doAssert countTrailingZeroBits(0b0000_0010'u8) == 1
  608. doAssert countTrailingZeroBits(0b0000_0100'u8) == 2
  609. doAssert countTrailingZeroBits(0b0000_1000'u8) == 3
  610. doAssert countTrailingZeroBits(0b0000_1111'u8) == 0
  611. when x is SomeSignedInt:
  612. let x = x.toUnsigned
  613. when noUndefined:
  614. if x == 0:
  615. return 0
  616. when nimvm:
  617. result = firstSetBit(x) - 1
  618. else:
  619. when useGCC_builtins:
  620. when sizeof(x) <= 4: result = builtin_ctz(x.uint32).int
  621. else: result = builtin_ctzll(x.uint64).int
  622. else:
  623. result = firstSetBit(x) - 1
  624. proc rotateLeftBits*(value: uint8;
  625. amount: range[0..8]): uint8 {.inline, noSideEffect.} =
  626. ## Left-rotate bits in a 8-bits value.
  627. runnableExamples:
  628. doAssert rotateLeftBits(0b0000_0001'u8, 1) == 0b0000_0010'u8
  629. doAssert rotateLeftBits(0b0000_0001'u8, 2) == 0b0000_0100'u8
  630. doAssert rotateLeftBits(0b0100_0001'u8, 1) == 0b1000_0010'u8
  631. doAssert rotateLeftBits(0b0100_0001'u8, 2) == 0b0000_0101'u8
  632. # using this form instead of the one below should handle any value
  633. # out of range as well as negative values.
  634. # result = (value shl amount) or (value shr (8 - amount))
  635. # taken from: https://en.wikipedia.org/wiki/Circular_shift#Implementing_circular_shifts
  636. let amount = amount and 7
  637. result = (value shl amount) or (value shr ( (-amount) and 7))
  638. proc rotateLeftBits*(value: uint16;
  639. amount: range[0..16]): uint16 {.inline, noSideEffect.} =
  640. ## Left-rotate bits in a 16-bits value.
  641. ##
  642. ## See also:
  643. ## * `rotateLeftBits proc <#rotateLeftBits,uint8,range[]>`_
  644. let amount = amount and 15
  645. result = (value shl amount) or (value shr ( (-amount) and 15))
  646. proc rotateLeftBits*(value: uint32;
  647. amount: range[0..32]): uint32 {.inline, noSideEffect.} =
  648. ## Left-rotate bits in a 32-bits value.
  649. ##
  650. ## See also:
  651. ## * `rotateLeftBits proc <#rotateLeftBits,uint8,range[]>`_
  652. let amount = amount and 31
  653. result = (value shl amount) or (value shr ( (-amount) and 31))
  654. proc rotateLeftBits*(value: uint64;
  655. amount: range[0..64]): uint64 {.inline, noSideEffect.} =
  656. ## Left-rotate bits in a 64-bits value.
  657. ##
  658. ## See also:
  659. ## * `rotateLeftBits proc <#rotateLeftBits,uint8,range[]>`_
  660. let amount = amount and 63
  661. result = (value shl amount) or (value shr ( (-amount) and 63))
  662. proc rotateRightBits*(value: uint8;
  663. amount: range[0..8]): uint8 {.inline, noSideEffect.} =
  664. ## Right-rotate bits in a 8-bits value.
  665. runnableExamples:
  666. doAssert rotateRightBits(0b0000_0001'u8, 1) == 0b1000_0000'u8
  667. doAssert rotateRightBits(0b0000_0001'u8, 2) == 0b0100_0000'u8
  668. doAssert rotateRightBits(0b0100_0001'u8, 1) == 0b1010_0000'u8
  669. doAssert rotateRightBits(0b0100_0001'u8, 2) == 0b0101_0000'u8
  670. let amount = amount and 7
  671. result = (value shr amount) or (value shl ( (-amount) and 7))
  672. proc rotateRightBits*(value: uint16;
  673. amount: range[0..16]): uint16 {.inline, noSideEffect.} =
  674. ## Right-rotate bits in a 16-bits value.
  675. ##
  676. ## See also:
  677. ## * `rotateRightBits proc <#rotateRightBits,uint8,range[]>`_
  678. let amount = amount and 15
  679. result = (value shr amount) or (value shl ( (-amount) and 15))
  680. proc rotateRightBits*(value: uint32;
  681. amount: range[0..32]): uint32 {.inline, noSideEffect.} =
  682. ## Right-rotate bits in a 32-bits value.
  683. ##
  684. ## See also:
  685. ## * `rotateRightBits proc <#rotateRightBits,uint8,range[]>`_
  686. let amount = amount and 31
  687. result = (value shr amount) or (value shl ( (-amount) and 31))
  688. proc rotateRightBits*(value: uint64;
  689. amount: range[0..64]): uint64 {.inline, noSideEffect.} =
  690. ## Right-rotate bits in a 64-bits value.
  691. ##
  692. ## See also:
  693. ## * `rotateRightBits proc <#rotateRightBits,uint8,range[]>`_
  694. let amount = amount and 63
  695. result = (value shr amount) or (value shl ( (-amount) and 63))
  696. proc repeatBits[T: SomeUnsignedInt](x: SomeUnsignedInt; retType: type[T]): T {.
  697. noSideEffect.} =
  698. result = x
  699. var i = 1
  700. while i != (sizeof(T) div sizeof(x)):
  701. result = (result shl (sizeof(x)*8*i)) or result
  702. i *= 2
  703. proc reverseBits*[T: SomeUnsignedInt](x: T): T {.noSideEffect.} =
  704. ## Return the bit reversal of x.
  705. runnableExamples:
  706. doAssert reverseBits(0b10100100'u8) == 0b00100101'u8
  707. doAssert reverseBits(0xdd'u8) == 0xbb'u8
  708. doAssert reverseBits(0xddbb'u16) == 0xddbb'u16
  709. doAssert reverseBits(0xdeadbeef'u32) == 0xf77db57b'u32
  710. template repeat(x: SomeUnsignedInt): T = repeatBits(x, T)
  711. result = x
  712. result =
  713. ((repeat(0x55u8) and result) shl 1) or
  714. ((repeat(0xaau8) and result) shr 1)
  715. result =
  716. ((repeat(0x33u8) and result) shl 2) or
  717. ((repeat(0xccu8) and result) shr 2)
  718. when sizeof(T) == 1:
  719. result = (result shl 4) or (result shr 4)
  720. when sizeof(T) >= 2:
  721. result =
  722. ((repeat(0x0fu8) and result) shl 4) or
  723. ((repeat(0xf0u8) and result) shr 4)
  724. when sizeof(T) == 2:
  725. result = (result shl 8) or (result shr 8)
  726. when sizeof(T) >= 4:
  727. result =
  728. ((repeat(0x00ffu16) and result) shl 8) or
  729. ((repeat(0xff00u16) and result) shr 8)
  730. when sizeof(T) == 4:
  731. result = (result shl 16) or (result shr 16)
  732. when sizeof(T) == 8:
  733. result =
  734. ((repeat(0x0000ffffu32) and result) shl 16) or
  735. ((repeat(0xffff0000u32) and result) shr 16)
  736. result = (result shl 32) or (result shr 32)