tuklib_integer.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539
  1. ///////////////////////////////////////////////////////////////////////////////
  2. //
  3. /// \file tuklib_integer.h
  4. /// \brief Various integer and bit operations
  5. ///
  6. /// This file provides macros or functions to do some basic integer and bit
  7. /// operations.
  8. ///
  9. /// Endianness related integer operations (XX = 16, 32, or 64; Y = b or l):
  10. /// - Byte swapping: bswapXX(num)
  11. /// - Byte order conversions to/from native: convXXYe(num)
  12. /// - Aligned reads: readXXYe(ptr)
  13. /// - Aligned writes: writeXXYe(ptr, num)
  14. /// - Unaligned reads (16/32-bit only): unaligned_readXXYe(ptr)
  15. /// - Unaligned writes (16/32-bit only): unaligned_writeXXYe(ptr, num)
  16. ///
  17. /// Since they can macros, the arguments should have no side effects since
  18. /// they may be evaluated more than once.
  19. ///
  20. /// \todo PowerPC and possibly some other architectures support
  21. /// byte swapping load and store instructions. This file
  22. /// doesn't take advantage of those instructions.
  23. ///
  24. /// Bit scan operations for non-zero 32-bit integers:
  25. /// - Bit scan reverse (find highest non-zero bit): bsr32(num)
  26. /// - Count leading zeros: clz32(num)
  27. /// - Count trailing zeros: ctz32(num)
  28. /// - Bit scan forward (simply an alias for ctz32()): bsf32(num)
  29. ///
  30. /// The above bit scan operations return 0-31. If num is zero,
  31. /// the result is undefined.
  32. //
  33. // Authors: Lasse Collin
  34. // Joachim Henke
  35. //
  36. // This file has been put into the public domain.
  37. // You can do whatever you want with this file.
  38. //
  39. ///////////////////////////////////////////////////////////////////////////////
  40. #ifndef TUKLIB_INTEGER_H
  41. #define TUKLIB_INTEGER_H
  42. #include "tuklib_common.h"
  43. ////////////////////////////////////////
  44. // Operating system specific features //
  45. ////////////////////////////////////////
  46. #if defined(HAVE_BYTESWAP_H)
  47. // glibc, uClibc, dietlibc
  48. # include <byteswap.h>
  49. # ifdef HAVE_BSWAP_16
  50. # define bswap16(num) bswap_16(num)
  51. # endif
  52. # ifdef HAVE_BSWAP_32
  53. # define bswap32(num) bswap_32(num)
  54. # endif
  55. # ifdef HAVE_BSWAP_64
  56. # define bswap64(num) bswap_64(num)
  57. # endif
  58. #elif defined(HAVE_SYS_ENDIAN_H)
  59. // *BSDs and Darwin
  60. # include <sys/endian.h>
  61. #elif defined(HAVE_SYS_BYTEORDER_H)
  62. // Solaris
  63. # include <sys/byteorder.h>
  64. # ifdef BSWAP_16
  65. # define bswap16(num) BSWAP_16(num)
  66. # endif
  67. # ifdef BSWAP_32
  68. # define bswap32(num) BSWAP_32(num)
  69. # endif
  70. # ifdef BSWAP_64
  71. # define bswap64(num) BSWAP_64(num)
  72. # endif
  73. # ifdef BE_16
  74. # define conv16be(num) BE_16(num)
  75. # endif
  76. # ifdef BE_32
  77. # define conv32be(num) BE_32(num)
  78. # endif
  79. # ifdef BE_64
  80. # define conv64be(num) BE_64(num)
  81. # endif
  82. # ifdef LE_16
  83. # define conv16le(num) LE_16(num)
  84. # endif
  85. # ifdef LE_32
  86. # define conv32le(num) LE_32(num)
  87. # endif
  88. # ifdef LE_64
  89. # define conv64le(num) LE_64(num)
  90. # endif
  91. #endif
  92. #ifdef _MSC_VER
  93. #include <Windows.h>
  94. #endif
  95. ////////////////////////////////
  96. // Compiler-specific features //
  97. ////////////////////////////////
  98. // Newer Intel C compilers require immintrin.h for _bit_scan_reverse()
  99. // and such functions.
  100. #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1500)
  101. # include <immintrin.h>
  102. #endif
  103. ///////////////////
  104. // Byte swapping //
  105. ///////////////////
  106. #ifndef bswap16
  107. # define bswap16(num) \
  108. (((uint16_t)(num) << 8) | ((uint16_t)(num) >> 8))
  109. #endif
  110. #ifndef bswap32
  111. # define bswap32(num) \
  112. ( (((uint32_t)(num) << 24) ) \
  113. | (((uint32_t)(num) << 8) & UINT32_C(0x00FF0000)) \
  114. | (((uint32_t)(num) >> 8) & UINT32_C(0x0000FF00)) \
  115. | (((uint32_t)(num) >> 24) ) )
  116. #endif
  117. #ifndef bswap64
  118. # define bswap64(num) \
  119. ( (((uint64_t)(num) << 56) ) \
  120. | (((uint64_t)(num) << 40) & UINT64_C(0x00FF000000000000)) \
  121. | (((uint64_t)(num) << 24) & UINT64_C(0x0000FF0000000000)) \
  122. | (((uint64_t)(num) << 8) & UINT64_C(0x000000FF00000000)) \
  123. | (((uint64_t)(num) >> 8) & UINT64_C(0x00000000FF000000)) \
  124. | (((uint64_t)(num) >> 24) & UINT64_C(0x0000000000FF0000)) \
  125. | (((uint64_t)(num) >> 40) & UINT64_C(0x000000000000FF00)) \
  126. | (((uint64_t)(num) >> 56) ) )
  127. #endif
  128. // Define conversion macros using the basic byte swapping macros.
  129. #ifdef WORDS_BIGENDIAN
  130. # ifndef conv16be
  131. # define conv16be(num) ((uint16_t)(num))
  132. # endif
  133. # ifndef conv32be
  134. # define conv32be(num) ((uint32_t)(num))
  135. # endif
  136. # ifndef conv64be
  137. # define conv64be(num) ((uint64_t)(num))
  138. # endif
  139. # ifndef conv16le
  140. # define conv16le(num) bswap16(num)
  141. # endif
  142. # ifndef conv32le
  143. # define conv32le(num) bswap32(num)
  144. # endif
  145. # ifndef conv64le
  146. # define conv64le(num) bswap64(num)
  147. # endif
  148. #else
  149. # ifndef conv16be
  150. # define conv16be(num) bswap16(num)
  151. # endif
  152. # ifndef conv32be
  153. # define conv32be(num) bswap32(num)
  154. # endif
  155. # ifndef conv64be
  156. # define conv64be(num) bswap64(num)
  157. # endif
  158. # ifndef conv16le
  159. # define conv16le(num) ((uint16_t)(num))
  160. # endif
  161. # ifndef conv32le
  162. # define conv32le(num) ((uint32_t)(num))
  163. # endif
  164. # ifndef conv64le
  165. # define conv64le(num) ((uint64_t)(num))
  166. # endif
  167. #endif
  168. //////////////////////////////
  169. // Aligned reads and writes //
  170. //////////////////////////////
  171. static inline uint16_t
  172. read16be(const uint8_t *buf)
  173. {
  174. uint16_t num = *(const uint16_t *)buf;
  175. return conv16be(num);
  176. }
  177. static inline uint16_t
  178. read16le(const uint8_t *buf)
  179. {
  180. uint16_t num = *(const uint16_t *)buf;
  181. return conv16le(num);
  182. }
  183. static inline uint32_t
  184. read32be(const uint8_t *buf)
  185. {
  186. uint32_t num = *(const uint32_t *)buf;
  187. return conv32be(num);
  188. }
  189. static inline uint32_t
  190. read32le(const uint8_t *buf)
  191. {
  192. uint32_t num = *(const uint32_t *)buf;
  193. return conv32le(num);
  194. }
  195. static inline uint64_t
  196. read64be(const uint8_t *buf)
  197. {
  198. uint64_t num = *(const uint64_t *)buf;
  199. return conv64be(num);
  200. }
  201. static inline uint64_t
  202. read64le(const uint8_t *buf)
  203. {
  204. uint64_t num = *(const uint64_t *)buf;
  205. return conv64le(num);
  206. }
  207. // NOTE: Possible byte swapping must be done in a macro to allow GCC
  208. // to optimize byte swapping of constants when using glibc's or *BSD's
  209. // byte swapping macros. The actual write is done in an inline function
  210. // to make type checking of the buf pointer possible similarly to readXXYe()
  211. // functions.
  212. #define write16be(buf, num) write16ne((buf), conv16be(num))
  213. #define write16le(buf, num) write16ne((buf), conv16le(num))
  214. #define write32be(buf, num) write32ne((buf), conv32be(num))
  215. #define write32le(buf, num) write32ne((buf), conv32le(num))
  216. #define write64be(buf, num) write64ne((buf), conv64be(num))
  217. #define write64le(buf, num) write64ne((buf), conv64le(num))
  218. static inline void
  219. write16ne(uint8_t *buf, uint16_t num)
  220. {
  221. *(uint16_t *)buf = num;
  222. return;
  223. }
  224. static inline void
  225. write32ne(uint8_t *buf, uint32_t num)
  226. {
  227. *(uint32_t *)buf = num;
  228. return;
  229. }
  230. static inline void
  231. write64ne(uint8_t *buf, uint64_t num)
  232. {
  233. *(uint64_t *)buf = num;
  234. return;
  235. }
  236. ////////////////////////////////
  237. // Unaligned reads and writes //
  238. ////////////////////////////////
  239. // NOTE: TUKLIB_FAST_UNALIGNED_ACCESS indicates only support for 16-bit and
  240. // 32-bit unaligned integer loads and stores. It's possible that 64-bit
  241. // unaligned access doesn't work or is slower than byte-by-byte access.
  242. // Since unaligned 64-bit is probably not needed as often as 16-bit or
  243. // 32-bit, we simply don't support 64-bit unaligned access for now.
  244. #ifdef TUKLIB_FAST_UNALIGNED_ACCESS
  245. # define unaligned_read16be read16be
  246. # define unaligned_read16le read16le
  247. # define unaligned_read32be read32be
  248. # define unaligned_read32le read32le
  249. # define unaligned_write16be write16be
  250. # define unaligned_write16le write16le
  251. # define unaligned_write32be write32be
  252. # define unaligned_write32le write32le
  253. #else
  254. static inline uint16_t
  255. unaligned_read16be(const uint8_t *buf)
  256. {
  257. uint16_t num = ((uint16_t)buf[0] << 8) | (uint16_t)buf[1];
  258. return num;
  259. }
  260. static inline uint16_t
  261. unaligned_read16le(const uint8_t *buf)
  262. {
  263. uint16_t num = ((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8);
  264. return num;
  265. }
  266. static inline uint32_t
  267. unaligned_read32be(const uint8_t *buf)
  268. {
  269. uint32_t num = (uint32_t)buf[0] << 24;
  270. num |= (uint32_t)buf[1] << 16;
  271. num |= (uint32_t)buf[2] << 8;
  272. num |= (uint32_t)buf[3];
  273. return num;
  274. }
  275. static inline uint32_t
  276. unaligned_read32le(const uint8_t *buf)
  277. {
  278. uint32_t num = (uint32_t)buf[0];
  279. num |= (uint32_t)buf[1] << 8;
  280. num |= (uint32_t)buf[2] << 16;
  281. num |= (uint32_t)buf[3] << 24;
  282. return num;
  283. }
  284. static inline void
  285. unaligned_write16be(uint8_t *buf, uint16_t num)
  286. {
  287. buf[0] = (uint8_t)(num >> 8);
  288. buf[1] = (uint8_t)num;
  289. return;
  290. }
  291. static inline void
  292. unaligned_write16le(uint8_t *buf, uint16_t num)
  293. {
  294. buf[0] = (uint8_t)num;
  295. buf[1] = (uint8_t)(num >> 8);
  296. return;
  297. }
  298. static inline void
  299. unaligned_write32be(uint8_t *buf, uint32_t num)
  300. {
  301. buf[0] = (uint8_t)(num >> 24);
  302. buf[1] = (uint8_t)(num >> 16);
  303. buf[2] = (uint8_t)(num >> 8);
  304. buf[3] = (uint8_t)num;
  305. return;
  306. }
  307. static inline void
  308. unaligned_write32le(uint8_t *buf, uint32_t num)
  309. {
  310. buf[0] = (uint8_t)num;
  311. buf[1] = (uint8_t)(num >> 8);
  312. buf[2] = (uint8_t)(num >> 16);
  313. buf[3] = (uint8_t)(num >> 24);
  314. return;
  315. }
  316. #endif
  317. static inline uint32_t
  318. bsr32(uint32_t n)
  319. {
  320. // Check for ICC first, since it tends to define __GNUC__ too.
  321. #if defined(__INTEL_COMPILER)
  322. return _bit_scan_reverse(n);
  323. #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
  324. // GCC >= 3.4 has __builtin_clz(), which gives good results on
  325. // multiple architectures. On x86, __builtin_clz() ^ 31U becomes
  326. // either plain BSR (so the XOR gets optimized away) or LZCNT and
  327. // XOR (if -march indicates that SSE4a instructions are supported).
  328. return __builtin_clz(n) ^ 31U;
  329. #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
  330. uint32_t i;
  331. __asm__("bsrl %1, %0" : "=r" (i) : "rm" (n));
  332. return i;
  333. #elif defined(_MSC_VER) && _MSC_VER >= 1400
  334. // MSVC isn't supported by tuklib, but since this code exists,
  335. // it doesn't hurt to have it here anyway.
  336. uint32_t i;
  337. _BitScanReverse((DWORD *)&i, n);
  338. return i;
  339. #else
  340. uint32_t i = 31;
  341. if ((n & UINT32_C(0xFFFF0000)) == 0) {
  342. n <<= 16;
  343. i = 15;
  344. }
  345. if ((n & UINT32_C(0xFF000000)) == 0) {
  346. n <<= 8;
  347. i -= 8;
  348. }
  349. if ((n & UINT32_C(0xF0000000)) == 0) {
  350. n <<= 4;
  351. i -= 4;
  352. }
  353. if ((n & UINT32_C(0xC0000000)) == 0) {
  354. n <<= 2;
  355. i -= 2;
  356. }
  357. if ((n & UINT32_C(0x80000000)) == 0)
  358. --i;
  359. return i;
  360. #endif
  361. }
  362. static inline uint32_t
  363. clz32(uint32_t n)
  364. {
  365. #if defined(__INTEL_COMPILER)
  366. return _bit_scan_reverse(n) ^ 31U;
  367. #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
  368. return __builtin_clz(n);
  369. #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
  370. uint32_t i;
  371. __asm__("bsrl %1, %0\n\t"
  372. "xorl $31, %0"
  373. : "=r" (i) : "rm" (n));
  374. return i;
  375. #elif defined(_MSC_VER) && _MSC_VER >= 1400
  376. uint32_t i;
  377. _BitScanReverse((DWORD *)&i, n);
  378. return i ^ 31U;
  379. #else
  380. uint32_t i = 0;
  381. if ((n & UINT32_C(0xFFFF0000)) == 0) {
  382. n <<= 16;
  383. i = 16;
  384. }
  385. if ((n & UINT32_C(0xFF000000)) == 0) {
  386. n <<= 8;
  387. i += 8;
  388. }
  389. if ((n & UINT32_C(0xF0000000)) == 0) {
  390. n <<= 4;
  391. i += 4;
  392. }
  393. if ((n & UINT32_C(0xC0000000)) == 0) {
  394. n <<= 2;
  395. i += 2;
  396. }
  397. if ((n & UINT32_C(0x80000000)) == 0)
  398. ++i;
  399. return i;
  400. #endif
  401. }
  402. static inline uint32_t
  403. ctz32(uint32_t n)
  404. {
  405. #if defined(__INTEL_COMPILER)
  406. return _bit_scan_forward(n);
  407. #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX >= UINT32_MAX
  408. return __builtin_ctz(n);
  409. #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
  410. uint32_t i;
  411. __asm__("bsfl %1, %0" : "=r" (i) : "rm" (n));
  412. return i;
  413. #elif defined(_MSC_VER) && _MSC_VER >= 1400
  414. uint32_t i;
  415. _BitScanForward((DWORD *)&i, n);
  416. return i;
  417. #else
  418. uint32_t i = 0;
  419. if ((n & UINT32_C(0x0000FFFF)) == 0) {
  420. n >>= 16;
  421. i = 16;
  422. }
  423. if ((n & UINT32_C(0x000000FF)) == 0) {
  424. n >>= 8;
  425. i += 8;
  426. }
  427. if ((n & UINT32_C(0x0000000F)) == 0) {
  428. n >>= 4;
  429. i += 4;
  430. }
  431. if ((n & UINT32_C(0x00000003)) == 0) {
  432. n >>= 2;
  433. i += 2;
  434. }
  435. if ((n & UINT32_C(0x00000001)) == 0)
  436. ++i;
  437. return i;
  438. #endif
  439. }
  440. #define bsf32 ctz32
  441. #endif