fastpos.h 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  1. ///////////////////////////////////////////////////////////////////////////////
  2. //
  3. /// \file fastpos.h
  4. /// \brief Kind of two-bit version of bit scan reverse
  5. ///
  6. // Authors: Igor Pavlov
  7. // Lasse Collin
  8. //
  9. // This file has been put into the public domain.
  10. // You can do whatever you want with this file.
  11. //
  12. ///////////////////////////////////////////////////////////////////////////////
  13. #ifndef LZMA_FASTPOS_H
  14. #define LZMA_FASTPOS_H
  15. // LZMA encodes match distances by storing the highest two bits using
  16. // a six-bit value [0, 63], and then the missing lower bits.
  17. // Dictionary size is also stored using this encoding in the .xz
  18. // file format header.
  19. //
  20. // fastpos.h provides a way to quickly find out the correct six-bit
  21. // values. The following table gives some examples of this encoding:
  22. //
  23. // dist return
  24. // 0 0
  25. // 1 1
  26. // 2 2
  27. // 3 3
  28. // 4 4
  29. // 5 4
  30. // 6 5
  31. // 7 5
  32. // 8 6
  33. // 11 6
  34. // 12 7
  35. // ... ...
  36. // 15 7
  37. // 16 8
  38. // 17 8
  39. // ... ...
  40. // 23 8
  41. // 24 9
  42. // 25 9
  43. // ... ...
  44. //
  45. //
  46. // Provided functions or macros
  47. // ----------------------------
  48. //
  49. // get_dist_slot(dist) is the basic version. get_dist_slot_2(dist)
  50. // assumes that dist >= FULL_DISTANCES, thus the result is at least
  51. // FULL_DISTANCES_BITS * 2. Using get_dist_slot(dist) instead of
  52. // get_dist_slot_2(dist) would give the same result, but get_dist_slot_2(dist)
  53. // should be tiny bit faster due to the assumption being made.
  54. //
  55. //
  56. // Size vs. speed
  57. // --------------
  58. //
  59. // With some CPUs that have fast BSR (bit scan reverse) instruction, the
  60. // size optimized version is slightly faster than the bigger table based
  61. // approach. Such CPUs include Intel Pentium Pro, Pentium II, Pentium III
  62. // and Core 2 (possibly others). AMD K7 seems to have slower BSR, but that
  63. // would still have speed roughly comparable to the table version. Older
  64. // x86 CPUs like the original Pentium have very slow BSR; on those systems
  65. // the table version is a lot faster.
  66. //
  67. // On some CPUs, the table version is a lot faster when using position
  68. // dependent code, but with position independent code the size optimized
  69. // version is slightly faster. This occurs at least on 32-bit SPARC (no
  70. // ASM optimizations).
  71. //
  72. // I'm making the table version the default, because that has good speed
  73. // on all systems I have tried. The size optimized version is sometimes
  74. // slightly faster, but sometimes it is a lot slower.
  75. #ifdef HAVE_SMALL
  76. # define get_dist_slot(dist) \
  77. ((dist) <= 4 ? (dist) : get_dist_slot_2(dist))
  78. static inline uint32_t
  79. get_dist_slot_2(uint32_t dist)
  80. {
  81. const uint32_t i = bsr32(dist);
  82. return (i + i) + ((dist >> (i - 1)) & 1);
  83. }
  84. #else
  85. #define FASTPOS_BITS 13
  86. extern const uint8_t lzma_fastpos[1 << FASTPOS_BITS];
  87. #define fastpos_shift(extra, n) \
  88. ((extra) + (n) * (FASTPOS_BITS - 1))
  89. #define fastpos_limit(extra, n) \
  90. (UINT32_C(1) << (FASTPOS_BITS + fastpos_shift(extra, n)))
  91. #define fastpos_result(dist, extra, n) \
  92. lzma_fastpos[(dist) >> fastpos_shift(extra, n)] \
  93. + 2 * fastpos_shift(extra, n)
  94. static inline uint32_t
  95. get_dist_slot(uint32_t dist)
  96. {
  97. // If it is small enough, we can pick the result directly from
  98. // the precalculated table.
  99. if (dist < fastpos_limit(0, 0))
  100. return lzma_fastpos[dist];
  101. if (dist < fastpos_limit(0, 1))
  102. return fastpos_result(dist, 0, 1);
  103. return fastpos_result(dist, 0, 2);
  104. }
  105. #ifdef FULL_DISTANCES_BITS
  106. static inline uint32_t
  107. get_dist_slot_2(uint32_t dist)
  108. {
  109. assert(dist >= FULL_DISTANCES);
  110. if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 0))
  111. return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 0);
  112. if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 1))
  113. return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 1);
  114. return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 2);
  115. }
  116. #endif
  117. #endif
  118. #endif