// lcg.h
  1. // Copyright (c) The Minecraft Seed Finding Team
  2. //
  3. // MIT License
  4. #ifndef LIBSEEDFINDING_LCG_H
  5. #define LIBSEEDFINDING_LCG_H
  6. #include <cinttypes>
  7. #include <limits>
  8. #include <type_traits>
  9. #include "util.h"
  10. #if CPP_VER < 201402L
  11. #error lcg.h requires C++ 14
  12. #endif
  13. /**
  14. * Contains an implementation of the Java LCG and utility functions surrounding it.
  15. * Many functions here are constexpr, meaning that they can be evaluated at compile-time if their inputs are
  16. * also statically known. Otherwise, they are usually inlined by the compiler instead.
  17. */
  18. namespace lcg {
/// The 48-bit state of the Java LCG, carried in the low 48 bits of a uint64_t.
typedef uint64_t Random;

/// Multiplier of the Java LCG step: state' = (state * MULTIPLIER + ADDEND) & MASK.
const uint64_t MULTIPLIER = 0x5deece66dLL;
/// Addend of the Java LCG step.
const uint64_t ADDEND = 0xbLL;
/// Mask selecting the low 48 bits — the full width of the LCG state.
const uint64_t MASK = 0xffffffffffffL;

/// The minimum distance that two nextFloat values can be from each other. May be used to iterate over all valid floats.
static DEVICEABLE_CONST float FLOAT_UNIT = 1.0f / static_cast<float>(1L << 24);
/// The minimum distance that two nextDouble values can be from each other. May be used to iterate over all valid doubles.
static DEVICEABLE_CONST double DOUBLE_UNIT = 1.0 / static_cast<double>(1LL << 53);

// Declared here for forward reference; defined below. Advances rand one step
// and returns the top B bits of the new state.
template<int B>
DEVICEABLE constexpr typename std::enable_if_t<(0 <= B && B <= 32), int32_t> next(Random &rand);
  30. /**
  31. * Contains internal functions. These are unstable, do not use them for any reason!
  32. * If you think you need something from in here, first look for alternatives, else consider adding something to the public API for it.
  33. * The internal functions are included in the header file so that the compiler can optimize using their implementations.
  34. * Many of these functions are force-inlined. This is to ensure the public force-inlined functions are fully inlined properly.
  35. */
  36. namespace internal {
// for returning multiple values
/// A multiplier/addend parameter pair describing one affine LCG step:
/// state' = state * multiplier + addend (masked to 48 bits by the callers).
struct LCG {
    uint64_t multiplier;
    uint64_t addend;
};
  42. DEVICEABLE FORCEINLINE constexpr LCG combine(uint64_t calls) {
  43. uint64_t multiplier = 1;
  44. uint64_t addend = 0;
  45. uint64_t intermediate_multiplier = MULTIPLIER;
  46. uint64_t intermediate_addend = ADDEND;
  47. for (uint64_t k = calls; k != 0; k >>= 1) {
  48. if ((k & 1) != 0) {
  49. multiplier *= intermediate_multiplier;
  50. addend = intermediate_multiplier * addend + intermediate_addend;
  51. }
  52. intermediate_addend = (intermediate_multiplier + 1) * intermediate_addend;
  53. intermediate_multiplier *= intermediate_multiplier;
  54. }
  55. multiplier &= MASK;
  56. addend &= MASK;
  57. return {multiplier, addend};
  58. }
/// Signed overload of combine. A negative count wraps to a huge unsigned
/// advance, which corresponds to a backwards advance (see combined_lcg).
DEVICEABLE FORCEINLINE constexpr LCG combine(int64_t calls) {
    return combine(static_cast<uint64_t>(calls));
}
  62. DEVICEABLE FORCEINLINE constexpr uint64_t gcd(uint64_t a, uint64_t b) {
  63. if (b == 0) {
  64. return a;
  65. }
  66. while (true) {
  67. a %= b;
  68. if (a == 0) {
  69. return b;
  70. }
  71. b %= a;
  72. if (b == 0) {
  73. return a;
  74. }
  75. }
  76. }
// Returns modulo inverse of a with
// respect to m using extended Euclid
// Algorithm Assumption: a and m are
// coprimes, i.e., gcd(a, m) = 1
// stolen code, now handles the case where gcd(a,m) != 1
/// Extended-Euclid helper: returns an x with a*x congruent to gcd(a, m) mod m.
/// All arithmetic is unsigned, so "negative" Bezout coefficients wrap mod 2^64;
/// the caller (seed2dfz_inline) masks the product down afterwards.
DEVICEABLE FORCEINLINE constexpr uint64_t euclidean_helper(uint64_t a, uint64_t m) {
    uint64_t y = 0, x = 1;
    if (m == 1) {
        return 0;
    }
    // Iterate until a is reduced to gcd(a, m) rather than to 1, so that
    // non-coprime inputs are handled too.
    uint64_t gcd_ = gcd(a, m);
    while (a > gcd_) {
        uint64_t q = a / m;  // quotient of this Euclidean division step
        // (a, m) <- (m, a mod m)
        uint64_t t = m;
        m = a % m;
        a = t;
        // Bezout coefficient update: (x, y) <- (y, x - q*y)
        t = y;
        y = x - q * y;
        x = t;
    }
    return x;
}
/// Helper for seed2dfz_inline. Normalizes num mod 2^50, raises it to the power
/// 2^49 by 49 repeated squarings in 128-bit arithmetic (keeping only the low
/// 99 bits), then returns bits 51..98 of (result - 1), masked to 48 bits.
/// NOTE(review): the constants here (2^50 modulus, 49 squarings, 99-bit keep,
/// shift by 51) come from the seed-to-DFZ discrete-log derivation — confirm
/// against that derivation before changing any of them.
DEVICEABLE FORCEINLINE constexpr uint64_t theta(uint64_t num) {
    // Normalize: replace num with its negation mod 2^50 when num % 4 == 3.
    if (num % 4 == 3) {
        num = (1LL << 50) - num;
    }
    // xhat = num, held as a 128-bit value in two 64-bit halves
    uint64_t xhat_lo = num;
    uint64_t xhat_hi = 0;
    // raise xhat to the power of 2^49 by squaring it 49 times
    for (int i = 0; i < 49; i++) {
        // 64x64 -> 128-bit squaring of xhat_lo using 32-bit limbs:
        // https://www.codeproject.com/Tips/618570/UInt-Multiplication-Squaring
        uint64_t r1 = xhat_lo & 0xffffffffLL;  // low 32 bits of xhat_lo
        uint64_t t = r1 * r1;
        uint64_t w3 = t & 0xffffffffLL;
        uint64_t k = t >> 32;
        uint64_t r = xhat_lo >> 32;            // high 32 bits of xhat_lo
        uint64_t m = r * r1;
        t = m + k;
        uint64_t w2 = t & 0xffffffffLL;
        uint64_t w1 = t >> 32;
        t = m + w2;
        k = t >> 32;
        uint64_t new_hi = r * r + w1 + k;
        uint64_t new_lo = (t << 32) + w3;
        // Add the cross term 2*hi*lo shifted into the high word. The hi*hi
        // term at 2^128 is dropped: only the low 99 bits survive the mask.
        new_hi += (xhat_hi * xhat_lo) << 1;
        xhat_lo = new_lo;
        xhat_hi = new_hi;
    }
    // Keep only the low 99 bits of xhat.
    xhat_hi &= (1LL << (99 - 64)) - 1;
    // xhat-- (borrow from the high word when the low word underflows)
    if (xhat_lo == 0) xhat_hi--;
    xhat_lo--;
    // xhat >>= 51
    xhat_lo = (xhat_lo >> 51) | (xhat_hi << (64 - 51));
    // xhat &= MASK
    xhat_lo &= MASK;
    return xhat_lo;
}
  136. DEVICEABLE constexpr int32_t dynamic_next_int_power_of_2(Random &rand, int32_t n) {
  137. return static_cast<int32_t>((static_cast<uint64_t>(next<31>(rand)) * static_cast<uint64_t>(n)) >> 31);
  138. }
  139. }
/// Unsigned equivalent of combined_lcg<N>.
/// Both members are computed at compile time from the constexpr internal::combine.
template<uint64_t N>
struct ucombined_lcg {
    /// The multiplier of the LCG that advances by N.
    static const uint64_t multiplier = internal::combine(N).multiplier;
    /// The addend of the LCG that advances by N.
    static const uint64_t addend = internal::combine(N).addend;
};
/**
 * Contains the multiplier and addend of the LCG equivalent to advancing the Java LCG by N.
 * N may be negative to signal a backwards advance.
 */
template<int64_t N>
struct combined_lcg {
    // A negative N wraps to a large unsigned advance, which lands on the
    // parameters of the corresponding backwards step.
    /// The multiplier of the LCG that advances by N.
    static const uint64_t multiplier = ucombined_lcg<static_cast<uint64_t>(N)>::multiplier;
    /// The addend of the LCG that advances by N.
    static const uint64_t addend = ucombined_lcg<static_cast<uint64_t>(N)>::addend;
};
  159. /// Advances the Random by an unsigned N steps, which defaults to 1. Runs in O(1) time because of compile-time optimizations.
  160. template<uint64_t N = 1>
  161. DEVICEABLE constexpr void uadvance(Random &rand) {
  162. rand = (rand * ucombined_lcg<N>::multiplier + ucombined_lcg<N>::addend) & MASK;
  163. }
/**
 * Advances the Random by N steps, which defaults to 1. Runs in O(1) time because of compile-time optimizations.
 * N may be negative to signal a backwards advance.
 */
template<int64_t N = 1>
DEVICEABLE constexpr void advance(Random &rand) {
    // A negative N wraps to a huge unsigned step count, which is equivalent
    // to stepping backwards since the LCG sequence repeats.
    uadvance<static_cast<uint64_t>(N)>(rand);
}
/// Force-inlined version of dynamic_advance. Do not use unless profiling tells you that the compiler is not inlining anyway!
DEVICEABLE FORCEINLINE constexpr void dynamic_advance_inline(Random &rand, uint64_t n) {
// Binary decomposition of n: for every set bit N, apply the O(1) compile-time
// advance by 2^N. The first clause returns early once all remaining (higher)
// bits of n are known to be zero. Only bits 0..47 are handled; higher bits are
// ignored — NOTE(review): presumably because the 48-bit LCG repeats, so an
// advance is effectively taken mod 2^48; confirm.
#define ADVANCE_BIT(N) if (n < (1LL << N)) return;\
if (n & (1LL << N)) uadvance<1LL << N>(rand);
ADVANCE_BIT(0)
ADVANCE_BIT(1)
ADVANCE_BIT(2)
ADVANCE_BIT(3)
ADVANCE_BIT(4)
ADVANCE_BIT(5)
ADVANCE_BIT(6)
ADVANCE_BIT(7)
ADVANCE_BIT(8)
ADVANCE_BIT(9)
ADVANCE_BIT(10)
ADVANCE_BIT(11)
ADVANCE_BIT(12)
ADVANCE_BIT(13)
ADVANCE_BIT(14)
ADVANCE_BIT(15)
ADVANCE_BIT(16)
ADVANCE_BIT(17)
ADVANCE_BIT(18)
ADVANCE_BIT(19)
ADVANCE_BIT(20)
ADVANCE_BIT(21)
ADVANCE_BIT(22)
ADVANCE_BIT(23)
ADVANCE_BIT(24)
ADVANCE_BIT(25)
ADVANCE_BIT(26)
ADVANCE_BIT(27)
ADVANCE_BIT(28)
ADVANCE_BIT(29)
ADVANCE_BIT(30)
ADVANCE_BIT(31)
ADVANCE_BIT(32)
ADVANCE_BIT(33)
ADVANCE_BIT(34)
ADVANCE_BIT(35)
ADVANCE_BIT(36)
ADVANCE_BIT(37)
ADVANCE_BIT(38)
ADVANCE_BIT(39)
ADVANCE_BIT(40)
ADVANCE_BIT(41)
ADVANCE_BIT(42)
ADVANCE_BIT(43)
ADVANCE_BIT(44)
ADVANCE_BIT(45)
ADVANCE_BIT(46)
ADVANCE_BIT(47)
#undef ADVANCE_BIT
}
/// Advances the Random by an unsigned n steps. Used when n is not known at compile-time. Runs in O(log(n)) time.
DEVICEABLE constexpr void dynamic_advance(Random &rand, uint64_t n) {
    // Non-force-inlined wrapper; prefer this and let the compiler decide.
    dynamic_advance_inline(rand, n);
}
/// Force-inlined version of dynamic_advance. Do not use unless profiling tells you that the compiler is not inlining anyway!
/// Signed overload: a negative n wraps to the equivalent backwards advance.
DEVICEABLE FORCEINLINE constexpr void dynamic_advance_inline(Random &rand, int64_t n) {
    dynamic_advance_inline(rand, static_cast<uint64_t>(n));
}
/**
 * Advances the Random by n steps. Used when n is not known at compile-time. Runs in O(log(n)) time.
 * n may be negative to signal a backwards advance.
 */
DEVICEABLE constexpr void dynamic_advance(Random &rand, int64_t n) {
    dynamic_advance_inline(rand, n);
}
/// Force-inlined version of dfz2seed. Do not use unless profiling tells you that the compiler is not inlining anyway!
DEVICEABLE FORCEINLINE constexpr Random dfz2seed_inline(uint64_t dfz) {
    // By definition, the seed with DFZ value dfz is seed 0 advanced dfz times.
    Random seed = 0;
    dynamic_advance_inline(seed, dfz);
    return seed;
}
/**
 * Converts a Distance From Zero (DFZ) value to a Random seed.
 * DFZ is a representation of a seed which is the number of LCG calls required to get from seed 0 to that seed.
 * To get Random outputs from a DFZ value it must first be converted to a seed, which is done in O(log(dfz)).
 * In various situations, especially far GPU parallelization, it may be useful to represent seeds this way.
 */
DEVICEABLE constexpr Random dfz2seed(uint64_t dfz) {
    return dfz2seed_inline(dfz);
}
  256. /// Force-inlined version of seed2dfz. Do not use unless profiling tells you that the compiler is not inlining anyway!
  257. DEVICEABLE FORCEINLINE constexpr uint64_t seed2dfz_inline(Random seed) {
  258. uint64_t a = 25214903917LL;
  259. uint64_t b = (((seed * (MULTIPLIER - 1)) * 179120439724963LL) + 1) & ((1LL << 50) - 1);
  260. uint64_t abar = internal::theta(a);
  261. uint64_t bbar = internal::theta(b);
  262. uint64_t gcd_ = internal::gcd(abar, (1LL << 48));
  263. return (bbar * internal::euclidean_helper(abar, (1LL << 48)) & 0x3FFFFFFFFFFFLL) / gcd_; //+ i*(1L << 48)/gcd;
  264. }
/**
 * Converts a Random seed to DFZ form. See dfz2seed for a description of DFZ form.
 * This function should be called reservedly, as although it is O(1), it is relatively slow.
 */
DEVICEABLE constexpr uint64_t seed2dfz(Random seed) {
    return seed2dfz_inline(seed);
}
/// Advances the LCG and gets the upper B bits from it.
template<int B>
DEVICEABLE constexpr typename std::enable_if_t<(0 <= B && B <= 32), int32_t> next(Random &rand) {
    advance(rand);
    // The state is 48 bits wide, so the top B bits start at offset 48 - B.
    return static_cast<int32_t>(rand >> (48 - B));
}
/// Does an unbounded nextInt call and returns the result. All 32 bits are
/// used, so the result may be negative.
DEVICEABLE constexpr int32_t next_int_unbounded(Random &rand) {
    return next<32>(rand);
}
/// Does a bounded nextInt call with bound N. This overload handles N a power
/// of 2: the 31-bit draw is scaled into [0, N) with a multiply and shift.
template<int32_t N>
DEVICEABLE constexpr typename std::enable_if_t<(N > 0) && ((N & -N) == N), int32_t> next_int(Random &rand) {
    return static_cast<int32_t>((static_cast<uint64_t>(next<31>(rand)) * static_cast<uint64_t>(N)) >> 31);
}
  287. template<int32_t N>
  288. DEVICEABLE constexpr typename std::enable_if_t<(N > 0) && ((N & -N) != N), int32_t> next_int(Random &rand) {
  289. int32_t bits = next<31>(rand);
  290. int32_t val = bits % N;
  291. while (bits - val + (N - 1) < 0) {
  292. bits = next<31>(rand);
  293. val = bits % N;
  294. }
  295. return val;
  296. }
  297. /**
  298. * Does a bounded nextInt call with bound N. If N is not a power of 2, then it makes the assumption that the loop
  299. * does not iterate more than once. The probability of this being correct depends on N, but for small N this
  300. * function is extremely likely to have the same effect as next_int.
  301. */
  302. template<int32_t N>
  303. DEVICEABLE constexpr typename std::enable_if_t<(N > 0), int32_t> next_int_fast(Random &rand) {
  304. if ((N & -N) == N) {
  305. return next_int<N>(rand);
  306. } else {
  307. return next<31>(rand) % N;
  308. }
  309. }
  310. /// Does a bounded nextInt call with bound n, used when n is not known in advance.
  311. DEVICEABLE constexpr int32_t dynamic_next_int(Random &rand, int32_t n) {
  312. if ((n & -n) == n) {
  313. return internal::dynamic_next_int_power_of_2(rand, n);
  314. } else {
  315. int32_t bits = next<31>(rand);
  316. int32_t val = bits % n;
  317. while (bits - val + (n - 1) < 0) {
  318. bits = next<31>(rand);
  319. val = bits % n;
  320. }
  321. return val;
  322. }
  323. }
  324. /**
  325. * Does a bounded nextInt call with bound n, using the "fast" approach, used when n is not known in advance.
  326. * See next_int_fast for a description of the fast approach.
  327. */
  328. DEVICEABLE constexpr int32_t dynamic_next_int_fast(Random &rand, int32_t n) {
  329. if ((n & -n) == n) {
  330. return internal::dynamic_next_int_power_of_2(rand, n);
  331. } else {
  332. return next<31>(rand) % n;
  333. }
  334. }
/// Does a nextLong call: two 32-bit draws combined, high word first.
DEVICEABLE constexpr int64_t next_long(Random &rand) {
    // separate out calls due to unspecified evaluation order in C++
    int32_t hi = next<32>(rand);
    int32_t lo = next<32>(rand);
    // lo is sign-extended before the addition, so it can subtract from hi<<32.
    return (static_cast<int64_t>(hi) << 32) + static_cast<int64_t>(lo);
}
  342. /// Does an unsigned nextLong call.
  343. DEVICEABLE constexpr uint64_t next_ulong(Random &rand) {
  344. return static_cast<uint64_t>(next_long(rand));
  345. }
  346. /// Does a nextBoolean call.
  347. DEVICEABLE constexpr bool next_bool(Random &rand) {
  348. return next<1>(rand) != 0;
  349. }
/// Does a nextFloat call: a 24-bit draw scaled by FLOAT_UNIT into [0, 1).
/// Only enabled when float is IEEE-754 (is_iec559) — presumably to guarantee
/// bit-exact results across platforms.
DEVICEABLE std::enable_if_t<std::numeric_limits<float>::is_iec559, float> next_float(Random &rand) {
    return static_cast<float>(next<24>(rand)) * FLOAT_UNIT;
}
/// Does a nextDouble call: a 53-bit value built from a 26-bit and a 27-bit
/// draw, scaled by DOUBLE_UNIT into [0, 1). Only enabled when double is
/// IEEE-754 (is_iec559) — presumably to guarantee bit-exact results.
DEVICEABLE std::enable_if_t<std::numeric_limits<double>::is_iec559, double> next_double(Random &rand) {
    // separate out calls due to unspecified evaluation order in C++
    int32_t hi = next<26>(rand);
    int32_t lo = next<27>(rand);
    return static_cast<double>((static_cast<int64_t>(hi) << 27) + static_cast<int64_t>(lo)) * DOUBLE_UNIT;
}
  361. }
  362. #endif //LIBSEEDFINDING_LCG_H