// grass_crack.cu
  1. //"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\bin\nvcc.exe" -ccbin "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\bin\Hostx86\x64" -o crack crack.cu -O3 -m=64 -arch=compute_61 -code=sm_61 -Xptxas -allow-expensive-optimizations=true -Xptxas -v
  2. #include <iostream>
  3. #include <chrono>
  4. #include <fstream>
  5. #include <algorithm>
  6. #include <inttypes.h>
  7. #include <cuda.h>
  8. #ifdef BOINC
  9. #include "boinc_api.h"
  10. #if defined _WIN32 || defined _WIN64
  11. #include "boinc_win.h"
  12. #endif
  13. #endif
  14. // ===== LCG IMPLEMENTATION ===== //
  15. namespace java_lcg { //region Java LCG
  16. #define Random uint64_t
  17. #define RANDOM_MULTIPLIER 0x5DEECE66DULL
  18. #define RANDOM_ADDEND 0xBULL
  19. #define RANDOM_MASK ((1ULL << 48u) - 1)
  20. #define get_random(seed) ((Random)((seed ^ RANDOM_MULTIPLIER) & RANDOM_MASK))
  21. __host__ __device__ __forceinline__ static int32_t random_next(Random *random, int bits) {
  22. *random = (*random * RANDOM_MULTIPLIER + RANDOM_ADDEND) & RANDOM_MASK;
  23. return (int32_t) (*random >> (48u - bits));
  24. }
  25. __device__ __forceinline__ static int32_t random_next_int(Random *random, const uint16_t bound) {
  26. int32_t r = random_next(random, 31);
  27. const uint16_t m = bound - 1u;
  28. if ((bound & m) == 0) {
  29. r = (int32_t) ((bound * (uint64_t) r) >> 31u);
  30. } else {
  31. for (int32_t u = r;
  32. u - (r = u % bound) + m < 0;
  33. u = random_next(random, 31));
  34. }
  35. return r;
  36. }
  37. /*
  38. __device__ __forceinline__ static int32_t random_next_int(Random *random, const uint16_t bound) {
  39. int32_t r = random_next(random, 31);
  40. if (__popc(bound) == 1) {
  41. return (int32_t) ((bound * (uint64_t) r) >> 31u);
  42. } else {
  43. const uint16_t m = bound - 1u;
  44. for (int32_t u = r;
  45. u - (r = u % bound) + m < 0;
  46. u = random_next(random, 31));
  47. }
  48. return r;
  49. }*/
  50. __host__ __device__ __forceinline__ static double next_double(Random *random) {
  51. return (double) ((((uint64_t) ((uint32_t) random_next(random, 26)) << 27u)) + random_next(random, 27)) / (double)(1ULL << 53);
  52. }
  53. __host__ __device__ __forceinline__ static uint64_t random_next_long (Random *random) {
  54. return (((uint64_t)random_next(random, 32)) << 32u) + (int32_t)random_next(random, 32);
  55. }
  56. __host__ __device__ __forceinline__ static void advance2(Random *random) {
  57. *random = (*random * 0xBB20B4600A69LLU + 0x40942DE6BALLU) & RANDOM_MASK;
  58. }
  59. }
  60. using namespace java_lcg;
  61. namespace device_intrinsics { //region DEVICE INTRINSICS
  62. #define DEVICE_STATIC_INTRINSIC_QUALIFIERS static __device__ __forceinline__
  63. #if (defined(_MSC_VER) && defined(_WIN64)) || defined(__LP64__)
  64. #define PXL_GLOBAL_PTR "l"
  65. #else
  66. #define PXL_GLOBAL_PTR "r"
  67. #endif
  68. DEVICE_STATIC_INTRINSIC_QUALIFIERS void __prefetch_local_l1(const void* const ptr)
  69. {
  70. asm("prefetch.local.L1 [%0];" : : PXL_GLOBAL_PTR(ptr));
  71. }
  72. DEVICE_STATIC_INTRINSIC_QUALIFIERS void __prefetch_global_uniform(const void* const ptr)
  73. {
  74. asm("prefetchu.L1 [%0];" : : PXL_GLOBAL_PTR(ptr));
  75. }
  76. DEVICE_STATIC_INTRINSIC_QUALIFIERS void __prefetch_local_l2(const void* const ptr)
  77. {
  78. asm("prefetch.local.L2 [%0];" : : PXL_GLOBAL_PTR(ptr));
  79. }
  80. #if __CUDA__ < 10
  81. #define __ldg(ptr) (*(ptr))
  82. #endif
  83. }
  84. using namespace device_intrinsics;
// Threads per block. Also the inner dimension of the shared `permutations`
// scratch array and the first argument of the kernel's __launch_bounds__.
#define BLOCK_SIZE (128)
//#define BLOCK_SIZE (64)
// presumably log2 of the number of blocks per launch — TODO confirm against the host code
#define WORK_SIZE_BITS 15
// 2^WORK_SIZE_BITS * BLOCK_SIZE, evaluated as a 64-bit unsigned value.
#define SEEDS_PER_CALL ((1ULL << (WORK_SIZE_BITS)) * BLOCK_SIZE)
  89. //The generation of the simplex layers and noise
  90. namespace simplex { //region Simplex layer gen
  91. #define F2 0.3660254037844386
  92. #define G2 0.21132486540518713
  93. __constant__ __device__ int8_t const grad2[12][2] = {{1, 1,},
  94. {-1, 1,},
  95. {1, -1,},
  96. {-1, -1,},
  97. {1, 0,},
  98. {-1, 0,},
  99. {1, 0,},
  100. {-1, 0,},
  101. {0, 1,},
  102. {0, -1,},
  103. {0, 1,},
  104. {0, -1,}};
  105. /* End of constant for simplex noise*/
  106. struct SimplexOctave {
  107. double xo;
  108. double yo;
  109. uint8_t permutations[256];
  110. };
// Shared-memory scratch for building a permutation table: 256 rows, one column
// per thread of the block, so each thread only touches column threadIdx.x.
__shared__ uint8_t permutations[256][BLOCK_SIZE];
// Read / write entry `index` of the calling thread's column of `array`.
#define getValue(array, index) array[index][threadIdx.x]
#define setValue(array, index, value) array[index][threadIdx.x] = value
  114. /* simplex noise result is in buffer */
  115. __device__ static inline double getSimplexNoise(const double chunkX, const double chunkZ, double offsetX, double offsetZ, const double ampFactor, const uint8_t nbOctaves, Random *random, SimplexOctave resultArray[]) {
  116. offsetX /= 1.5;
  117. offsetZ /= 1.5;
  118. double res = 0.0;
  119. double octaveDiminution = 1.0;
  120. double octaveAmplification = 1.0;
  121. for (int j = 0; j < nbOctaves; ++j) {
  122. __prefetch_local_l2(&resultArray[j]);
  123. double xo = next_double(random) * 256.0;
  124. double yo = next_double(random) * 256.0;
  125. advance2(random);
  126. #pragma unroll
  127. for(int w = 0; w<256; w++) {
  128. setValue(permutations, w, w);
  129. }
  130. for(int index = 0; index<256; index++) {
  131. uint32_t randomIndex = random_next_int(random, 256ull - index) + index;
  132. //if (randomIndex != index) {
  133. // swap
  134. uint8_t v1 = getValue(permutations,index);
  135. uint8_t v2 = getValue(permutations,randomIndex);
  136. setValue(permutations,index, v2);
  137. setValue(permutations, randomIndex, v1);
  138. //}
  139. }
  140. double XCoords = (double) chunkX * offsetX * octaveAmplification + xo;
  141. double ZCoords = (double) chunkZ * offsetZ * octaveAmplification + yo;
  142. // Skew the input space to determine which simplex cell we're in
  143. double hairyFactor = (XCoords + ZCoords) * F2;
  144. int32_t tempX = static_cast<int32_t>(XCoords + hairyFactor);
  145. int32_t tempZ = static_cast<int32_t>(ZCoords + hairyFactor);
  146. int32_t xHairy = (XCoords + hairyFactor < tempX) ? (tempX - 1) : (tempX);
  147. int32_t zHairy = (ZCoords + hairyFactor < tempZ) ? (tempZ - 1) : (tempZ);
  148. // Work out the hashed gradient indices of the three simplex corners
  149. uint32_t ii = (uint32_t) xHairy & 0xffu;
  150. uint32_t jj = (uint32_t) zHairy & 0xffu;
  151. //__prefetch_local_l1(&permutations[(uint16_t)(jj + 1)& 0xffu]);
  152. double d11 = (double) (xHairy + zHairy) * G2;
  153. double X0 = (double) xHairy - d11; // Unskew the cell origin back to (x,y) space
  154. double Y0 = (double) zHairy - d11;
  155. double x0 = XCoords - X0; // The x,y distances from the cell origin
  156. double y0 = ZCoords - Y0;
  157. // For the 2D case, the simplex shape is an equilateral triangle.
  158. // Determine which simplex we are in.
  159. int offsetSecondCornerX, offsetSecondCornerZ; // Offsets for second (middle) corner of simplex in (i,j) coords
  160. if (x0 > y0) { // lower triangle, XY order: (0,0)->(1,0)->(1,1)
  161. offsetSecondCornerX = 1;
  162. offsetSecondCornerZ = 0;
  163. } else { // upper triangle, YX order: (0,0)->(0,1)->(1,1)
  164. offsetSecondCornerX = 0;
  165. offsetSecondCornerZ = 1;
  166. }
  167. double x1 = (x0 - (double) offsetSecondCornerX) + G2; // Offsets for middle corner in (x,y) unskewed coords
  168. double y1 = (y0 - (double) offsetSecondCornerZ) + G2;
  169. double x2 = (x0 - 1.0) + 2.0 * G2; // Offsets for last corner in (x,y) unskewed coords
  170. double y2 = (y0 - 1.0) + 2.0 * G2;
  171. uint8_t gi0 = getValue(permutations,(uint32_t) (ii + getValue(permutations,jj)) & 0xffu) % 12u;
  172. uint8_t gi1 = getValue(permutations,(uint32_t)(ii + offsetSecondCornerX + getValue(permutations,(uint32_t) (jj + offsetSecondCornerZ) & 0xffu))& 0xffu) % 12u;
  173. uint8_t gi2 = getValue(permutations,(uint32_t)(ii + 1 + getValue(permutations,(uint32_t)(jj + 1)& 0xffu))& 0xffu) % 12u;
  174. // Calculate the contribution from the three corners
  175. double t0 = 0.5 - x0 * x0 - y0 * y0;
  176. double n0;
  177. if (t0 < 0.0) {
  178. n0 = 0.0;
  179. } else {
  180. t0 *= t0;
  181. n0 = t0 * t0 * ((double) __ldg(&grad2[gi0][0]) * x0 + (double) __ldg(&grad2[gi0][1]) * y0); // (x,y) of grad2 used for 2D gradient
  182. }
  183. double t1 = 0.5 - x1 * x1 - y1 * y1;
  184. double n1;
  185. if (t1 < 0.0) {
  186. n1 = 0.0;
  187. } else {
  188. t1 *= t1;
  189. n1 = t1 * t1 * ((double) __ldg(&grad2[gi1][0]) * x1 + (double) __ldg(&grad2[gi1][1]) * y1);
  190. }
  191. double t2 = 0.5 - x2 * x2 - y2 * y2;
  192. double n2;
  193. if (t2 < 0.0) {
  194. n2 = 0.0;
  195. } else {
  196. t2 *= t2;
  197. n2 = t2 * t2 * ((double) __ldg(&grad2[gi2][0]) * x2 + (double) __ldg(&grad2[gi2][1]) * y2);
  198. }
  199. // Add contributions from each corner to get the final noise value.
  200. // The result is scaled to return values in the interval [-1,1].
  201. res = res + 70.0 * (n0 + n1 + n2) * 0.55000000000000004 / octaveDiminution;
  202. octaveAmplification *= ampFactor;
  203. octaveDiminution *= 0.5;
  204. resultArray[j].xo = xo;
  205. resultArray[j].yo = yo;
  206. #pragma unroll
  207. for(int c = 0; c<256;c++) {
  208. __prefetch_local_l1(&(resultArray[j].permutations[c+1]));
  209. resultArray[j].permutations[c] = getValue(permutations,c);
  210. }
  211. }
  212. return res;
  213. }
  214. __device__ static inline double getSimplexNoiseFromOctave(const double chunkX, const double chunkZ, double offsetX, double offsetZ, const double ampFactor, const uint8_t nbOctaves, const SimplexOctave resultArray[]) {
  215. __prefetch_local_l1(&resultArray[0]);//Double check
  216. offsetX /= 1.5;
  217. offsetZ /= 1.5;
  218. double res = 0.0;
  219. double octaveDiminution = 1.0;
  220. double octaveAmplification = 1.0;
  221. for (uint8_t j = 0; j < nbOctaves; ++j) {
  222. __prefetch_local_l2(&resultArray[j+1]);
  223. double xo = resultArray[j].xo;
  224. double yo = resultArray[j].yo;
  225. double XCoords = (double) chunkX * offsetX * octaveAmplification + xo;
  226. double ZCoords = (double) chunkZ * offsetZ * octaveAmplification + yo;
  227. // Skew the input space to determine which simplex cell we're in
  228. double hairyFactor = (XCoords + ZCoords) * F2;
  229. int32_t tempX = static_cast<int32_t>(XCoords + hairyFactor);
  230. int32_t tempZ = static_cast<int32_t>(ZCoords + hairyFactor);
  231. int32_t xHairy = (XCoords + hairyFactor < tempX) ? (tempX - 1) : (tempX);
  232. int32_t zHairy = (ZCoords + hairyFactor < tempZ) ? (tempZ - 1) : (tempZ);
  233. // Work out the hashed gradient indices of the three simplex corners
  234. uint8_t ii = (uint32_t) xHairy & 0xffu;
  235. uint8_t jj = (uint32_t) zHairy & 0xffu;
  236. //__prefetch_local_l1(&permutations[(uint16_t)(jj + 1)& 0xffu]);
  237. double d11 = (double) (xHairy + zHairy) * G2;
  238. double X0 = (double) xHairy - d11; // Unskew the cell origin back to (x,y) space
  239. double Y0 = (double) zHairy - d11;
  240. double x0 = XCoords - X0; // The x,y distances from the cell origin
  241. double y0 = ZCoords - Y0;
  242. // For the 2D case, the simplex shape is an equilateral triangle.
  243. // Determine which simplex we are in.
  244. int offsetSecondCornerX, offsetSecondCornerZ; // Offsets for second (middle) corner of simplex in (i,j) coords
  245. if (x0 > y0) { // lower triangle, XY order: (0,0)->(1,0)->(1,1)
  246. offsetSecondCornerX = 1;
  247. offsetSecondCornerZ = 0;
  248. } else { // upper triangle, YX order: (0,0)->(0,1)->(1,1)
  249. offsetSecondCornerX = 0;
  250. offsetSecondCornerZ = 1;
  251. }
  252. double x1 = (x0 - (double) offsetSecondCornerX) + G2; // Offsets for middle corner in (x,y) unskewed coords
  253. double y1 = (y0 - (double) offsetSecondCornerZ) + G2;
  254. double x2 = (x0 - 1.0) + 2.0 * G2; // Offsets for last corner in (x,y) unskewed coords
  255. double y2 = (y0 - 1.0) + 2.0 * G2;
  256. uint8_t gi0 = resultArray[j].permutations[(uint16_t) (ii + resultArray[j].permutations[jj]) & 0xffu] % 12u;
  257. uint8_t gi1 = resultArray[j].permutations[(uint16_t)(ii + offsetSecondCornerX + resultArray[j].permutations[(uint16_t) (jj + offsetSecondCornerZ) & 0xffu])& 0xffu] % 12u;
  258. uint8_t gi2 = resultArray[j].permutations[(uint16_t)(ii + 1 + resultArray[j].permutations[(uint16_t)(jj + 1)& 0xffu])& 0xffu] % 12u;
  259. // Calculate the contribution from the three corners
  260. double t0 = 0.5 - x0 * x0 - y0 * y0;
  261. double n0;
  262. if (t0 < 0.0) {
  263. n0 = 0.0;
  264. } else {
  265. t0 *= t0;
  266. n0 = t0 * t0 * ((double) __ldg(&grad2[gi0][0]) * x0 + (double) __ldg(&grad2[gi0][1]) * y0); // (x,y) of grad2 used for 2D gradient
  267. }
  268. double t1 = 0.5 - x1 * x1 - y1 * y1;
  269. double n1;
  270. if (t1 < 0.0) {
  271. n1 = 0.0;
  272. } else {
  273. t1 *= t1;
  274. n1 = t1 * t1 * ((double) __ldg(&grad2[gi1][0]) * x1 + (double) __ldg(&grad2[gi1][1]) * y1);
  275. }
  276. double t2 = 0.5 - x2 * x2 - y2 * y2;
  277. double n2;
  278. if (t2 < 0.0) {
  279. n2 = 0.0;
  280. } else {
  281. t2 *= t2;
  282. n2 = t2 * t2 * ((double) __ldg(&grad2[gi2][0]) * x2 + (double) __ldg(&grad2[gi2][1]) * y2);
  283. }
  284. // Add contributions from each corner to get the final noise value.
  285. // The result is scaled to return values in the interval [-1,1].
  286. res = res + 70.0 * (n0 + n1 + n2) * 0.55000000000000004 / octaveDiminution;
  287. octaveAmplification *= ampFactor;
  288. octaveDiminution *= 0.5;
  289. }
  290. return res;
  291. }
  292. __device__ static inline double getTempFromTempAndPrecip(double temp, double precip) {
  293. precip = precip * 1.1000000000000001 + 0.5;
  294. temp = (temp * 0.14999999999999999 + 0.69999999999999996) * (1.0 - 0.01) + precip * 0.01;
  295. temp = 1.0 - (1.0 - temp) * (1.0 - temp);
  296. if (temp < 0.0) {
  297. temp = 0.0;
  298. }
  299. if (temp > 1.0) {
  300. temp = 1.0;
  301. }
  302. return temp;
  303. }
  304. __device__ static inline double getHumidFromHumidAndPrecip(double humidity, double precip) {
  305. precip = precip * 1.1000000000000001 + 0.5;
  306. humidity = (humidity * 0.14999999999999999 + 0.5) * (1.0 - 0.002) + precip * 0.002;
  307. if (humidity < 0.0) {
  308. humidity = 0.0;
  309. }
  310. if (humidity > 1.0) {
  311. humidity = 1.0;
  312. }
  313. return humidity;
  314. }
// Scales a value by 63 and truncates it to int32_t; getBiome applies it to the
// [0, 1] temperature / humidity values to obtain a 0..63 table coordinate.
#define ConvertToIndex(value) ((int32_t)((value)*63.0))
  316. }
  317. using namespace simplex;
  318. namespace more_simplex {
// Aliases for the two noise entry points: the "Inital" form generates the
// octaves from a generator state, the plain form re-samples stored octaves.
#define getSimplexInital(x,y,a1,a2,a3,layer_count,seed,out_array) getSimplexNoise(x,y,a1,a2,a3,layer_count,seed,out_array)
#define getSimplex(x,y,a1,a2,a3,layer_count,data_array) getSimplexNoiseFromOctave(x,y,a1,a2,a3,layer_count,data_array)
// Humidity presets: the same scale, amplification factor and octave count (4)
// that getBiome passes for its humidity sample.
#define getSimplexHumidtyInital(x,y,seed,out_array) getSimplexInital(x,y,0.05000000074505806, 0.05000000074505806, 0.33333333333333331, 4, seed, out_array)
#define getSimplexHumidty(x,y,data_array) getSimplex(x,y,0.05000000074505806, 0.05000000074505806, 0.33333333333333331, 4, data_array)
  323. __constant__ uint8_t const biomeLookup[] = {11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 9, 9, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 9, 9, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 9, 9, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 9, 9, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 5, 5, 5, 
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 9, 9, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 9, 9, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 9, 9, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 9, 9, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 9, 9, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 9, 9, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 9, 9, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 9, 9, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 9, 9, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 
11, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 9, 9, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 9, 9, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 9, 9, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 4, 
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 11, 11, 11, 11, 
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, 4, 4, 4, 4, 2, 2, 2, 2, 
2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 1, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 1, 1, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 1, 1, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 1, 1, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 1, 1, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 1, 1, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 1, 1};
  324. __device__ static inline uint8_t getBiome(int x, int z, SimplexOctave precipOctaves[], SimplexOctave tempOctaves[], SimplexOctave humidOctaves[]) {
  325. double precipAtPos = getSimplex((double)x, (double)z, 0.25, 0.25, 0.58823529411764708, 2, precipOctaves);
  326. double tempAtPos = getSimplex((double)x, (double)z, 0.02500000037252903, 0.02500000037252903, 0.25, 4, tempOctaves);
  327. double humidityAtPos = getSimplex((double)x, (double)z, 0.05000000074505806, 0.05000000074505806, 0.33333333333333331, 4, humidOctaves);
  328. int32_t index = ConvertToIndex(getTempFromTempAndPrecip(tempAtPos, precipAtPos)) + ConvertToIndex(getHumidFromHumidAndPrecip(humidityAtPos,precipAtPos)) * 64;
  329. return __ldg(&biomeLookup[index]);
  330. }
  331. }
  332. using namespace more_simplex;
  // ---------------------------------------------------------------------------
  // Search configuration.
  //
  // Every replacement list that is an expression (including plain negative
  // literals) is wrapped in parentheses so it stays a single operand wherever
  // the macro is expanded.
  // ---------------------------------------------------------------------------

  // Precipitation range used when decoding the humidity bounds below.
  #define ABS_PRECIP (3*.55)
  #define MIN_PRECIP (-ABS_PRECIP * 1.1 + 0.5)
  #define MAX_PRECIP (ABS_PRECIP * 1.1 + 0.5)

  // Weights used in the decode formulas: D1 for the precipitation term,
  // D2 = 1 - D1 for the humidity term.
  #define D1 0.002
  #define D2 (1 - D1)

  // Convert an observed humidity bound into a bound on the raw noise value,
  // i.e. solve  h = (raw * 0.15 + 0.5) * D2 + precip * D1  for raw.
  // The lower bound uses MAX_PRECIP and the upper bound uses MIN_PRECIP, which
  // gives the widest raw interval.
  // NOTE(review): presumably mirrors getHumidFromHumidAndPrecip - confirm.
  #define decodeMinHumid(minHumid) ((((minHumid) - MAX_PRECIP * D1) / D2 - 0.5) / 0.15)
  #define decodeMaxHumid(maxHumid) ((((maxHumid) - MIN_PRECIP * D1) / D2 - 0.5) / 0.15)

  // Sample points and allowed raw-humidity windows tested by
  // checkSeedBiomesHumidity (GRASS3 first, then GRASS2, then GRASS1).
  #define GRASS1_X 64
  #define GRASS1_Z (-53)
  #define GRASS1_MIN_HUMID decodeMinHumid(0.2723577235772357)
  #define GRASS1_MAX_HUMID decodeMaxHumid(0.325)
  #define GRASS2_X 59
  #define GRASS2_Z (-19)
  #define GRASS2_MIN_HUMID decodeMinHumid(0.44313725490196076)
  #define GRASS2_MAX_HUMID decodeMaxHumid(0.5081967213114754)
  #define GRASS3_X 83
  #define GRASS3_Z (-40)
  #define GRASS3_MIN_HUMID decodeMinHumid(0.4117647058823529)
  #define GRASS3_MAX_HUMID decodeMaxHumid(0.4833333333333334)

  // Positions whose biome is checked by part2ElectricBooglo.
  #define PLAINS_BIOME_PLAYER_X 61
  #define PLAINS_BIOME_PLAYER_Z (-68)
  #define PLAINS_BIOME_X 48
  #define PLAINS_BIOME_Z (-72)
  #define DESERT_BIOME_X 47
  #define DESERT_BIOME_Z (-72)
  #define PLAINS_FOREST_BIOME_2_X 33
  #define PLAINS_FOREST_BIOME_2_Z (-82)
  #define DESERT_BIOME_2_X 33
  #define DESERT_BIOME_2_Z (-81)
  // TODO: randomly chosen; get the actual desert coords.
  /**
   * Stage 1: humidity filter.
   *
   * Launch layout: 1-D grid of 1-D blocks. Each thread tests exactly one seed,
   * worldSeedOffset + (global thread index). No shared memory is used.
   *
   * A seed passes when the humidity noise generated from it lies strictly
   * inside the GRASS3, GRASS2 and GRASS1 windows, tested in that order so the
   * thread can return after the first failing point.
   *
   * @param worldSeedOffset  seed tested by global thread 0.
   * @param count            counter of passing seeds; incremented atomically.
   *                         The caller is expected to zero it before launch.
   * @param seeds            output buffer; passing seeds are stored at the slot
   *                         returned by atomicAdd, so in no particular order.
   *
   * NOTE(review): the write into `seeds` is not bounds-checked; the buffer
   * capacity is not known here, so the caller must size it for the worst case.
   */
  __global__ __launch_bounds__(BLOCK_SIZE,4) static void checkSeedBiomesHumidity(uint64_t worldSeedOffset, uint32_t* count, uint64_t* seeds) {
    // Widen before multiplying: blockIdx.x * blockDim.x is otherwise a 32-bit
    // unsigned product that wraps once the launch covers more than 2^32 threads.
    const uint64_t globalThread = (uint64_t)blockIdx.x * blockDim.x + threadIdx.x;
    const int64_t seed = (int64_t)(globalThread + worldSeedOffset);

    Random biomeSeed = get_random(seed * 39811LL);
    SimplexOctave humidOct[4];

    // The first sample also fills humidOct, which the later samples reuse.
    double humidAtPos = getSimplexHumidtyInital((double)GRASS3_X, (double)GRASS3_Z, &biomeSeed, humidOct);
    if (!(GRASS3_MIN_HUMID < humidAtPos && humidAtPos < GRASS3_MAX_HUMID)) {
      return;
    }

    humidAtPos = getSimplexHumidty((double)GRASS2_X, (double)GRASS2_Z, humidOct);
    if (!(GRASS2_MIN_HUMID < humidAtPos && humidAtPos < GRASS2_MAX_HUMID)) {
      return;
    }

    humidAtPos = getSimplexHumidty((double)GRASS1_X, (double)GRASS1_Z, humidOct);
    if (!(GRASS1_MIN_HUMID < humidAtPos && humidAtPos < GRASS1_MAX_HUMID)) {
      return;
    }

    // Reserve the next free slot and record the surviving seed.
    seeds[atomicAdd(count, 1)] = seed;
  }
  /**
   * Returns true when `seed` satisfies every temperature / precipitation /
   * humidity / biome constraint of stage 2. Private helper of
   * part2ElectricBooglo.
   *
   * The checks run in the order below and stop at the first failure:
   *   1. temperature noise at the PLAINS_BIOME_PLAYER position in (1.06, 3.006);
   *   2. temperature table index at that position >= 62;
   *   3. humidity table index at that position in (12, 29);
   *   4. biome at DESERT_BIOME is 8;
   *   5. biome at PLAINS_BIOME is 9 or 6;
   *   6. biome at DESERT_BIOME_2 is 8;
   *   7. biome at PLAINS_FOREST_BIOME_2 is 9, 4 or 6.
   */
  __device__ static inline bool seedPassesBiomeChecks(int64_t seed) {
    SimplexOctave tempOct[4];
    SimplexOctave precipOct[2];
    SimplexOctave humidOct[4];

    // Each noise field is seeded from its own multiple of the world seed; the
    // first sample of a field also fills that field's octave table.
    Random biomeSeed = get_random(seed * 9871LL);
    const double tempAtPos = getSimplexNoise((double)PLAINS_BIOME_PLAYER_X, (double)PLAINS_BIOME_PLAYER_Z, 0.02500000037252903, 0.02500000037252903, 0.25, 4, &biomeSeed, tempOct);
    if (!(1.06 < tempAtPos && tempAtPos < 3.006)) {
      return false;
    }

    biomeSeed = get_random(seed * 0x84a59LL);
    const double precipAtPos = getSimplexNoise((double)PLAINS_BIOME_PLAYER_X, (double)PLAINS_BIOME_PLAYER_Z, 0.25, 0.25, 0.58823529411764708, 2, &biomeSeed, precipOct);
    // Reject if the player position is not a plains biome.
    if (ConvertToIndex(getTempFromTempAndPrecip(tempAtPos, precipAtPos)) < 62) {
      return false;
    }

    biomeSeed = get_random(seed * 39811LL);
    const double humidAtPos = getSimplexHumidtyInital((double)PLAINS_BIOME_PLAYER_X, (double)PLAINS_BIOME_PLAYER_Z, &biomeSeed, humidOct);
    const int32_t humid_index = ConvertToIndex(getHumidFromHumidAndPrecip(humidAtPos, precipAtPos));
    if (!(12 < humid_index && humid_index < 29)) {
      return false;
    }

    // All three octave tables are populated now, so getBiome can be used.
    // The DESERT_BIOME check used to be evaluated a second time further down
    // with identical arguments; it is evaluated once here.
    // NOTE(review): this assumes getBiome only reads the octave tables - confirm.
    if (getBiome(DESERT_BIOME_X, DESERT_BIOME_Z, precipOct, tempOct, humidOct) != 8) {
      return false;
    }

    int biome_num = getBiome(PLAINS_BIOME_X, PLAINS_BIOME_Z, precipOct, tempOct, humidOct);
    if (!(biome_num == 9 || biome_num == 6)) {
      return false;
    }

    if (getBiome(DESERT_BIOME_2_X, DESERT_BIOME_2_Z, precipOct, tempOct, humidOct) != 8) {
      return false;
    }

    biome_num = getBiome(PLAINS_FOREST_BIOME_2_X, PLAINS_FOREST_BIOME_2_Z, precipOct, tempOct, humidOct);
    return biome_num == 9 || biome_num == 4 || biome_num == 6;
  }

  /**
   * Stage 2: temperature and biome filter over the seeds kept by stage 1.
   *
   * Launch layout: 1-D grid of 1-D blocks with at least `count` threads in
   * total; thread i handles seeds[i], surplus threads return immediately.
   * No shared memory is used.
   *
   * @param worldSeedOffset  unused; kept so the launch signature is unchanged.
   * @param count            number of valid entries in `seeds`.
   * @param seeds            in/out: entries that fail any check are overwritten
   *                         with 0, entries that pass are left untouched.
   */
  __global__ __launch_bounds__(BLOCK_SIZE,2) static void part2ElectricBooglo(uint64_t worldSeedOffset, uint32_t count, uint64_t* seeds) {
    (void)worldSeedOffset;

    // Widen before multiplying so the index cannot wrap in 32 bits.
    const uint64_t index = (uint64_t)blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= count) {
      return;
    }

    if (!seedPassesBiomeChecks((int64_t)seeds[index])) {
      seeds[index] = 0;  // 0 marks a rejected candidate for the host
    }
  }
  434. namespace host_processing { //region Host side processing
  435. #ifdef BOINC
  /**
   * Initialises the CUDA driver API, looks up device number `device` and
   * creates a driver context on it with flags 0x4.
   * NOTE(review): going by the function name, 0x4 is presumably
   * CU_CTX_SCHED_BLOCKING_SYNC - confirm against cuda.h.
   *
   * @param device  ordinal passed to cuDeviceGet.
   * @return true when all three driver calls return CUDA_SUCCESS, false as
   *         soon as one of them fails (later calls are then not made).
   */
  bool setCudaBlockingSync(int device) {
    CUdevice hcuDevice;
    CUcontext hcuContext;
    // && short-circuits, so the calls run in order and stop at the first failure.
    return cuInit(0) == CUDA_SUCCESS
        && cuDeviceGet(&hcuDevice, device) == CUDA_SUCCESS
        && cuCtxCreate(&hcuContext, 0x4, hcuDevice) == CUDA_SUCCESS;
  }
  450. #endif
  // Builds without BOINC: the BOINC API calls used in this file expand to
  // nothing, so the call sites do not need their own #ifdef guards.
  #if !defined(BOINC)
  #  define boinc_begin_critical_section()
  #  define boinc_end_critical_section()
  #  define boinc_finish(status)
  #  define boinc_fraction_done(fraction)
  #endif
  // Wrap a CUDA runtime call (or any cudaError_t expression) so that a failure
  // is reported together with the file and line of the call site.
  #define GPU_ASSERT(code) gpuAssert((code), __FILE__, __LINE__)

  /**
   * Does nothing when `code` is cudaSuccess. Otherwise prints the error text,
   * numeric code, file and line to stderr and terminates: through
   * boinc_finish(code) in BOINC builds, through exit(code) otherwise.
   *
   * @param code  result of a CUDA runtime call.
   * @param file  source file of the call site (normally __FILE__).
   * @param line  source line of the call site (normally __LINE__).
   */
  inline void gpuAssert(cudaError_t code, const char *file, int line) {
    if (code == cudaSuccess) {
      return;
    }
    fprintf(stderr, "GPUassert: %s (code %d) %s %d\n", cudaGetErrorString(code), code, file, line);
    boinc_finish(code);  // expands to nothing in non-BOINC builds
  #ifndef BOINC
    exit(code);
  #endif
  }
  // Millisecond clock used to time each batch; only differences between two
  // readings are meaningful.
  //
  // The condition is fully parenthesised: && binds tighter than ||, so without
  // the parentheses the Cygwin exclusion applied to __WIN32 only.
  //
  // NOTE(review): these platform headers are included from inside namespace
  // host_processing; they would be better placed with the includes at the top
  // of the file.
  #if (defined(WIN32) || defined(_WIN32) || defined(__WIN32)) && !defined(__CYGWIN__)
  #include <windows.h>
  /**
   * @return milliseconds elapsed since system start.
   *
   * The previous implementation combined only the seconds and milliseconds
   * fields of SYSTEMTIME, so the value wrapped every minute and an interval
   * spanning a minute boundary came out negative (huge once unsigned).
   */
  uint64_t getCurrentTimeMillis() {
    return (uint64_t)GetTickCount64();
  }
  #else
  #include <sys/time.h>
  /**
   * @return wall-clock time from gettimeofday, in milliseconds.
   */
  uint64_t getCurrentTimeMillis() {
    struct timeval now;
    gettimeofday(&now, NULL);
    // Whole seconds scaled to ms, plus the sub-second part truncated to ms.
    return (uint64_t)now.tv_sec * 1000ULL + (uint64_t)now.tv_usec / 1000ULL;
  }
  #endif
  // Running total of seeds that survived both GPU stages, over all batches.
  uint32_t actual_count = 0;

  /**
   * Host driver: runs the two-stage GPU search over a range of batches.
   *
   * Usage: <program> <start_batch> <end_batch>
   * Batch b covers the seeds [b * SEEDS_PER_CALL, (b + 1) * SEEDS_PER_CALL).
   * start_batch is inclusive, end_batch exclusive.
   *
   * For every batch: stage 1 (checkSeedBiomesHumidity) collects candidates
   * into a managed buffer, stage 2 (part2ElectricBooglo) zeroes the candidates
   * that fail, and the host prints every non-zero survivor to stderr.
   *
   * @return 0 on success, 2 when arguments are missing, 1 when the batch
   *         bounds are invalid or the candidate buffer overflowed. CUDA errors
   *         terminate the process through GPU_ASSERT.
   */
  int host_main(int argc, char** argv) {
  #ifdef BOINC
    BOINC_OPTIONS options;
    boinc_options_defaults(options);
    options.normal_thread_priority = true;
    boinc_init_options(&options);
  #endif
    if (argc < 3) {
      fprintf(stderr, "Not enough arguments\n");
      return 2;
    }
    int start_batch = atoi(argv[1]);
    int end_batch = atoi(argv[2]);
    // end_batch is known to be positive once the first two tests have passed,
    // so the cast to unsigned is safe.
    if (start_batch < 0 || start_batch >= end_batch || (uint64_t)end_batch > (1ULL << 48) / SEEDS_PER_CALL) {
      fprintf(stderr, "Invalid batch bounds: %d to %d\n", start_batch, end_batch);
      return 1;
    }
    // Widen before multiplying so the seed bounds are computed in 64 bits
    // whatever the type of SEEDS_PER_CALL is.
    const uint64_t first_seed = (uint64_t)start_batch * SEEDS_PER_CALL;
    const uint64_t end_seed = (uint64_t)end_batch * SEEDS_PER_CALL;
    fprintf(stderr, "doing between %lld (inclusive) and %lld (exclusive)\n", (long long)first_seed, (long long)end_seed);

    int gpu_device = 0;
  #ifdef BOINC
    APP_INIT_DATA aid;
    boinc_get_init_data(aid);
    if (aid.gpu_device_num >= 0) {
      gpu_device = aid.gpu_device_num;
      fprintf(stderr,"boinc gpu %i gpuindex: %i \n", aid.gpu_device_num, gpu_device);
    } else {
      // The conversion specifier was missing its type character ("% ").
      fprintf(stderr,"stdalone gpuindex %i \n", gpu_device);
    }
    if (!setCudaBlockingSync(gpu_device)) {
      // Not fatal: the search still runs with the default context flags.
      fprintf(stderr, "warning: could not create a blocking-sync CUDA context on device %i\n", gpu_device);
    }
  #endif
    GPU_ASSERT(cudaSetDevice(gpu_device));
    GPU_ASSERT(cudaDeviceSetCacheConfig(cudaFuncCachePreferShared));
    //cudaDeviceSetSharedMemConfig(cudaSharedMemBankSizeEightByte);
    GPU_ASSERT(cudaDeviceSynchronize());

    // Managed memory: written by the kernels, read directly by the host after
    // each cudaDeviceSynchronize().
    uint32_t* count;
    GPU_ASSERT(cudaMallocManaged(&count, sizeof(*count)));
    // 1/32 of a batch; the shift of 5 is an estimate taken from the number of
    // seeds stage 1 lets through.
    const size_t seed_capacity = (size_t)(SEEDS_PER_CALL >> 5);
    uint64_t* seedBuffer;
    GPU_ASSERT(cudaMallocManaged(&seedBuffer, sizeof(*seedBuffer) * seed_capacity));

    for (uint64_t seed = first_seed; seed < end_seed; seed += SEEDS_PER_CALL) {
      uint64_t start = getCurrentTimeMillis();
      boinc_begin_critical_section();

      *count = 0;
      checkSeedBiomesHumidity<<< 1ULL << WORK_SIZE_BITS, BLOCK_SIZE>>>(seed, count, seedBuffer); // produces about 32k seeds per call
      GPU_ASSERT(cudaPeekAtLastError());
      GPU_ASSERT(cudaDeviceSynchronize());

      const uint32_t candidates = *count;
      // Stage 1 cannot bounds-check its writes, so detect an overflow here
      // before anything reads past the end of the buffer.
      if (candidates > seed_capacity) {
        fprintf(stderr, "Seed buffer overflow: stage 1 kept %u seeds but the buffer holds %llu\n", (unsigned)candidates, (unsigned long long)seed_capacity);
        boinc_finish(1);
        return 1;
      }

      // A grid of 0 blocks is an invalid launch configuration, so stage 2 is
      // skipped when stage 1 kept nothing.
      if (candidates > 0) {
        const unsigned int blocks = (unsigned int)(((uint64_t)candidates + (BLOCK_SIZE) - 1) / (BLOCK_SIZE));
        part2ElectricBooglo<<< blocks, BLOCK_SIZE>>>(seed, candidates, seedBuffer);
        GPU_ASSERT(cudaPeekAtLastError());
        GPU_ASSERT(cudaDeviceSynchronize());
      }

      // Stage 2 overwrote rejected candidates with 0; everything else is a hit.
      for (uint32_t i = 0; i < candidates; i++) {
        const uint64_t found = seedBuffer[i];
        if (found != 0) {
          actual_count++;
          fprintf(stderr, "SEED FOUND: %lld\n", (long long)found);
        }
      }
      boinc_end_critical_section();

      uint64_t end = getCurrentTimeMillis();
      double fraction_done = ((double)(seed - first_seed)) / ((double)(end_seed - first_seed));
      printf("Time elapsed %dms, speed: %.2fm/s, seed count 1: %i, seed count 2: %i, percent done: %f\n", (int)(end - start), ((double)((1ULL<<WORK_SIZE_BITS)*(BLOCK_SIZE)))/((double)(end - start))/1000.0, (int)candidates, (int)actual_count, fraction_done*100);

      // Report once every 30 batches (about every 15 seconds). The test used
      // to be inverted and fired on the other 29.
      if ((seed / SEEDS_PER_CALL) % 30 == 0) {
        boinc_fraction_done(fraction_done);
      }
    }

    GPU_ASSERT(cudaFree(seedBuffer));
    GPU_ASSERT(cudaFree(count));

    fprintf(stderr, "Finished work unit\n");
    boinc_finish(0);
    return 0;
  }
  556. }
  557. using namespace host_processing;
  // Program entry point: forwards the command line to host_main and returns
  // its result as the process exit status.
  int main(int argc, char** argv) {
    const int exit_status = host_main(argc, argv);
    return exit_status;
  }