astcenc_partition_tables.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482
  1. // SPDX-License-Identifier: Apache-2.0
  2. // ----------------------------------------------------------------------------
  3. // Copyright 2011-2023 Arm Limited
  4. //
  5. // Licensed under the Apache License, Version 2.0 (the "License"); you may not
  6. // use this file except in compliance with the License. You may obtain a copy
  7. // of the License at:
  8. //
  9. // http://www.apache.org/licenses/LICENSE-2.0
  10. //
  11. // Unless required by applicable law or agreed to in writing, software
  12. // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  13. // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  14. // License for the specific language governing permissions and limitations
  15. // under the License.
  16. // ----------------------------------------------------------------------------
  17. /**
  18. * @brief Functions for generating partition tables on demand.
  19. */
  20. #include "astcenc_internal.h"
  21. /** @brief The number of 64-bit words needed to represent a canonical partition bit pattern. */
  22. #define BIT_PATTERN_WORDS (((ASTCENC_BLOCK_MAX_TEXELS * 2) + 63) / 64)
  23. /**
  24. * @brief Generate a canonical representation of a partition pattern.
  25. *
  26. * The returned value stores two bits per texel, for up to 6x6x6 texels, where the two bits store
  27. * the remapped texel index. Remapping ensures that we only match on the partition pattern,
  28. * independent of the partition order generated by the hash.
  29. *
  30. * @param texel_count The number of texels in the block.
  31. * @param partition_of_texel The partition assignments, in hash order.
  32. * @param[out] bit_pattern The output bit pattern representation.
  33. */
  34. static void generate_canonical_partitioning(
  35. unsigned int texel_count,
  36. const uint8_t* partition_of_texel,
  37. uint64_t bit_pattern[BIT_PATTERN_WORDS]
  38. ) {
  39. // Clear the pattern
  40. for (unsigned int i = 0; i < BIT_PATTERN_WORDS; i++)
  41. {
  42. bit_pattern[i] = 0;
  43. }
  44. // Store a mapping to reorder the raw partitions so that the partitions are ordered such
  45. // that the lowest texel index in partition N is smaller than the lowest texel index in
  46. // partition N + 1.
  47. int mapped_index[BLOCK_MAX_PARTITIONS];
  48. int map_weight_count = 0;
  49. for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++)
  50. {
  51. mapped_index[i] = -1;
  52. }
  53. for (unsigned int i = 0; i < texel_count; i++)
  54. {
  55. int index = partition_of_texel[i];
  56. if (mapped_index[index] < 0)
  57. {
  58. mapped_index[index] = map_weight_count++;
  59. }
  60. uint64_t xlat_index = mapped_index[index];
  61. bit_pattern[i >> 5] |= xlat_index << (2 * (i & 0x1F));
  62. }
  63. }
  64. /**
  65. * @brief Compare two canonical patterns to see if they are the same.
  66. *
  67. * @param part1 The first canonical bit pattern to check.
  68. * @param part2 The second canonical bit pattern to check.
  69. *
  70. * @return @c true if the patterns are the same, @c false otherwise.
  71. */
  72. static bool compare_canonical_partitionings(
  73. const uint64_t part1[BIT_PATTERN_WORDS],
  74. const uint64_t part2[BIT_PATTERN_WORDS]
  75. ) {
  76. return (part1[0] == part2[0])
  77. #if BIT_PATTERN_WORDS > 1
  78. && (part1[1] == part2[1])
  79. #endif
  80. #if BIT_PATTERN_WORDS > 2
  81. && (part1[2] == part2[2])
  82. #endif
  83. #if BIT_PATTERN_WORDS > 3
  84. && (part1[3] == part2[3])
  85. #endif
  86. #if BIT_PATTERN_WORDS > 4
  87. && (part1[4] == part2[4])
  88. #endif
  89. #if BIT_PATTERN_WORDS > 5
  90. && (part1[5] == part2[5])
  91. #endif
  92. #if BIT_PATTERN_WORDS > 6
  93. && (part1[6] == part2[6])
  94. #endif
  95. ;
  96. }
  97. /**
  98. * @brief Hash function used for procedural partition assignment.
  99. *
  100. * @param inp The hash seed.
  101. *
  102. * @return The hashed value.
  103. */
  104. static uint32_t hash52(
  105. uint32_t inp
  106. ) {
  107. inp ^= inp >> 15;
  108. // (2^4 + 1) * (2^7 + 1) * (2^17 - 1)
  109. inp *= 0xEEDE0891;
  110. inp ^= inp >> 5;
  111. inp += inp << 16;
  112. inp ^= inp >> 7;
  113. inp ^= inp >> 3;
  114. inp ^= inp << 6;
  115. inp ^= inp >> 17;
  116. return inp;
  117. }
  118. /**
  119. * @brief Select texel assignment for a single coordinate.
  120. *
  121. * @param seed The seed - the partition index from the block.
  122. * @param x The texel X coordinate in the block.
  123. * @param y The texel Y coordinate in the block.
  124. * @param z The texel Z coordinate in the block.
  125. * @param partition_count The total partition count of this encoding.
  126. * @param small_block @c true if the block has fewer than 32 texels.
  127. *
  128. * @return The assigned partition index for this texel.
  129. */
  130. static uint8_t select_partition(
  131. int seed,
  132. int x,
  133. int y,
  134. int z,
  135. int partition_count,
  136. bool small_block
  137. ) {
  138. // For small blocks bias the coordinates to get better distribution
  139. if (small_block)
  140. {
  141. x <<= 1;
  142. y <<= 1;
  143. z <<= 1;
  144. }
  145. seed += (partition_count - 1) * 1024;
  146. uint32_t rnum = hash52(seed);
  147. uint8_t seed1 = rnum & 0xF;
  148. uint8_t seed2 = (rnum >> 4) & 0xF;
  149. uint8_t seed3 = (rnum >> 8) & 0xF;
  150. uint8_t seed4 = (rnum >> 12) & 0xF;
  151. uint8_t seed5 = (rnum >> 16) & 0xF;
  152. uint8_t seed6 = (rnum >> 20) & 0xF;
  153. uint8_t seed7 = (rnum >> 24) & 0xF;
  154. uint8_t seed8 = (rnum >> 28) & 0xF;
  155. uint8_t seed9 = (rnum >> 18) & 0xF;
  156. uint8_t seed10 = (rnum >> 22) & 0xF;
  157. uint8_t seed11 = (rnum >> 26) & 0xF;
  158. uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
  159. // Squaring all the seeds in order to bias their distribution towards lower values.
  160. seed1 *= seed1;
  161. seed2 *= seed2;
  162. seed3 *= seed3;
  163. seed4 *= seed4;
  164. seed5 *= seed5;
  165. seed6 *= seed6;
  166. seed7 *= seed7;
  167. seed8 *= seed8;
  168. seed9 *= seed9;
  169. seed10 *= seed10;
  170. seed11 *= seed11;
  171. seed12 *= seed12;
  172. int sh1, sh2;
  173. if (seed & 1)
  174. {
  175. sh1 = (seed & 2 ? 4 : 5);
  176. sh2 = (partition_count == 3 ? 6 : 5);
  177. }
  178. else
  179. {
  180. sh1 = (partition_count == 3 ? 6 : 5);
  181. sh2 = (seed & 2 ? 4 : 5);
  182. }
  183. int sh3 = (seed & 0x10) ? sh1 : sh2;
  184. seed1 >>= sh1;
  185. seed2 >>= sh2;
  186. seed3 >>= sh1;
  187. seed4 >>= sh2;
  188. seed5 >>= sh1;
  189. seed6 >>= sh2;
  190. seed7 >>= sh1;
  191. seed8 >>= sh2;
  192. seed9 >>= sh3;
  193. seed10 >>= sh3;
  194. seed11 >>= sh3;
  195. seed12 >>= sh3;
  196. int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
  197. int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
  198. int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
  199. int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
  200. // Apply the saw
  201. a &= 0x3F;
  202. b &= 0x3F;
  203. c &= 0x3F;
  204. d &= 0x3F;
  205. // Remove some of the components if we are to output < 4 partitions.
  206. if (partition_count <= 3)
  207. {
  208. d = 0;
  209. }
  210. if (partition_count <= 2)
  211. {
  212. c = 0;
  213. }
  214. if (partition_count <= 1)
  215. {
  216. b = 0;
  217. }
  218. uint8_t partition;
  219. if (a >= b && a >= c && a >= d)
  220. {
  221. partition = 0;
  222. }
  223. else if (b >= c && b >= d)
  224. {
  225. partition = 1;
  226. }
  227. else if (c >= d)
  228. {
  229. partition = 2;
  230. }
  231. else
  232. {
  233. partition = 3;
  234. }
  235. return partition;
  236. }
  237. /**
  238. * @brief Generate a single partition info structure.
  239. *
  240. * @param[out] bsd The block size information.
  241. * @param partition_count The partition count of this partitioning.
  242. * @param partition_index The partition index / seed of this partitioning.
  243. * @param partition_remap_index The remapped partition index of this partitioning.
  244. * @param[out] pi The partition info structure to populate.
  245. *
  246. * @return True if this is a useful partition index, False if we can skip it.
  247. */
  248. static bool generate_one_partition_info_entry(
  249. block_size_descriptor& bsd,
  250. unsigned int partition_count,
  251. unsigned int partition_index,
  252. unsigned int partition_remap_index,
  253. partition_info& pi
  254. ) {
  255. int texels_per_block = bsd.texel_count;
  256. bool small_block = texels_per_block < 32;
  257. uint8_t *partition_of_texel = pi.partition_of_texel;
  258. // Assign texels to partitions
  259. int texel_idx = 0;
  260. int counts[BLOCK_MAX_PARTITIONS] { 0 };
  261. for (unsigned int z = 0; z < bsd.zdim; z++)
  262. {
  263. for (unsigned int y = 0; y < bsd.ydim; y++)
  264. {
  265. for (unsigned int x = 0; x < bsd.xdim; x++)
  266. {
  267. uint8_t part = select_partition(partition_index, x, y, z, partition_count, small_block);
  268. pi.texels_of_partition[part][counts[part]++] = static_cast<uint8_t>(texel_idx++);
  269. *partition_of_texel++ = part;
  270. }
  271. }
  272. }
  273. // Fill loop tail so we can overfetch later
  274. for (unsigned int i = 0; i < partition_count; i++)
  275. {
  276. int ptex_count = counts[i];
  277. int ptex_count_simd = round_up_to_simd_multiple_vla(ptex_count);
  278. for (int j = ptex_count; j < ptex_count_simd; j++)
  279. {
  280. pi.texels_of_partition[i][j] = pi.texels_of_partition[i][ptex_count - 1];
  281. }
  282. }
  283. // Populate the actual procedural partition count
  284. if (counts[0] == 0)
  285. {
  286. pi.partition_count = 0;
  287. }
  288. else if (counts[1] == 0)
  289. {
  290. pi.partition_count = 1;
  291. }
  292. else if (counts[2] == 0)
  293. {
  294. pi.partition_count = 2;
  295. }
  296. else if (counts[3] == 0)
  297. {
  298. pi.partition_count = 3;
  299. }
  300. else
  301. {
  302. pi.partition_count = 4;
  303. }
  304. // Populate the partition index
  305. pi.partition_index = static_cast<uint16_t>(partition_index);
  306. // Populate the coverage bitmaps for 2/3/4 partitions
  307. uint64_t* bitmaps { nullptr };
  308. if (partition_count == 2)
  309. {
  310. bitmaps = bsd.coverage_bitmaps_2[partition_remap_index];
  311. }
  312. else if (partition_count == 3)
  313. {
  314. bitmaps = bsd.coverage_bitmaps_3[partition_remap_index];
  315. }
  316. else if (partition_count == 4)
  317. {
  318. bitmaps = bsd.coverage_bitmaps_4[partition_remap_index];
  319. }
  320. for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++)
  321. {
  322. pi.partition_texel_count[i] = static_cast<uint8_t>(counts[i]);
  323. }
  324. // Valid partitionings have texels in all of the requested partitions
  325. bool valid = pi.partition_count == partition_count;
  326. if (bitmaps)
  327. {
  328. // Populate the partition coverage bitmap
  329. for (unsigned int i = 0; i < partition_count; i++)
  330. {
  331. bitmaps[i] = 0ULL;
  332. }
  333. unsigned int texels_to_process = astc::min(bsd.texel_count, BLOCK_MAX_KMEANS_TEXELS);
  334. for (unsigned int i = 0; i < texels_to_process; i++)
  335. {
  336. unsigned int idx = bsd.kmeans_texels[i];
  337. bitmaps[pi.partition_of_texel[idx]] |= 1ULL << i;
  338. }
  339. }
  340. return valid;
  341. }
  342. static void build_partition_table_for_one_partition_count(
  343. block_size_descriptor& bsd,
  344. bool can_omit_partitionings,
  345. unsigned int partition_count_cutoff,
  346. unsigned int partition_count,
  347. partition_info* ptab,
  348. uint64_t* canonical_patterns
  349. ) {
  350. unsigned int next_index = 0;
  351. bsd.partitioning_count_selected[partition_count - 1] = 0;
  352. bsd.partitioning_count_all[partition_count - 1] = 0;
  353. // Skip tables larger than config max partition count if we can omit modes
  354. if (can_omit_partitionings && (partition_count > partition_count_cutoff))
  355. {
  356. return;
  357. }
  358. // Iterate through twice
  359. // - Pass 0: Keep selected partitionings
  360. // - Pass 1: Keep non-selected partitionings (skip if in omit mode)
  361. unsigned int max_iter = can_omit_partitionings ? 1 : 2;
  362. // Tracker for things we built in the first iteration
  363. uint8_t build[BLOCK_MAX_PARTITIONINGS] { 0 };
  364. for (unsigned int x = 0; x < max_iter; x++)
  365. {
  366. for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONINGS; i++)
  367. {
  368. // Don't include things we built in the first pass
  369. if ((x == 1) && build[i])
  370. {
  371. continue;
  372. }
  373. bool keep_useful = generate_one_partition_info_entry(bsd, partition_count, i, next_index, ptab[next_index]);
  374. if ((x == 0) && !keep_useful)
  375. {
  376. continue;
  377. }
  378. generate_canonical_partitioning(bsd.texel_count, ptab[next_index].partition_of_texel, canonical_patterns + next_index * BIT_PATTERN_WORDS);
  379. bool keep_canonical = true;
  380. for (unsigned int j = 0; j < next_index; j++)
  381. {
  382. bool match = compare_canonical_partitionings(canonical_patterns + next_index * BIT_PATTERN_WORDS, canonical_patterns + j * BIT_PATTERN_WORDS);
  383. if (match)
  384. {
  385. keep_canonical = false;
  386. break;
  387. }
  388. }
  389. if (keep_useful && keep_canonical)
  390. {
  391. if (x == 0)
  392. {
  393. bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index);
  394. bsd.partitioning_count_selected[partition_count - 1]++;
  395. bsd.partitioning_count_all[partition_count - 1]++;
  396. build[i] = 1;
  397. next_index++;
  398. }
  399. }
  400. else
  401. {
  402. if (x == 1)
  403. {
  404. bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index);
  405. bsd.partitioning_count_all[partition_count - 1]++;
  406. next_index++;
  407. }
  408. }
  409. }
  410. }
  411. }
  412. /* See header for documentation. */
  413. void init_partition_tables(
  414. block_size_descriptor& bsd,
  415. bool can_omit_partitionings,
  416. unsigned int partition_count_cutoff
  417. ) {
  418. partition_info* par_tab2 = bsd.partitionings;
  419. partition_info* par_tab3 = par_tab2 + BLOCK_MAX_PARTITIONINGS;
  420. partition_info* par_tab4 = par_tab3 + BLOCK_MAX_PARTITIONINGS;
  421. partition_info* par_tab1 = par_tab4 + BLOCK_MAX_PARTITIONINGS;
  422. generate_one_partition_info_entry(bsd, 1, 0, 0, *par_tab1);
  423. bsd.partitioning_count_selected[0] = 1;
  424. bsd.partitioning_count_all[0] = 1;
  425. uint64_t* canonical_patterns = new uint64_t[BLOCK_MAX_PARTITIONINGS * BIT_PATTERN_WORDS];
  426. build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 2, par_tab2, canonical_patterns);
  427. build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 3, par_tab3, canonical_patterns);
  428. build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 4, par_tab4, canonical_patterns);
  429. delete[] canonical_patterns;
  430. }