astcenc_block_sizes.cpp 36 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185
  1. // SPDX-License-Identifier: Apache-2.0
  2. // ----------------------------------------------------------------------------
  3. // Copyright 2011-2023 Arm Limited
  4. //
  5. // Licensed under the Apache License, Version 2.0 (the "License"); you may not
  6. // use this file except in compliance with the License. You may obtain a copy
  7. // of the License at:
  8. //
  9. // http://www.apache.org/licenses/LICENSE-2.0
  10. //
  11. // Unless required by applicable law or agreed to in writing, software
  12. // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  13. // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  14. // License for the specific language governing permissions and limitations
  15. // under the License.
  16. // ----------------------------------------------------------------------------
  17. /**
  18. * @brief Functions to generate block size descriptor and decimation tables.
  19. */
  20. #include "astcenc_internal.h"
  21. /**
  22. * @brief Decode the properties of an encoded 2D block mode.
  23. *
  24. * @param block_mode The encoded block mode.
  25. * @param[out] x_weights The number of weights in the X dimension.
  26. * @param[out] y_weights The number of weights in the Y dimension.
  27. * @param[out] is_dual_plane True if this block mode has two weight planes.
  28. * @param[out] quant_mode The quantization level for the weights.
  29. * @param[out] weight_bits The storage bit count for the weights.
  30. *
  31. * @return Returns true if a valid mode, false otherwise.
  32. */
  33. static bool decode_block_mode_2d(
  34. unsigned int block_mode,
  35. unsigned int& x_weights,
  36. unsigned int& y_weights,
  37. bool& is_dual_plane,
  38. unsigned int& quant_mode,
  39. unsigned int& weight_bits
  40. ) {
  41. unsigned int base_quant_mode = (block_mode >> 4) & 1;
  42. unsigned int H = (block_mode >> 9) & 1;
  43. unsigned int D = (block_mode >> 10) & 1;
  44. unsigned int A = (block_mode >> 5) & 0x3;
  45. x_weights = 0;
  46. y_weights = 0;
  47. if ((block_mode & 3) != 0)
  48. {
  49. base_quant_mode |= (block_mode & 3) << 1;
  50. unsigned int B = (block_mode >> 7) & 3;
  51. switch ((block_mode >> 2) & 3)
  52. {
  53. case 0:
  54. x_weights = B + 4;
  55. y_weights = A + 2;
  56. break;
  57. case 1:
  58. x_weights = B + 8;
  59. y_weights = A + 2;
  60. break;
  61. case 2:
  62. x_weights = A + 2;
  63. y_weights = B + 8;
  64. break;
  65. case 3:
  66. B &= 1;
  67. if (block_mode & 0x100)
  68. {
  69. x_weights = B + 2;
  70. y_weights = A + 2;
  71. }
  72. else
  73. {
  74. x_weights = A + 2;
  75. y_weights = B + 6;
  76. }
  77. break;
  78. }
  79. }
  80. else
  81. {
  82. base_quant_mode |= ((block_mode >> 2) & 3) << 1;
  83. if (((block_mode >> 2) & 3) == 0)
  84. {
  85. return false;
  86. }
  87. unsigned int B = (block_mode >> 9) & 3;
  88. switch ((block_mode >> 7) & 3)
  89. {
  90. case 0:
  91. x_weights = 12;
  92. y_weights = A + 2;
  93. break;
  94. case 1:
  95. x_weights = A + 2;
  96. y_weights = 12;
  97. break;
  98. case 2:
  99. x_weights = A + 6;
  100. y_weights = B + 6;
  101. D = 0;
  102. H = 0;
  103. break;
  104. case 3:
  105. switch ((block_mode >> 5) & 3)
  106. {
  107. case 0:
  108. x_weights = 6;
  109. y_weights = 10;
  110. break;
  111. case 1:
  112. x_weights = 10;
  113. y_weights = 6;
  114. break;
  115. case 2:
  116. case 3:
  117. return false;
  118. }
  119. break;
  120. }
  121. }
  122. unsigned int weight_count = x_weights * y_weights * (D + 1);
  123. quant_mode = (base_quant_mode - 2) + 6 * H;
  124. is_dual_plane = D != 0;
  125. weight_bits = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(quant_mode));
  126. return (weight_count <= BLOCK_MAX_WEIGHTS &&
  127. weight_bits >= BLOCK_MIN_WEIGHT_BITS &&
  128. weight_bits <= BLOCK_MAX_WEIGHT_BITS);
  129. }
  130. /**
  131. * @brief Decode the properties of an encoded 3D block mode.
  132. *
  133. * @param block_mode The encoded block mode.
  134. * @param[out] x_weights The number of weights in the X dimension.
  135. * @param[out] y_weights The number of weights in the Y dimension.
  136. * @param[out] z_weights The number of weights in the Z dimension.
  137. * @param[out] is_dual_plane True if this block mode has two weight planes.
  138. * @param[out] quant_mode The quantization level for the weights.
  139. * @param[out] weight_bits The storage bit count for the weights.
  140. *
  141. * @return Returns true if a valid mode, false otherwise.
  142. */
  143. static bool decode_block_mode_3d(
  144. unsigned int block_mode,
  145. unsigned int& x_weights,
  146. unsigned int& y_weights,
  147. unsigned int& z_weights,
  148. bool& is_dual_plane,
  149. unsigned int& quant_mode,
  150. unsigned int& weight_bits
  151. ) {
  152. unsigned int base_quant_mode = (block_mode >> 4) & 1;
  153. unsigned int H = (block_mode >> 9) & 1;
  154. unsigned int D = (block_mode >> 10) & 1;
  155. unsigned int A = (block_mode >> 5) & 0x3;
  156. x_weights = 0;
  157. y_weights = 0;
  158. z_weights = 0;
  159. if ((block_mode & 3) != 0)
  160. {
  161. base_quant_mode |= (block_mode & 3) << 1;
  162. unsigned int B = (block_mode >> 7) & 3;
  163. unsigned int C = (block_mode >> 2) & 0x3;
  164. x_weights = A + 2;
  165. y_weights = B + 2;
  166. z_weights = C + 2;
  167. }
  168. else
  169. {
  170. base_quant_mode |= ((block_mode >> 2) & 3) << 1;
  171. if (((block_mode >> 2) & 3) == 0)
  172. {
  173. return false;
  174. }
  175. int B = (block_mode >> 9) & 3;
  176. if (((block_mode >> 7) & 3) != 3)
  177. {
  178. D = 0;
  179. H = 0;
  180. }
  181. switch ((block_mode >> 7) & 3)
  182. {
  183. case 0:
  184. x_weights = 6;
  185. y_weights = B + 2;
  186. z_weights = A + 2;
  187. break;
  188. case 1:
  189. x_weights = A + 2;
  190. y_weights = 6;
  191. z_weights = B + 2;
  192. break;
  193. case 2:
  194. x_weights = A + 2;
  195. y_weights = B + 2;
  196. z_weights = 6;
  197. break;
  198. case 3:
  199. x_weights = 2;
  200. y_weights = 2;
  201. z_weights = 2;
  202. switch ((block_mode >> 5) & 3)
  203. {
  204. case 0:
  205. x_weights = 6;
  206. break;
  207. case 1:
  208. y_weights = 6;
  209. break;
  210. case 2:
  211. z_weights = 6;
  212. break;
  213. case 3:
  214. return false;
  215. }
  216. break;
  217. }
  218. }
  219. unsigned int weight_count = x_weights * y_weights * z_weights * (D + 1);
  220. quant_mode = (base_quant_mode - 2) + 6 * H;
  221. is_dual_plane = D != 0;
  222. weight_bits = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(quant_mode));
  223. return (weight_count <= BLOCK_MAX_WEIGHTS &&
  224. weight_bits >= BLOCK_MIN_WEIGHT_BITS &&
  225. weight_bits <= BLOCK_MAX_WEIGHT_BITS);
  226. }
  227. /**
  228. * @brief Create a 2D decimation entry for a block-size and weight-decimation pair.
  229. *
  230. * @param x_texels The number of texels in the X dimension.
  231. * @param y_texels The number of texels in the Y dimension.
  232. * @param x_weights The number of weights in the X dimension.
  233. * @param y_weights The number of weights in the Y dimension.
  234. * @param[out] di The decimation info structure to populate.
  235. * @param[out] wb The decimation table init scratch working buffers.
  236. */
  237. static void init_decimation_info_2d(
  238. unsigned int x_texels,
  239. unsigned int y_texels,
  240. unsigned int x_weights,
  241. unsigned int y_weights,
  242. decimation_info& di,
  243. dt_init_working_buffers& wb
  244. ) {
  245. unsigned int texels_per_block = x_texels * y_texels;
  246. unsigned int weights_per_block = x_weights * y_weights;
  247. uint8_t max_texel_count_of_weight = 0;
  248. promise(weights_per_block > 0);
  249. promise(texels_per_block > 0);
  250. promise(x_texels > 0);
  251. promise(y_texels > 0);
  252. for (unsigned int i = 0; i < weights_per_block; i++)
  253. {
  254. wb.texel_count_of_weight[i] = 0;
  255. }
  256. for (unsigned int i = 0; i < texels_per_block; i++)
  257. {
  258. wb.weight_count_of_texel[i] = 0;
  259. }
  260. for (unsigned int y = 0; y < y_texels; y++)
  261. {
  262. for (unsigned int x = 0; x < x_texels; x++)
  263. {
  264. unsigned int texel = y * x_texels + x;
  265. unsigned int x_weight = (((1024 + x_texels / 2) / (x_texels - 1)) * x * (x_weights - 1) + 32) >> 6;
  266. unsigned int y_weight = (((1024 + y_texels / 2) / (y_texels - 1)) * y * (y_weights - 1) + 32) >> 6;
  267. unsigned int x_weight_frac = x_weight & 0xF;
  268. unsigned int y_weight_frac = y_weight & 0xF;
  269. unsigned int x_weight_int = x_weight >> 4;
  270. unsigned int y_weight_int = y_weight >> 4;
  271. unsigned int qweight[4];
  272. qweight[0] = x_weight_int + y_weight_int * x_weights;
  273. qweight[1] = qweight[0] + 1;
  274. qweight[2] = qweight[0] + x_weights;
  275. qweight[3] = qweight[2] + 1;
  276. // Truncated-precision bilinear interpolation
  277. unsigned int prod = x_weight_frac * y_weight_frac;
  278. unsigned int weight[4];
  279. weight[3] = (prod + 8) >> 4;
  280. weight[1] = x_weight_frac - weight[3];
  281. weight[2] = y_weight_frac - weight[3];
  282. weight[0] = 16 - x_weight_frac - y_weight_frac + weight[3];
  283. for (unsigned int i = 0; i < 4; i++)
  284. {
  285. if (weight[i] != 0)
  286. {
  287. wb.grid_weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(qweight[i]);
  288. wb.weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(weight[i]);
  289. wb.weight_count_of_texel[texel]++;
  290. wb.texels_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(texel);
  291. wb.texel_weights_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(weight[i]);
  292. wb.texel_count_of_weight[qweight[i]]++;
  293. max_texel_count_of_weight = astc::max(max_texel_count_of_weight, wb.texel_count_of_weight[qweight[i]]);
  294. }
  295. }
  296. }
  297. }
  298. uint8_t max_texel_weight_count = 0;
  299. for (unsigned int i = 0; i < texels_per_block; i++)
  300. {
  301. di.texel_weight_count[i] = wb.weight_count_of_texel[i];
  302. max_texel_weight_count = astc::max(max_texel_weight_count, di.texel_weight_count[i]);
  303. for (unsigned int j = 0; j < wb.weight_count_of_texel[i]; j++)
  304. {
  305. di.texel_weight_contribs_int_tr[j][i] = wb.weights_of_texel[i][j];
  306. di.texel_weight_contribs_float_tr[j][i] = static_cast<float>(wb.weights_of_texel[i][j]) * (1.0f / WEIGHTS_TEXEL_SUM);
  307. di.texel_weights_tr[j][i] = wb.grid_weights_of_texel[i][j];
  308. }
  309. // Init all 4 entries so we can rely on zeros for vectorization
  310. for (unsigned int j = wb.weight_count_of_texel[i]; j < 4; j++)
  311. {
  312. di.texel_weight_contribs_int_tr[j][i] = 0;
  313. di.texel_weight_contribs_float_tr[j][i] = 0.0f;
  314. di.texel_weights_tr[j][i] = 0;
  315. }
  316. }
  317. di.max_texel_weight_count = max_texel_weight_count;
  318. for (unsigned int i = 0; i < weights_per_block; i++)
  319. {
  320. unsigned int texel_count_wt = wb.texel_count_of_weight[i];
  321. di.weight_texel_count[i] = static_cast<uint8_t>(texel_count_wt);
  322. for (unsigned int j = 0; j < texel_count_wt; j++)
  323. {
  324. uint8_t texel = wb.texels_of_weight[i][j];
  325. // Create transposed versions of these for better vectorization
  326. di.weight_texels_tr[j][i] = texel;
  327. di.weights_texel_contribs_tr[j][i] = static_cast<float>(wb.texel_weights_of_weight[i][j]);
  328. // Store the per-texel contribution of this weight for each texel it contributes to
  329. di.texel_contrib_for_weight[j][i] = 0.0f;
  330. for (unsigned int k = 0; k < 4; k++)
  331. {
  332. uint8_t dttw = di.texel_weights_tr[k][texel];
  333. float dttwf = di.texel_weight_contribs_float_tr[k][texel];
  334. if (dttw == i && dttwf != 0.0f)
  335. {
  336. di.texel_contrib_for_weight[j][i] = di.texel_weight_contribs_float_tr[k][texel];
  337. break;
  338. }
  339. }
  340. }
  341. // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
  342. // Match last texel in active lane in SIMD group, for better gathers
  343. uint8_t last_texel = di.weight_texels_tr[texel_count_wt - 1][i];
  344. for (unsigned int j = texel_count_wt; j < max_texel_count_of_weight; j++)
  345. {
  346. di.weight_texels_tr[j][i] = last_texel;
  347. di.weights_texel_contribs_tr[j][i] = 0.0f;
  348. }
  349. }
  350. // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
  351. unsigned int texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block);
  352. for (unsigned int i = texels_per_block; i < texels_per_block_simd; i++)
  353. {
  354. di.texel_weight_count[i] = 0;
  355. for (unsigned int j = 0; j < 4; j++)
  356. {
  357. di.texel_weight_contribs_float_tr[j][i] = 0;
  358. di.texel_weights_tr[j][i] = 0;
  359. di.texel_weight_contribs_int_tr[j][i] = 0;
  360. }
  361. }
  362. // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
  363. // Match last texel in active lane in SIMD group, for better gathers
  364. unsigned int last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1];
  365. uint8_t last_texel = di.weight_texels_tr[last_texel_count_wt - 1][weights_per_block - 1];
  366. unsigned int weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block);
  367. for (unsigned int i = weights_per_block; i < weights_per_block_simd; i++)
  368. {
  369. di.weight_texel_count[i] = 0;
  370. for (unsigned int j = 0; j < max_texel_count_of_weight; j++)
  371. {
  372. di.weight_texels_tr[j][i] = last_texel;
  373. di.weights_texel_contribs_tr[j][i] = 0.0f;
  374. }
  375. }
  376. di.texel_count = static_cast<uint8_t>(texels_per_block);
  377. di.weight_count = static_cast<uint8_t>(weights_per_block);
  378. di.weight_x = static_cast<uint8_t>(x_weights);
  379. di.weight_y = static_cast<uint8_t>(y_weights);
  380. di.weight_z = 1;
  381. }
  382. /**
  383. * @brief Create a 3D decimation entry for a block-size and weight-decimation pair.
  384. *
  385. * @param x_texels The number of texels in the X dimension.
  386. * @param y_texels The number of texels in the Y dimension.
  387. * @param z_texels The number of texels in the Z dimension.
  388. * @param x_weights The number of weights in the X dimension.
  389. * @param y_weights The number of weights in the Y dimension.
  390. * @param z_weights The number of weights in the Z dimension.
  391. * @param[out] di The decimation info structure to populate.
  392. @param[out] wb The decimation table init scratch working buffers.
  393. */
  394. static void init_decimation_info_3d(
  395. unsigned int x_texels,
  396. unsigned int y_texels,
  397. unsigned int z_texels,
  398. unsigned int x_weights,
  399. unsigned int y_weights,
  400. unsigned int z_weights,
  401. decimation_info& di,
  402. dt_init_working_buffers& wb
  403. ) {
  404. unsigned int texels_per_block = x_texels * y_texels * z_texels;
  405. unsigned int weights_per_block = x_weights * y_weights * z_weights;
  406. uint8_t max_texel_count_of_weight = 0;
  407. promise(weights_per_block > 0);
  408. promise(texels_per_block > 0);
  409. for (unsigned int i = 0; i < weights_per_block; i++)
  410. {
  411. wb.texel_count_of_weight[i] = 0;
  412. }
  413. for (unsigned int i = 0; i < texels_per_block; i++)
  414. {
  415. wb.weight_count_of_texel[i] = 0;
  416. }
  417. for (unsigned int z = 0; z < z_texels; z++)
  418. {
  419. for (unsigned int y = 0; y < y_texels; y++)
  420. {
  421. for (unsigned int x = 0; x < x_texels; x++)
  422. {
  423. int texel = (z * y_texels + y) * x_texels + x;
  424. int x_weight = (((1024 + x_texels / 2) / (x_texels - 1)) * x * (x_weights - 1) + 32) >> 6;
  425. int y_weight = (((1024 + y_texels / 2) / (y_texels - 1)) * y * (y_weights - 1) + 32) >> 6;
  426. int z_weight = (((1024 + z_texels / 2) / (z_texels - 1)) * z * (z_weights - 1) + 32) >> 6;
  427. int x_weight_frac = x_weight & 0xF;
  428. int y_weight_frac = y_weight & 0xF;
  429. int z_weight_frac = z_weight & 0xF;
  430. int x_weight_int = x_weight >> 4;
  431. int y_weight_int = y_weight >> 4;
  432. int z_weight_int = z_weight >> 4;
  433. int qweight[4];
  434. int weight[4];
  435. qweight[0] = (z_weight_int * y_weights + y_weight_int) * x_weights + x_weight_int;
  436. qweight[3] = ((z_weight_int + 1) * y_weights + (y_weight_int + 1)) * x_weights + (x_weight_int + 1);
  437. // simplex interpolation
  438. int fs = x_weight_frac;
  439. int ft = y_weight_frac;
  440. int fp = z_weight_frac;
  441. int cas = ((fs > ft) << 2) + ((ft > fp) << 1) + ((fs > fp));
  442. int N = x_weights;
  443. int NM = x_weights * y_weights;
  444. int s1, s2, w0, w1, w2, w3;
  445. switch (cas)
  446. {
  447. case 7:
  448. s1 = 1;
  449. s2 = N;
  450. w0 = 16 - fs;
  451. w1 = fs - ft;
  452. w2 = ft - fp;
  453. w3 = fp;
  454. break;
  455. case 3:
  456. s1 = N;
  457. s2 = 1;
  458. w0 = 16 - ft;
  459. w1 = ft - fs;
  460. w2 = fs - fp;
  461. w3 = fp;
  462. break;
  463. case 5:
  464. s1 = 1;
  465. s2 = NM;
  466. w0 = 16 - fs;
  467. w1 = fs - fp;
  468. w2 = fp - ft;
  469. w3 = ft;
  470. break;
  471. case 4:
  472. s1 = NM;
  473. s2 = 1;
  474. w0 = 16 - fp;
  475. w1 = fp - fs;
  476. w2 = fs - ft;
  477. w3 = ft;
  478. break;
  479. case 2:
  480. s1 = N;
  481. s2 = NM;
  482. w0 = 16 - ft;
  483. w1 = ft - fp;
  484. w2 = fp - fs;
  485. w3 = fs;
  486. break;
  487. case 0:
  488. s1 = NM;
  489. s2 = N;
  490. w0 = 16 - fp;
  491. w1 = fp - ft;
  492. w2 = ft - fs;
  493. w3 = fs;
  494. break;
  495. default:
  496. s1 = NM;
  497. s2 = N;
  498. w0 = 16 - fp;
  499. w1 = fp - ft;
  500. w2 = ft - fs;
  501. w3 = fs;
  502. break;
  503. }
  504. qweight[1] = qweight[0] + s1;
  505. qweight[2] = qweight[1] + s2;
  506. weight[0] = w0;
  507. weight[1] = w1;
  508. weight[2] = w2;
  509. weight[3] = w3;
  510. for (unsigned int i = 0; i < 4; i++)
  511. {
  512. if (weight[i] != 0)
  513. {
  514. wb.grid_weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(qweight[i]);
  515. wb.weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(weight[i]);
  516. wb.weight_count_of_texel[texel]++;
  517. wb.texels_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(texel);
  518. wb.texel_weights_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(weight[i]);
  519. wb.texel_count_of_weight[qweight[i]]++;
  520. max_texel_count_of_weight = astc::max(max_texel_count_of_weight, wb.texel_count_of_weight[qweight[i]]);
  521. }
  522. }
  523. }
  524. }
  525. }
  526. uint8_t max_texel_weight_count = 0;
  527. for (unsigned int i = 0; i < texels_per_block; i++)
  528. {
  529. di.texel_weight_count[i] = wb.weight_count_of_texel[i];
  530. max_texel_weight_count = astc::max(max_texel_weight_count, di.texel_weight_count[i]);
  531. // Init all 4 entries so we can rely on zeros for vectorization
  532. for (unsigned int j = 0; j < 4; j++)
  533. {
  534. di.texel_weight_contribs_int_tr[j][i] = 0;
  535. di.texel_weight_contribs_float_tr[j][i] = 0.0f;
  536. di.texel_weights_tr[j][i] = 0;
  537. }
  538. for (unsigned int j = 0; j < wb.weight_count_of_texel[i]; j++)
  539. {
  540. di.texel_weight_contribs_int_tr[j][i] = wb.weights_of_texel[i][j];
  541. di.texel_weight_contribs_float_tr[j][i] = static_cast<float>(wb.weights_of_texel[i][j]) * (1.0f / WEIGHTS_TEXEL_SUM);
  542. di.texel_weights_tr[j][i] = wb.grid_weights_of_texel[i][j];
  543. }
  544. }
  545. di.max_texel_weight_count = max_texel_weight_count;
  546. for (unsigned int i = 0; i < weights_per_block; i++)
  547. {
  548. unsigned int texel_count_wt = wb.texel_count_of_weight[i];
  549. di.weight_texel_count[i] = static_cast<uint8_t>(texel_count_wt);
  550. for (unsigned int j = 0; j < texel_count_wt; j++)
  551. {
  552. unsigned int texel = wb.texels_of_weight[i][j];
  553. // Create transposed versions of these for better vectorization
  554. di.weight_texels_tr[j][i] = static_cast<uint8_t>(texel);
  555. di.weights_texel_contribs_tr[j][i] = static_cast<float>(wb.texel_weights_of_weight[i][j]);
  556. // Store the per-texel contribution of this weight for each texel it contributes to
  557. di.texel_contrib_for_weight[j][i] = 0.0f;
  558. for (unsigned int k = 0; k < 4; k++)
  559. {
  560. uint8_t dttw = di.texel_weights_tr[k][texel];
  561. float dttwf = di.texel_weight_contribs_float_tr[k][texel];
  562. if (dttw == i && dttwf != 0.0f)
  563. {
  564. di.texel_contrib_for_weight[j][i] = di.texel_weight_contribs_float_tr[k][texel];
  565. break;
  566. }
  567. }
  568. }
  569. // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
  570. // Match last texel in active lane in SIMD group, for better gathers
  571. uint8_t last_texel = di.weight_texels_tr[texel_count_wt - 1][i];
  572. for (unsigned int j = texel_count_wt; j < max_texel_count_of_weight; j++)
  573. {
  574. di.weight_texels_tr[j][i] = last_texel;
  575. di.weights_texel_contribs_tr[j][i] = 0.0f;
  576. }
  577. }
  578. // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
  579. unsigned int texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block);
  580. for (unsigned int i = texels_per_block; i < texels_per_block_simd; i++)
  581. {
  582. di.texel_weight_count[i] = 0;
  583. for (unsigned int j = 0; j < 4; j++)
  584. {
  585. di.texel_weight_contribs_float_tr[j][i] = 0;
  586. di.texel_weights_tr[j][i] = 0;
  587. di.texel_weight_contribs_int_tr[j][i] = 0;
  588. }
  589. }
  590. // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
  591. // Match last texel in active lane in SIMD group, for better gathers
  592. int last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1];
  593. uint8_t last_texel = di.weight_texels_tr[last_texel_count_wt - 1][weights_per_block - 1];
  594. unsigned int weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block);
  595. for (unsigned int i = weights_per_block; i < weights_per_block_simd; i++)
  596. {
  597. di.weight_texel_count[i] = 0;
  598. for (int j = 0; j < max_texel_count_of_weight; j++)
  599. {
  600. di.weight_texels_tr[j][i] = last_texel;
  601. di.weights_texel_contribs_tr[j][i] = 0.0f;
  602. }
  603. }
  604. di.texel_count = static_cast<uint8_t>(texels_per_block);
  605. di.weight_count = static_cast<uint8_t>(weights_per_block);
  606. di.weight_x = static_cast<uint8_t>(x_weights);
  607. di.weight_y = static_cast<uint8_t>(y_weights);
  608. di.weight_z = static_cast<uint8_t>(z_weights);
  609. }
  610. /**
  611. * @brief Assign the texels to use for kmeans clustering.
  612. *
  613. * The max limit is @c BLOCK_MAX_KMEANS_TEXELS; above this a random selection is used.
  614. * The @c bsd.texel_count is an input and must be populated beforehand.
  615. *
  616. * @param[in,out] bsd The block size descriptor to populate.
  617. */
  618. static void assign_kmeans_texels(
  619. block_size_descriptor& bsd
  620. ) {
  621. // Use all texels for kmeans on a small block
  622. if (bsd.texel_count <= BLOCK_MAX_KMEANS_TEXELS)
  623. {
  624. for (uint8_t i = 0; i < bsd.texel_count; i++)
  625. {
  626. bsd.kmeans_texels[i] = i;
  627. }
  628. return;
  629. }
  630. // Select a random subset of BLOCK_MAX_KMEANS_TEXELS for kmeans on a large block
  631. uint64_t rng_state[2];
  632. astc::rand_init(rng_state);
  633. // Initialize array used for tracking used indices
  634. bool seen[BLOCK_MAX_TEXELS];
  635. for (uint8_t i = 0; i < bsd.texel_count; i++)
  636. {
  637. seen[i] = false;
  638. }
  639. // Assign 64 random indices, retrying if we see repeats
  640. unsigned int arr_elements_set = 0;
  641. while (arr_elements_set < BLOCK_MAX_KMEANS_TEXELS)
  642. {
  643. uint8_t texel = static_cast<uint8_t>(astc::rand(rng_state));
  644. texel = texel % bsd.texel_count;
  645. if (!seen[texel])
  646. {
  647. bsd.kmeans_texels[arr_elements_set++] = texel;
  648. seen[texel] = true;
  649. }
  650. }
  651. }
  652. /**
  653. * @brief Allocate a single 2D decimation table entry.
  654. *
  655. * @param x_texels The number of texels in the X dimension.
  656. * @param y_texels The number of texels in the Y dimension.
  657. * @param x_weights The number of weights in the X dimension.
  658. * @param y_weights The number of weights in the Y dimension.
  659. * @param bsd The block size descriptor we are populating.
  660. * @param wb The decimation table init scratch working buffers.
  661. * @param index The packed array index to populate.
  662. */
  663. static void construct_dt_entry_2d(
  664. unsigned int x_texels,
  665. unsigned int y_texels,
  666. unsigned int x_weights,
  667. unsigned int y_weights,
  668. block_size_descriptor& bsd,
  669. dt_init_working_buffers& wb,
  670. unsigned int index
  671. ) {
  672. unsigned int weight_count = x_weights * y_weights;
  673. assert(weight_count <= BLOCK_MAX_WEIGHTS);
  674. bool try_2planes = (2 * weight_count) <= BLOCK_MAX_WEIGHTS;
  675. decimation_info& di = bsd.decimation_tables[index];
  676. init_decimation_info_2d(x_texels, y_texels, x_weights, y_weights, di, wb);
  677. int maxprec_1plane = -1;
  678. int maxprec_2planes = -1;
  679. for (int i = 0; i < 12; i++)
  680. {
  681. unsigned int bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(i));
  682. if (bits_1plane >= BLOCK_MIN_WEIGHT_BITS && bits_1plane <= BLOCK_MAX_WEIGHT_BITS)
  683. {
  684. maxprec_1plane = i;
  685. }
  686. if (try_2planes)
  687. {
  688. unsigned int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast<quant_method>(i));
  689. if (bits_2planes >= BLOCK_MIN_WEIGHT_BITS && bits_2planes <= BLOCK_MAX_WEIGHT_BITS)
  690. {
  691. maxprec_2planes = i;
  692. }
  693. }
  694. }
  695. // At least one of the two should be valid ...
  696. assert(maxprec_1plane >= 0 || maxprec_2planes >= 0);
  697. bsd.decimation_modes[index].maxprec_1plane = static_cast<int8_t>(maxprec_1plane);
  698. bsd.decimation_modes[index].maxprec_2planes = static_cast<int8_t>(maxprec_2planes);
  699. bsd.decimation_modes[index].refprec_1plane = 0;
  700. bsd.decimation_modes[index].refprec_2planes = 0;
  701. }
  702. /**
  703. * @brief Allocate block modes and decimation tables for a single 2D block size.
  704. *
  705. * @param x_texels The number of texels in the X dimension.
  706. * @param y_texels The number of texels in the Y dimension.
  707. * @param can_omit_modes Can we discard modes that astcenc won't use, even if legal?
  708. * @param mode_cutoff Percentile cutoff in range [0,1]. Low values more likely to be used.
  709. * @param[out] bsd The block size descriptor to populate.
  710. */
  711. static void construct_block_size_descriptor_2d(
  712. unsigned int x_texels,
  713. unsigned int y_texels,
  714. bool can_omit_modes,
  715. float mode_cutoff,
  716. block_size_descriptor& bsd
  717. ) {
  718. // Store a remap table for storing packed decimation modes.
  719. // Indexing uses [Y * 16 + X] and max size for each axis is 12.
  720. static const unsigned int MAX_DMI = 12 * 16 + 12;
  721. int decimation_mode_index[MAX_DMI];
  722. dt_init_working_buffers* wb = new dt_init_working_buffers;
  723. bsd.xdim = static_cast<uint8_t>(x_texels);
  724. bsd.ydim = static_cast<uint8_t>(y_texels);
  725. bsd.zdim = 1;
  726. bsd.texel_count = static_cast<uint8_t>(x_texels * y_texels);
  727. for (unsigned int i = 0; i < MAX_DMI; i++)
  728. {
  729. decimation_mode_index[i] = -1;
  730. }
  731. // Gather all the decimation grids that can be used with the current block
  732. #if !defined(ASTCENC_DECOMPRESS_ONLY)
  733. const float *percentiles = get_2d_percentile_table(x_texels, y_texels);
  734. float always_cutoff = 0.0f;
  735. #else
  736. // Unused in decompress-only builds
  737. (void)can_omit_modes;
  738. (void)mode_cutoff;
  739. #endif
  740. // Construct the list of block formats referencing the decimation tables
  741. unsigned int packed_bm_idx = 0;
  742. unsigned int packed_dm_idx = 0;
  743. // Trackers
  744. unsigned int bm_counts[4] { 0 };
  745. unsigned int dm_counts[4] { 0 };
  746. // Clear the list to a known-bad value
  747. for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
  748. {
  749. bsd.block_mode_packed_index[i] = BLOCK_BAD_BLOCK_MODE;
  750. }
  751. // Iterate four times to build a usefully ordered list:
  752. // - Pass 0 - keep selected single plane "always" block modes
  753. // - Pass 1 - keep selected single plane "non-always" block modes
  754. // - Pass 2 - keep select dual plane block modes
  755. // - Pass 3 - keep everything else that's legal
  756. unsigned int limit = can_omit_modes ? 3 : 4;
  757. for (unsigned int j = 0; j < limit; j ++)
  758. {
  759. for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
  760. {
  761. // Skip modes we've already included in a previous pass
  762. if (bsd.block_mode_packed_index[i] != BLOCK_BAD_BLOCK_MODE)
  763. {
  764. continue;
  765. }
  766. // Decode parameters
  767. unsigned int x_weights;
  768. unsigned int y_weights;
  769. bool is_dual_plane;
  770. unsigned int quant_mode;
  771. unsigned int weight_bits;
  772. bool valid = decode_block_mode_2d(i, x_weights, y_weights, is_dual_plane, quant_mode, weight_bits);
  773. // Always skip invalid encodings for the current block size
  774. if (!valid || (x_weights > x_texels) || (y_weights > y_texels))
  775. {
  776. continue;
  777. }
  778. // Selectively skip dual plane encodings
  779. if (((j <= 1) && is_dual_plane) || (j == 2 && !is_dual_plane))
  780. {
  781. continue;
  782. }
  783. // Always skip encodings we can't physically encode based on
  784. // generic encoding bit availability
  785. if (is_dual_plane)
  786. {
  787. // This is the only check we need as only support 1 partition
  788. if ((109 - weight_bits) <= 0)
  789. {
  790. continue;
  791. }
  792. }
  793. else
  794. {
  795. // This is conservative - fewer bits may be available for > 1 partition
  796. if ((111 - weight_bits) <= 0)
  797. {
  798. continue;
  799. }
  800. }
  801. // Selectively skip encodings based on percentile
  802. bool percentile_hit = false;
  803. #if !defined(ASTCENC_DECOMPRESS_ONLY)
  804. if (j == 0)
  805. {
  806. percentile_hit = percentiles[i] <= always_cutoff;
  807. }
  808. else
  809. {
  810. percentile_hit = percentiles[i] <= mode_cutoff;
  811. }
  812. #endif
  813. if (j != 3 && !percentile_hit)
  814. {
  815. continue;
  816. }
  817. // Allocate and initialize the decimation table entry if we've not used it yet
  818. int decimation_mode = decimation_mode_index[y_weights * 16 + x_weights];
  819. if (decimation_mode < 0)
  820. {
  821. construct_dt_entry_2d(x_texels, y_texels, x_weights, y_weights, bsd, *wb, packed_dm_idx);
  822. decimation_mode_index[y_weights * 16 + x_weights] = packed_dm_idx;
  823. decimation_mode = packed_dm_idx;
  824. dm_counts[j]++;
  825. packed_dm_idx++;
  826. }
  827. auto& bm = bsd.block_modes[packed_bm_idx];
  828. bm.decimation_mode = static_cast<uint8_t>(decimation_mode);
  829. bm.quant_mode = static_cast<uint8_t>(quant_mode);
  830. bm.is_dual_plane = static_cast<uint8_t>(is_dual_plane);
  831. bm.weight_bits = static_cast<uint8_t>(weight_bits);
  832. bm.mode_index = static_cast<uint16_t>(i);
  833. auto& dm = bsd.decimation_modes[decimation_mode];
  834. if (is_dual_plane)
  835. {
  836. dm.set_ref_2plane(bm.get_weight_quant_mode());
  837. }
  838. else
  839. {
  840. dm.set_ref_1plane(bm.get_weight_quant_mode());
  841. }
  842. bsd.block_mode_packed_index[i] = static_cast<uint16_t>(packed_bm_idx);
  843. packed_bm_idx++;
  844. bm_counts[j]++;
  845. }
  846. }
  847. bsd.block_mode_count_1plane_always = bm_counts[0];
  848. bsd.block_mode_count_1plane_selected = bm_counts[0] + bm_counts[1];
  849. bsd.block_mode_count_1plane_2plane_selected = bm_counts[0] + bm_counts[1] + bm_counts[2];
  850. bsd.block_mode_count_all = bm_counts[0] + bm_counts[1] + bm_counts[2] + bm_counts[3];
  851. bsd.decimation_mode_count_always = dm_counts[0];
  852. bsd.decimation_mode_count_selected = dm_counts[0] + dm_counts[1] + dm_counts[2];
  853. bsd.decimation_mode_count_all = dm_counts[0] + dm_counts[1] + dm_counts[2] + dm_counts[3];
  854. #if !defined(ASTCENC_DECOMPRESS_ONLY)
  855. assert(bsd.block_mode_count_1plane_always > 0);
  856. assert(bsd.decimation_mode_count_always > 0);
  857. delete[] percentiles;
  858. #endif
  859. // Ensure the end of the array contains valid data (should never get read)
  860. for (unsigned int i = bsd.decimation_mode_count_all; i < WEIGHTS_MAX_DECIMATION_MODES; i++)
  861. {
  862. bsd.decimation_modes[i].maxprec_1plane = -1;
  863. bsd.decimation_modes[i].maxprec_2planes = -1;
  864. bsd.decimation_modes[i].refprec_1plane = 0;
  865. bsd.decimation_modes[i].refprec_2planes = 0;
  866. }
  867. // Determine the texels to use for kmeans clustering.
  868. assign_kmeans_texels(bsd);
  869. delete wb;
  870. }
  871. /**
  872. * @brief Allocate block modes and decimation tables for a single 3D block size.
  873. *
  874. * TODO: This function doesn't include all of the heuristics that we use for 2D block sizes such as
  875. * the percentile mode cutoffs. If 3D becomes more widely used we should look at this.
  876. *
  877. * @param x_texels The number of texels in the X dimension.
  878. * @param y_texels The number of texels in the Y dimension.
  879. * @param z_texels The number of texels in the Z dimension.
  880. * @param[out] bsd The block size descriptor to populate.
  881. */
  882. static void construct_block_size_descriptor_3d(
  883. unsigned int x_texels,
  884. unsigned int y_texels,
  885. unsigned int z_texels,
  886. block_size_descriptor& bsd
  887. ) {
  888. // Store a remap table for storing packed decimation modes.
  889. // Indexing uses [Z * 64 + Y * 8 + X] and max size for each axis is 6.
  890. static constexpr unsigned int MAX_DMI = 6 * 64 + 6 * 8 + 6;
  891. int decimation_mode_index[MAX_DMI];
  892. unsigned int decimation_mode_count = 0;
  893. dt_init_working_buffers* wb = new dt_init_working_buffers;
  894. bsd.xdim = static_cast<uint8_t>(x_texels);
  895. bsd.ydim = static_cast<uint8_t>(y_texels);
  896. bsd.zdim = static_cast<uint8_t>(z_texels);
  897. bsd.texel_count = static_cast<uint8_t>(x_texels * y_texels * z_texels);
  898. for (unsigned int i = 0; i < MAX_DMI; i++)
  899. {
  900. decimation_mode_index[i] = -1;
  901. }
  902. // gather all the infill-modes that can be used with the current block size
  903. for (unsigned int x_weights = 2; x_weights <= x_texels; x_weights++)
  904. {
  905. for (unsigned int y_weights = 2; y_weights <= y_texels; y_weights++)
  906. {
  907. for (unsigned int z_weights = 2; z_weights <= z_texels; z_weights++)
  908. {
  909. unsigned int weight_count = x_weights * y_weights * z_weights;
  910. if (weight_count > BLOCK_MAX_WEIGHTS)
  911. {
  912. continue;
  913. }
  914. decimation_info& di = bsd.decimation_tables[decimation_mode_count];
  915. decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights] = decimation_mode_count;
  916. init_decimation_info_3d(x_texels, y_texels, z_texels, x_weights, y_weights, z_weights, di, *wb);
  917. int maxprec_1plane = -1;
  918. int maxprec_2planes = -1;
  919. for (unsigned int i = 0; i < 12; i++)
  920. {
  921. unsigned int bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(i));
  922. if (bits_1plane >= BLOCK_MIN_WEIGHT_BITS && bits_1plane <= BLOCK_MAX_WEIGHT_BITS)
  923. {
  924. maxprec_1plane = i;
  925. }
  926. unsigned int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast<quant_method>(i));
  927. if (bits_2planes >= BLOCK_MIN_WEIGHT_BITS && bits_2planes <= BLOCK_MAX_WEIGHT_BITS)
  928. {
  929. maxprec_2planes = i;
  930. }
  931. }
  932. if ((2 * weight_count) > BLOCK_MAX_WEIGHTS)
  933. {
  934. maxprec_2planes = -1;
  935. }
  936. bsd.decimation_modes[decimation_mode_count].maxprec_1plane = static_cast<int8_t>(maxprec_1plane);
  937. bsd.decimation_modes[decimation_mode_count].maxprec_2planes = static_cast<int8_t>(maxprec_2planes);
  938. bsd.decimation_modes[decimation_mode_count].refprec_1plane = maxprec_1plane == -1 ? 0 : 0xFFFF;
  939. bsd.decimation_modes[decimation_mode_count].refprec_2planes = maxprec_2planes == -1 ? 0 : 0xFFFF;
  940. decimation_mode_count++;
  941. }
  942. }
  943. }
  944. // Ensure the end of the array contains valid data (should never get read)
  945. for (unsigned int i = decimation_mode_count; i < WEIGHTS_MAX_DECIMATION_MODES; i++)
  946. {
  947. bsd.decimation_modes[i].maxprec_1plane = -1;
  948. bsd.decimation_modes[i].maxprec_2planes = -1;
  949. bsd.decimation_modes[i].refprec_1plane = 0;
  950. bsd.decimation_modes[i].refprec_2planes = 0;
  951. }
  952. bsd.decimation_mode_count_always = 0; // Skipped for 3D modes
  953. bsd.decimation_mode_count_selected = decimation_mode_count;
  954. bsd.decimation_mode_count_all = decimation_mode_count;
  955. // Construct the list of block formats referencing the decimation tables
  956. // Clear the list to a known-bad value
  957. for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
  958. {
  959. bsd.block_mode_packed_index[i] = BLOCK_BAD_BLOCK_MODE;
  960. }
  961. unsigned int packed_idx = 0;
  962. unsigned int bm_counts[2] { 0 };
  963. // Iterate two times to build a usefully ordered list:
  964. // - Pass 0 - keep valid single plane block modes
  965. // - Pass 1 - keep valid dual plane block modes
  966. for (unsigned int j = 0; j < 2; j++)
  967. {
  968. for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
  969. {
  970. // Skip modes we've already included in a previous pass
  971. if (bsd.block_mode_packed_index[i] != BLOCK_BAD_BLOCK_MODE)
  972. {
  973. continue;
  974. }
  975. unsigned int x_weights;
  976. unsigned int y_weights;
  977. unsigned int z_weights;
  978. bool is_dual_plane;
  979. unsigned int quant_mode;
  980. unsigned int weight_bits;
  981. bool valid = decode_block_mode_3d(i, x_weights, y_weights, z_weights, is_dual_plane, quant_mode, weight_bits);
  982. // Skip invalid encodings
  983. if (!valid || x_weights > x_texels || y_weights > y_texels || z_weights > z_texels)
  984. {
  985. continue;
  986. }
  987. // Skip encodings in the wrong iteration
  988. if ((j == 0 && is_dual_plane) || (j == 1 && !is_dual_plane))
  989. {
  990. continue;
  991. }
  992. // Always skip encodings we can't physically encode based on bit availability
  993. if (is_dual_plane)
  994. {
  995. // This is the only check we need as only support 1 partition
  996. if ((109 - weight_bits) <= 0)
  997. {
  998. continue;
  999. }
  1000. }
  1001. else
  1002. {
  1003. // This is conservative - fewer bits may be available for > 1 partition
  1004. if ((111 - weight_bits) <= 0)
  1005. {
  1006. continue;
  1007. }
  1008. }
  1009. int decimation_mode = decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights];
  1010. bsd.block_modes[packed_idx].decimation_mode = static_cast<uint8_t>(decimation_mode);
  1011. bsd.block_modes[packed_idx].quant_mode = static_cast<uint8_t>(quant_mode);
  1012. bsd.block_modes[packed_idx].weight_bits = static_cast<uint8_t>(weight_bits);
  1013. bsd.block_modes[packed_idx].is_dual_plane = static_cast<uint8_t>(is_dual_plane);
  1014. bsd.block_modes[packed_idx].mode_index = static_cast<uint16_t>(i);
  1015. bsd.block_mode_packed_index[i] = static_cast<uint16_t>(packed_idx);
  1016. bm_counts[j]++;
  1017. packed_idx++;
  1018. }
  1019. }
  1020. bsd.block_mode_count_1plane_always = 0; // Skipped for 3D modes
  1021. bsd.block_mode_count_1plane_selected = bm_counts[0];
  1022. bsd.block_mode_count_1plane_2plane_selected = bm_counts[0] + bm_counts[1];
  1023. bsd.block_mode_count_all = bm_counts[0] + bm_counts[1];
  1024. // Determine the texels to use for kmeans clustering.
  1025. assign_kmeans_texels(bsd);
  1026. delete wb;
  1027. }
  1028. /* See header for documentation. */
  1029. void init_block_size_descriptor(
  1030. unsigned int x_texels,
  1031. unsigned int y_texels,
  1032. unsigned int z_texels,
  1033. bool can_omit_modes,
  1034. unsigned int partition_count_cutoff,
  1035. float mode_cutoff,
  1036. block_size_descriptor& bsd
  1037. ) {
  1038. if (z_texels > 1)
  1039. {
  1040. construct_block_size_descriptor_3d(x_texels, y_texels, z_texels, bsd);
  1041. }
  1042. else
  1043. {
  1044. construct_block_size_descriptor_2d(x_texels, y_texels, can_omit_modes, mode_cutoff, bsd);
  1045. }
  1046. init_partition_tables(bsd, can_omit_modes, partition_count_cutoff);
  1047. }