basisu_astc_helpers.h 104 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588
  1. // basisu_astc_helpers.h
// Be sure to define BASISU_ASTC_HELPERS_IMPLEMENTATION in one source file before including this header to get the implementation, otherwise you only get the declarations.
  3. #pragma once
  4. #ifndef BASISU_ASTC_HELPERS_HEADER
  5. #define BASISU_ASTC_HELPERS_HEADER
#include <assert.h>
#include <fenv.h>
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
  10. namespace astc_helpers
  11. {
  12. const uint32_t MAX_WEIGHT_VALUE = 64; // grid texel weights must range from [0,64]
  13. const uint32_t MIN_GRID_DIM = 2; // the minimum dimension of a block's weight grid
  14. const uint32_t MIN_BLOCK_DIM = 4, MAX_BLOCK_DIM = 12; // the valid block dimensions in texels
  15. const uint32_t MAX_GRID_WEIGHTS = 64; // a block may have a maximum of 64 weight grid values
  16. static const uint32_t NUM_ASTC_BLOCK_SIZES = 14;
  17. extern const uint8_t g_astc_block_sizes[NUM_ASTC_BLOCK_SIZES][2];
  18. // The Color Endpoint Modes (CEM's)
  19. enum cems
  20. {
  21. CEM_LDR_LUM_DIRECT = 0,
  22. CEM_LDR_LUM_BASE_PLUS_OFS = 1,
  23. CEM_HDR_LUM_LARGE_RANGE = 2,
  24. CEM_HDR_LUM_SMALL_RANGE = 3,
  25. CEM_LDR_LUM_ALPHA_DIRECT = 4,
  26. CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS = 5,
  27. CEM_LDR_RGB_BASE_SCALE = 6,
  28. CEM_HDR_RGB_BASE_SCALE = 7,
  29. CEM_LDR_RGB_DIRECT = 8,
  30. CEM_LDR_RGB_BASE_PLUS_OFFSET = 9,
  31. CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A = 10,
  32. CEM_HDR_RGB = 11,
  33. CEM_LDR_RGBA_DIRECT = 12,
  34. CEM_LDR_RGBA_BASE_PLUS_OFFSET = 13,
  35. CEM_HDR_RGB_LDR_ALPHA = 14,
  36. CEM_HDR_RGB_HDR_ALPHA = 15
  37. };
  38. // All Bounded Integer Sequence Coding (BISE or ISE) ranges.
  39. // Weights: Ranges [0,11] are valid.
  40. // Endpoints: Ranges [4,20] are valid.
  41. enum bise_levels
  42. {
  43. BISE_2_LEVELS = 0,
  44. BISE_3_LEVELS = 1,
  45. BISE_4_LEVELS = 2,
  46. BISE_5_LEVELS = 3,
  47. BISE_6_LEVELS = 4,
  48. BISE_8_LEVELS = 5,
  49. BISE_10_LEVELS = 6,
  50. BISE_12_LEVELS = 7,
  51. BISE_16_LEVELS = 8,
  52. BISE_20_LEVELS = 9,
  53. BISE_24_LEVELS = 10,
  54. BISE_32_LEVELS = 11,
  55. BISE_40_LEVELS = 12,
  56. BISE_48_LEVELS = 13,
  57. BISE_64_LEVELS = 14,
  58. BISE_80_LEVELS = 15,
  59. BISE_96_LEVELS = 16,
  60. BISE_128_LEVELS = 17,
  61. BISE_160_LEVELS = 18,
  62. BISE_192_LEVELS = 19,
  63. BISE_256_LEVELS = 20
  64. };
  65. const uint32_t TOTAL_ISE_RANGES = 21;
  66. // Valid endpoint ISE ranges
  67. const uint32_t FIRST_VALID_ENDPOINT_ISE_RANGE = BISE_6_LEVELS; // 4
  68. const uint32_t LAST_VALID_ENDPOINT_ISE_RANGE = BISE_256_LEVELS; // 20
  69. const uint32_t TOTAL_ENDPOINT_ISE_RANGES = LAST_VALID_ENDPOINT_ISE_RANGE - FIRST_VALID_ENDPOINT_ISE_RANGE + 1;
  70. // Valid weight ISE ranges
  71. const uint32_t FIRST_VALID_WEIGHT_ISE_RANGE = BISE_2_LEVELS; // 0
  72. const uint32_t LAST_VALID_WEIGHT_ISE_RANGE = BISE_32_LEVELS; // 11
  73. const uint32_t TOTAL_WEIGHT_ISE_RANGES = LAST_VALID_WEIGHT_ISE_RANGE - FIRST_VALID_WEIGHT_ISE_RANGE + 1;
  74. // The ISE range table.
  75. extern const int8_t g_ise_range_table[TOTAL_ISE_RANGES][3]; // 0=bits (0 to 8), 1=trits (0 or 1), 2=quints (0 or 1)
  76. // Possible Color Component Select values, used in dual plane mode.
  77. // The CCS component will be interpolated using the 2nd weight plane.
  78. enum ccs
  79. {
  80. CCS_GBA_R = 0,
  81. CCS_RBA_G = 1,
  82. CCS_RGA_B = 2,
  83. CCS_RGB_A = 3
  84. };
  85. struct astc_block
  86. {
  87. uint32_t m_vals[4];
  88. };
  89. const uint32_t MAX_PARTITIONS = 4; // Max # of partitions or subsets for single plane mode
  90. const uint32_t MAX_DUAL_PLANE_PARTITIONS = 3; // Max # of partitions or subsets for dual plane mode
  91. const uint32_t NUM_PARTITION_PATTERNS = 1024; // Total # of partition pattern seeds (10-bits)
  92. const uint32_t MAX_ENDPOINTS = 18; // Maximum # of endpoint values in a block
  93. struct log_astc_block
  94. {
  95. bool m_error_flag;
  96. bool m_solid_color_flag_ldr, m_solid_color_flag_hdr;
  97. uint16_t m_solid_color[4];
  98. // Rest is only valid if !m_solid_color_flag_ldr && !m_solid_color_flag_hdr
  99. uint32_t m_grid_width, m_grid_height; // weight grid dimensions, not the dimension of the block
  100. bool m_dual_plane;
  101. uint32_t m_weight_ise_range; // 0-11
  102. uint32_t m_endpoint_ise_range; // 4-20, this is actually inferred from the size of the other config bits+weights, but this is here for checking
  103. uint32_t m_color_component_selector; // 0-3, 0=GBA R, 1=RBA G, 2=RGA B, 3=RGB A, only used in dual plane mode
  104. uint32_t m_num_partitions; // or the # of subsets, 1-4 (1-3 if dual plane mode)
  105. uint32_t m_partition_id; // 10-bits, must be 0 if m_num_partitions==1
  106. uint32_t m_color_endpoint_modes[MAX_PARTITIONS]; // each subset's CEM's
  107. // ISE weight grid values. In dual plane mode, the order is p0,p1, p0,p1, etc.
  108. uint8_t m_weights[MAX_GRID_WEIGHTS];
  109. // ISE endpoint values
  110. // Endpoint order examples:
  111. // 1 subset LA : LL0 LH0 AL0 AH0
  112. // 1 subset RGB : RL0 RH0 GL0 GH0 BL0 BH0
  113. // 1 subset RGBA : RL0 RH0 GL0 GH0 BL0 BH0 AL0 AH0
  114. // 2 subset LA : LL0 LH0 AL0 AH0 LL1 LH1 AL1 AH1
  115. // 2 subset RGB : RL0 RH0 GL0 GH0 BL0 BH0 RL1 RH1 GL1 GH1 BL1 BH1
  116. // 2 subset RGBA : RL0 RH0 GL0 GH0 BL0 BH0 AL0 AH0 RL1 RH1 GL1 GH1 BL1 BH1 AL1 AH1
  117. uint8_t m_endpoints[MAX_ENDPOINTS];
  118. void clear()
  119. {
  120. memset(this, 0, sizeof(*this));
  121. }
  122. };
  123. // Open interval
  124. inline int bounds_check(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; }
  125. inline uint32_t bounds_check(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; }
  126. inline uint32_t get_bits(uint32_t val, int low, int high)
  127. {
  128. const int num_bits = (high - low) + 1;
  129. assert((num_bits >= 1) && (num_bits <= 32));
  130. val >>= low;
  131. if (num_bits != 32)
  132. val &= ((1u << num_bits) - 1);
  133. return val;
  134. }
  135. // Returns the number of levels in the given ISE range.
  136. inline uint32_t get_ise_levels(uint32_t ise_range)
  137. {
  138. assert(ise_range < TOTAL_ISE_RANGES);
  139. return (1 + 2 * g_ise_range_table[ise_range][1] + 4 * g_ise_range_table[ise_range][2]) << g_ise_range_table[ise_range][0];
  140. }
  141. inline int get_ise_sequence_bits(int count, int range)
  142. {
  143. // See 18.22 Data Size Determination
  144. int total_bits = g_ise_range_table[range][0] * count;
  145. total_bits += (g_ise_range_table[range][1] * 8 * count + 4) / 5;
  146. total_bits += (g_ise_range_table[range][2] * 7 * count + 2) / 3;
  147. return total_bits;
  148. }
  149. inline uint32_t weight_interpolate(uint32_t l, uint32_t h, uint32_t w)
  150. {
  151. assert(w <= MAX_WEIGHT_VALUE);
  152. return (l * (64 - w) + h * w + 32) >> 6;
  153. }
  154. void encode_bise(uint32_t* pDst, const uint8_t* pSrc_vals, uint32_t bit_pos, int num_vals, int range);
  155. // Packs a logical to physical ASTC block. Note this does not validate the block's dimensions (use is_valid_block_size()), just the grid dimensions.
  156. bool pack_astc_block(astc_block &phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range = nullptr);
  157. // Pack LDR void extent (really solid color) blocks. For LDR, pass in (val | (val << 8)) for each component.
  158. void pack_void_extent_ldr(astc_block& blk, uint16_t r, uint16_t g, uint16_t b, uint16_t a);
  159. // Pack HDR void extent (16-bit values are FP16/half floats - no NaN/Inf's)
  160. void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah);
  161. // These helpers are all quite slow, but are useful for table preparation.
  162. // Dequantizes ISE encoded endpoint val to [0,255]
  163. uint32_t dequant_bise_endpoint(uint32_t val, uint32_t ise_range); // ISE ranges 4-11
  164. // Dequantizes ISE encoded weight val to [0,64]
  165. uint32_t dequant_bise_weight(uint32_t val, uint32_t ise_range); // ISE ranges 0-10
  166. uint32_t find_nearest_bise_endpoint(int v, uint32_t ise_range);
  167. uint32_t find_nearest_bise_weight(int v, uint32_t ise_range);
  168. void create_quant_tables(
  169. uint8_t* pVal_to_ise, // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65]
  170. uint8_t* pISE_to_val, // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels]
  171. uint8_t* pISE_to_rank, // returns the level rank index given an ISE symbol, [levels]
  172. uint8_t* pRank_to_ISE, // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels]
  173. uint32_t ise_range, // ise range, [4,20] for endpoints, [0,11] for weights
  174. bool weight_flag); // false if block endpoints, true if weights
  175. // True if the CEM is LDR.
  176. bool is_cem_ldr(uint32_t mode);
  177. inline bool is_cem_hdr(uint32_t mode) { return !is_cem_ldr(mode); }
  178. // True if the passed in dimensions are a valid ASTC block size. There are 14 supported configs, from 4x4 (8bpp) to 12x12 (.89bpp).
  179. bool is_valid_block_size(uint32_t w, uint32_t h);
  180. bool block_has_any_hdr_cems(const log_astc_block& log_blk);
  181. bool block_has_any_ldr_cems(const log_astc_block& log_blk);
  182. // Returns the # of endpoint values for the given CEM.
  183. inline uint32_t get_num_cem_values(uint32_t cem) { assert(cem <= 15); return 2 + 2 * (cem >> 2); }
  184. struct dequant_table
  185. {
  186. basisu::vector<uint8_t> m_val_to_ise; // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65]
  187. basisu::vector<uint8_t> m_ISE_to_val; // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels]
  188. basisu::vector<uint8_t> m_ISE_to_rank; // returns the level rank index given an ISE symbol, [levels]
  189. basisu::vector<uint8_t> m_rank_to_ISE; // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels]
  190. void init(bool weight_flag, uint32_t num_levels, bool init_rank_tabs)
  191. {
  192. m_val_to_ise.resize(weight_flag ? (MAX_WEIGHT_VALUE + 1) : 256);
  193. m_ISE_to_val.resize(num_levels);
  194. if (init_rank_tabs)
  195. {
  196. m_ISE_to_rank.resize(num_levels);
  197. m_rank_to_ISE.resize(num_levels);
  198. }
  199. }
  200. };
  201. struct dequant_tables
  202. {
  203. dequant_table m_weights[TOTAL_WEIGHT_ISE_RANGES];
  204. dequant_table m_endpoints[TOTAL_ENDPOINT_ISE_RANGES];
  205. const dequant_table& get_weight_tab(uint32_t range) const
  206. {
  207. assert((range >= FIRST_VALID_WEIGHT_ISE_RANGE) && (range <= LAST_VALID_WEIGHT_ISE_RANGE));
  208. return m_weights[range - FIRST_VALID_WEIGHT_ISE_RANGE];
  209. }
  210. dequant_table& get_weight_tab(uint32_t range)
  211. {
  212. assert((range >= FIRST_VALID_WEIGHT_ISE_RANGE) && (range <= LAST_VALID_WEIGHT_ISE_RANGE));
  213. return m_weights[range - FIRST_VALID_WEIGHT_ISE_RANGE];
  214. }
  215. const dequant_table& get_endpoint_tab(uint32_t range) const
  216. {
  217. assert((range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (range <= LAST_VALID_ENDPOINT_ISE_RANGE));
  218. return m_endpoints[range - FIRST_VALID_ENDPOINT_ISE_RANGE];
  219. }
  220. dequant_table& get_endpoint_tab(uint32_t range)
  221. {
  222. assert((range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (range <= LAST_VALID_ENDPOINT_ISE_RANGE));
  223. return m_endpoints[range - FIRST_VALID_ENDPOINT_ISE_RANGE];
  224. }
  225. void init(bool init_rank_tabs)
  226. {
  227. for (uint32_t range = FIRST_VALID_WEIGHT_ISE_RANGE; range <= LAST_VALID_WEIGHT_ISE_RANGE; range++)
  228. {
  229. const uint32_t num_levels = get_ise_levels(range);
  230. dequant_table& tab = get_weight_tab(range);
  231. tab.init(true, num_levels, init_rank_tabs);
  232. create_quant_tables(tab.m_val_to_ise.data(), tab.m_ISE_to_val.data(), init_rank_tabs ? tab.m_ISE_to_rank.data() : nullptr, init_rank_tabs ? tab.m_rank_to_ISE.data() : nullptr, range, true);
  233. }
  234. for (uint32_t range = FIRST_VALID_ENDPOINT_ISE_RANGE; range <= LAST_VALID_ENDPOINT_ISE_RANGE; range++)
  235. {
  236. const uint32_t num_levels = get_ise_levels(range);
  237. dequant_table& tab = get_endpoint_tab(range);
  238. tab.init(false, num_levels, init_rank_tabs);
  239. create_quant_tables(tab.m_val_to_ise.data(), tab.m_ISE_to_val.data(), init_rank_tabs ? tab.m_ISE_to_rank.data() : nullptr, init_rank_tabs ? tab.m_rank_to_ISE.data() : nullptr, range, false);
  240. }
  241. }
  242. };
  243. extern dequant_tables g_dequant_tables;
  244. void init_tables(bool init_rank_tabs);
  245. // Procedurally returns the texel partition/subset index given the block coordinate and config.
  246. int compute_texel_partition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int num_partitions, bool small_block);
  247. void blue_contract(
  248. int r, int g, int b, int a,
  249. int& dr, int& dg, int& db, int& da);
  250. void bit_transfer_signed(int& a, int& b);
  251. void decode_endpoint(uint32_t cem_index, int (*pEndpoints)[2], const uint8_t* pE);
  252. typedef uint16_t half_float;
  253. half_float float_to_half(float val, bool toward_zero);
  254. float half_to_float(half_float hval);
  255. const int MAX_RGB9E5 = 0xff80;
  256. void unpack_rgb9e5(uint32_t packed, float& r, float& g, float& b);
  257. uint32_t pack_rgb9e5(float r, float g, float b);
  258. enum decode_mode
  259. {
  260. cDecodeModeSRGB8 = 0, // returns uint8_t's, not valid on HDR blocks
  261. cDecodeModeLDR8 = 1, // returns uint8_t's, not valid on HDR blocks
  262. cDecodeModeHDR16 = 2, // returns uint16_t's (half floats), valid on all LDR/HDR blocks
  263. cDecodeModeRGB9E5 = 3 // returns uint32_t's, packed as RGB 9E5 (shared exponent), see https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt
  264. };
  265. // Decodes logical block to output pixels.
  266. // pPixels must point to either 32-bit pixel values (SRGB8/LDR8/9E5) or 64-bit pixel values (HDR16)
  267. bool decode_block(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode);
  268. void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint8_t *pBits128, uint32_t bit_ofs);
  269. // Unpack a physical ASTC encoded GPU texture block to a logical block description.
  270. bool unpack_block(const void* pASTC_block, log_astc_block& log_blk, uint32_t blk_width, uint32_t blk_height);
  271. } // namespace astc_helpers
  272. #endif // BASISU_ASTC_HELPERS_HEADER
  273. //------------------------------------------------------------------
  274. #ifdef BASISU_ASTC_HELPERS_IMPLEMENTATION
  275. namespace astc_helpers
  276. {
  277. template<typename T> inline T my_min(T a, T b) { return (a < b) ? a : b; }
  278. template<typename T> inline T my_max(T a, T b) { return (a > b) ? a : b; }
  279. const uint8_t g_astc_block_sizes[NUM_ASTC_BLOCK_SIZES][2] = {
  280. { 4, 4 }, { 5, 4 }, { 5, 5 }, { 6, 5 },
  281. { 6, 6 }, { 8, 5 }, { 8, 6 }, { 10, 5 },
  282. { 10, 6 }, { 8, 8 }, { 10, 8 }, { 10, 10 },
  283. { 12, 10 }, { 12, 12 }
  284. };
  285. const int8_t g_ise_range_table[TOTAL_ISE_RANGES][3] =
  286. {
  287. //b t q
  288. //2 3 5 // rng ise_index notes
  289. { 1, 0, 0 }, // 0..1 0
  290. { 0, 1, 0 }, // 0..2 1
  291. { 2, 0, 0 }, // 0..3 2
  292. { 0, 0, 1 }, // 0..4 3
  293. { 1, 1, 0 }, // 0..5 4 min endpoint ISE index
  294. { 3, 0, 0 }, // 0..7 5
  295. { 1, 0, 1 }, // 0..9 6
  296. { 2, 1, 0 }, // 0..11 7
  297. { 4, 0, 0 }, // 0..15 8
  298. { 2, 0, 1 }, // 0..19 9
  299. { 3, 1, 0 }, // 0..23 10
  300. { 5, 0, 0 }, // 0..31 11 max weight ISE index
  301. { 3, 0, 1 }, // 0..39 12
  302. { 4, 1, 0 }, // 0..47 13
  303. { 6, 0, 0 }, // 0..63 14
  304. { 4, 0, 1 }, // 0..79 15
  305. { 5, 1, 0 }, // 0..95 16
  306. { 7, 0, 0 }, // 0..127 17
  307. { 5, 0, 1 }, // 0..159 18
  308. { 6, 1, 0 }, // 0..191 19
  309. { 8, 0, 0 }, // 0..255 20
  310. };
  311. static inline void astc_set_bits_1_to_9(uint32_t* pDst, uint32_t& bit_offset, uint32_t code, uint32_t codesize)
  312. {
  313. uint8_t* pBuf = reinterpret_cast<uint8_t*>(pDst);
  314. assert(codesize <= 9);
  315. if (codesize)
  316. {
  317. uint32_t byte_bit_offset = bit_offset & 7;
  318. uint32_t val = code << byte_bit_offset;
  319. uint32_t index = bit_offset >> 3;
  320. pBuf[index] |= (uint8_t)val;
  321. if (codesize > (8 - byte_bit_offset))
  322. pBuf[index + 1] |= (uint8_t)(val >> 8);
  323. bit_offset += codesize;
  324. }
  325. }
  326. static inline uint32_t astc_extract_bits(uint32_t bits, int low, int high)
  327. {
  328. return (bits >> low) & ((1 << (high - low + 1)) - 1);
  329. }
  330. // Writes bits to output in an endian safe way
  331. static inline void astc_set_bits(uint32_t* pOutput, uint32_t& bit_pos, uint32_t value, uint32_t total_bits)
  332. {
  333. assert(total_bits <= 31);
  334. assert(value < (1u << total_bits));
  335. uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput);
  336. while (total_bits)
  337. {
  338. const uint32_t bits_to_write = my_min<int>(total_bits, 8 - (bit_pos & 7));
  339. pBytes[bit_pos >> 3] |= static_cast<uint8_t>(value << (bit_pos & 7));
  340. bit_pos += bits_to_write;
  341. total_bits -= bits_to_write;
  342. value >>= bits_to_write;
  343. }
  344. }
  345. static const uint8_t g_astc_quint_encode[125] =
  346. {
  347. 0, 1, 2, 3, 4, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 24, 25, 26, 27, 28, 5, 13, 21, 29, 6, 32, 33, 34, 35, 36, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 56, 57,
  348. 58, 59, 60, 37, 45, 53, 61, 14, 64, 65, 66, 67, 68, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 88, 89, 90, 91, 92, 69, 77, 85, 93, 22, 96, 97, 98, 99, 100, 104,
  349. 105, 106, 107, 108, 112, 113, 114, 115, 116, 120, 121, 122, 123, 124, 101, 109, 117, 125, 30, 102, 103, 70, 71, 38, 110, 111, 78, 79, 46, 118, 119, 86, 87, 54,
  350. 126, 127, 94, 95, 62, 39, 47, 55, 63, 7 /*31 - results in the same decode as 7*/
  351. };
  352. // Encodes 3 values to output, usable for any range that uses quints and bits
  353. static inline void astc_encode_quints(uint32_t* pOutput, const uint8_t* pValues, uint32_t& bit_pos, int n)
  354. {
  355. // First extract the quints and the bits from the 3 input values
  356. int quints = 0, bits[3];
  357. const uint32_t bit_mask = (1 << n) - 1;
  358. for (int i = 0; i < 3; i++)
  359. {
  360. static const int s_muls[3] = { 1, 5, 25 };
  361. const int t = pValues[i] >> n;
  362. quints += t * s_muls[i];
  363. bits[i] = pValues[i] & bit_mask;
  364. }
  365. // Encode the quints, by inverting the bit manipulations done by the decoder, converting 3 quints into 7-bits.
  366. // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding
  367. assert(quints < 125);
  368. const int T = g_astc_quint_encode[quints];
  369. // Now interleave the 7 encoded quint bits with the bits to form the encoded output. See table 95-96.
  370. astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 2) << n) | (bits[1] << (3 + n)) | (astc_extract_bits(T, 3, 4) << (3 + n * 2)) |
  371. (bits[2] << (5 + n * 2)) | (astc_extract_bits(T, 5, 6) << (5 + n * 3)), 7 + n * 3);
  372. }
// Lookup table used by astc_encode_trits(): indexed by the five trits of a group packed as
// t0 + 3*t1 + 9*t2 + 27*t3 + 81*t4 (0..242), it yields the encoded value T whose bit fields
// [1:0], [3:2], [4], [6:5] and [7] are interleaved with the plain bits of the five values.
static const uint8_t g_astc_trit_encode[243] = { 0, 1, 2, 4, 5, 6, 8, 9, 10, 16, 17, 18, 20, 21, 22, 24, 25, 26, 3, 7, 11, 19, 23, 27, 12, 13, 14, 32, 33, 34, 36, 37, 38, 40, 41, 42, 48, 49, 50, 52, 53, 54, 56, 57, 58, 35, 39,
    43, 51, 55, 59, 44, 45, 46, 64, 65, 66, 68, 69, 70, 72, 73, 74, 80, 81, 82, 84, 85, 86, 88, 89, 90, 67, 71, 75, 83, 87, 91, 76, 77, 78, 128, 129, 130, 132, 133, 134, 136, 137, 138, 144, 145, 146, 148, 149, 150, 152, 153, 154,
    131, 135, 139, 147, 151, 155, 140, 141, 142, 160, 161, 162, 164, 165, 166, 168, 169, 170, 176, 177, 178, 180, 181, 182, 184, 185, 186, 163, 167, 171, 179, 183, 187, 172, 173, 174, 192, 193, 194, 196, 197, 198, 200, 201, 202,
    208, 209, 210, 212, 213, 214, 216, 217, 218, 195, 199, 203, 211, 215, 219, 204, 205, 206, 96, 97, 98, 100, 101, 102, 104, 105, 106, 112, 113, 114, 116, 117, 118, 120, 121, 122, 99, 103, 107, 115, 119, 123, 108, 109, 110, 224,
    225, 226, 228, 229, 230, 232, 233, 234, 240, 241, 242, 244, 245, 246, 248, 249, 250, 227, 231, 235, 243, 247, 251, 236, 237, 238, 28, 29, 30, 60, 61, 62, 92, 93, 94, 156, 157, 158, 188, 189, 190, 220, 221, 222, 31, 63, 95, 159,
    191, 223, 124, 125, 126 };
  379. // Encodes 5 values to output, usable for any range that uses trits and bits
  380. static void astc_encode_trits(uint32_t* pOutput, const uint8_t* pValues, uint32_t& bit_pos, int n)
  381. {
  382. // First extract the trits and the bits from the 5 input values
  383. int trits = 0, bits[5];
  384. const uint32_t bit_mask = (1 << n) - 1;
  385. for (int i = 0; i < 5; i++)
  386. {
  387. static const int s_muls[5] = { 1, 3, 9, 27, 81 };
  388. const int t = pValues[i] >> n;
  389. trits += t * s_muls[i];
  390. bits[i] = pValues[i] & bit_mask;
  391. }
  392. // Encode the trits, by inverting the bit manipulations done by the decoder, converting 5 trits into 8-bits.
  393. // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding
  394. assert(trits < 243);
  395. const int T = g_astc_trit_encode[trits];
  396. // Now interleave the 8 encoded trit bits with the bits to form the encoded output. See table 94.
  397. astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 1) << n) | (bits[1] << (2 + n)), n * 2 + 2);
  398. astc_set_bits(pOutput, bit_pos, astc_extract_bits(T, 2, 3) | (bits[2] << 2) | (astc_extract_bits(T, 4, 4) << (2 + n)) | (bits[3] << (3 + n)) | (astc_extract_bits(T, 5, 6) << (3 + n * 2)) |
  399. (bits[4] << (5 + n * 2)) | (astc_extract_bits(T, 7, 7) << (5 + n * 3)), n * 3 + 6);
  400. }
  401. // Packs values using ASTC's BISE to output buffer.
  402. void encode_bise(uint32_t* pDst, const uint8_t* pSrc_vals, uint32_t bit_pos, int num_vals, int range)
  403. {
  404. uint32_t temp[5] = { 0 };
  405. const int num_bits = g_ise_range_table[range][0];
  406. int group_size = 0;
  407. if (g_ise_range_table[range][1])
  408. group_size = 5;
  409. else if (g_ise_range_table[range][2])
  410. group_size = 3;
  411. #ifndef NDEBUG
  412. const uint32_t num_levels = get_ise_levels(range);
  413. for (int i = 0; i < num_vals; i++)
  414. {
  415. assert(pSrc_vals[i] < num_levels);
  416. }
  417. #endif
  418. if (group_size)
  419. {
  420. // Range has trits or quints - pack each group of 5 or 3 values
  421. const int total_groups = (group_size == 5) ? ((num_vals + 4) / 5) : ((num_vals + 2) / 3);
  422. for (int group_index = 0; group_index < total_groups; group_index++)
  423. {
  424. uint8_t vals[5] = { 0 };
  425. const int limit = my_min(group_size, num_vals - group_index * group_size);
  426. for (int i = 0; i < limit; i++)
  427. vals[i] = pSrc_vals[group_index * group_size + i];
  428. if (group_size == 5)
  429. astc_encode_trits(temp, vals, bit_pos, num_bits);
  430. else
  431. astc_encode_quints(temp, vals, bit_pos, num_bits);
  432. }
  433. }
  434. else
  435. {
  436. for (int i = 0; i < num_vals; i++)
  437. astc_set_bits_1_to_9(temp, bit_pos, pSrc_vals[i], num_bits);
  438. }
  439. // TODO: Could this write too many bits on incomplete blocks?
  440. pDst[0] |= temp[0]; pDst[1] |= temp[1];
  441. pDst[2] |= temp[2]; pDst[3] |= temp[3];
  442. }
  443. inline uint32_t rev_dword(uint32_t bits)
  444. {
  445. uint32_t v = (bits << 16) | (bits >> 16);
  446. v = ((v & 0x00ff00ff) << 8) | ((v & 0xff00ff00) >> 8); v = ((v & 0x0f0f0f0f) << 4) | ((v & 0xf0f0f0f0) >> 4);
  447. v = ((v & 0x33333333) << 2) | ((v & 0xcccccccc) >> 2); v = ((v & 0x55555555) << 1) | ((v & 0xaaaaaaaa) >> 1);
  448. return v;
  449. }
  450. static inline bool is_packable(int value, int num_bits) { assert((num_bits >= 1) && (num_bits < 31)); return (value >= 0) && (value < (1 << num_bits)); }
  451. static bool get_config_bits(const log_astc_block &log_block, uint32_t &config_bits)
  452. {
  453. config_bits = 0;
  454. const int W = log_block.m_grid_width, H = log_block.m_grid_height;
  455. const uint32_t P = log_block.m_weight_ise_range >= 6; // high precision
  456. const uint32_t Dp_P = (log_block.m_dual_plane << 1) | P; // pack dual plane+high precision bits
  457. // See Tables 81-82
  458. // Compute p from weight range
  459. uint32_t p = 2 + log_block.m_weight_ise_range - (P ? 6 : 0);
  460. // Rearrange p's bits to p0 p2 p1
  461. p = (p >> 1) + ((p & 1) << 2);
  462. // Try encoding each row of table 82.
  463. // W+4 H+2
  464. if (is_packable(W - 4, 2) && is_packable(H - 2, 2))
  465. {
  466. config_bits = (Dp_P << 9) | ((W - 4) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | (p & 3);
  467. return true;
  468. }
  469. // W+8 H+2
  470. if (is_packable(W - 8, 2) && is_packable(H - 2, 2))
  471. {
  472. config_bits = (Dp_P << 9) | ((W - 8) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | 4 | (p & 3);
  473. return true;
  474. }
  475. // W+2 H+8
  476. if (is_packable(W - 2, 2) && is_packable(H - 8, 2))
  477. {
  478. config_bits = (Dp_P << 9) | ((H - 8) << 7) | ((W - 2) << 5) | ((p & 4) << 2) | 8 | (p & 3);
  479. return true;
  480. }
  481. // W+2 H+6
  482. if (is_packable(W - 2, 2) && is_packable(H - 6, 1))
  483. {
  484. config_bits = (Dp_P << 9) | ((H - 6) << 7) | ((W - 2) << 5) | ((p & 4) << 2) | 12 | (p & 3);
  485. return true;
  486. }
  487. // W+2 H+2
  488. if (is_packable(W - 2, 1) && is_packable(H - 2, 2))
  489. {
  490. config_bits = (Dp_P << 9) | ((W) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | 12 | (p & 3);
  491. return true;
  492. }
  493. // 12 H+2
  494. if ((W == 12) && is_packable(H - 2, 2))
  495. {
  496. config_bits = (Dp_P << 9) | ((H - 2) << 5) | (p << 2);
  497. return true;
  498. }
  499. // W+2 12
  500. if ((H == 12) && is_packable(W - 2, 2))
  501. {
  502. config_bits = (Dp_P << 9) | (1 << 7) | ((W - 2) << 5) | (p << 2);
  503. return true;
  504. }
  505. // 6 10
  506. if ((W == 6) && (H == 10))
  507. {
  508. config_bits = (Dp_P << 9) | (3 << 7) | (p << 2);
  509. return true;
  510. }
  511. // 10 6
  512. if ((W == 10) && (H == 6))
  513. {
  514. config_bits = (Dp_P << 9) | (0b1101 << 5) | (p << 2);
  515. return true;
  516. }
  517. // W+6 H+6 (no dual plane or high prec)
  518. if ((!Dp_P) && is_packable(W - 6, 2) && is_packable(H - 6, 2))
  519. {
  520. config_bits = ((H - 6) << 9) | 256 | ((W - 6) << 5) | (p << 2);
  521. return true;
  522. }
  523. // Failed: unsupported weight grid dimensions or config.
  524. return false;
  525. }
// Packs a logical ASTC block description into its physical 128-bit form.
//   phys_block               - output; zeroed first, then filled in. On a false return it may
//                              contain a partially written block.
//   log_block                - logical block to encode (must not have m_error_flag set)
//   pExpected_endpoint_range - optional; set to -1 on entry. If the only problem is that
//                              log_block.m_endpoint_ise_range differs from the range computed
//                              here, the computed range is stored before returning false.
// Returns true on success, false if the logical block fails any of the validity checks below.
bool pack_astc_block(astc_block& phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range)
{
    memset(&phys_block, 0, sizeof(phys_block));
    if (pExpected_endpoint_range)
        *pExpected_endpoint_range = -1;
    assert(!log_block.m_error_flag);
    if (log_block.m_error_flag)
        return false;
    // Solid color blocks are emitted as void-extent blocks and need none of the checks below.
    if (log_block.m_solid_color_flag_ldr)
    {
        pack_void_extent_ldr(phys_block, log_block.m_solid_color[0], log_block.m_solid_color[1], log_block.m_solid_color[2], log_block.m_solid_color[3]);
        return true;
    }
    else if (log_block.m_solid_color_flag_hdr)
    {
        pack_void_extent_hdr(phys_block, log_block.m_solid_color[0], log_block.m_solid_color[1], log_block.m_solid_color[2], log_block.m_solid_color[3]);
        return true;
    }
    if ((log_block.m_num_partitions < 1) || (log_block.m_num_partitions > MAX_PARTITIONS))
        return false;
    // Max usable weight range is 11
    if (log_block.m_weight_ise_range > LAST_VALID_WEIGHT_ISE_RANGE)
        return false;
    // See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints
    if ((log_block.m_endpoint_ise_range < FIRST_VALID_ENDPOINT_ISE_RANGE) || (log_block.m_endpoint_ise_range > LAST_VALID_ENDPOINT_ISE_RANGE))
        return false;
    if (log_block.m_color_component_selector > 3)
        return false;
    // The 11 block-mode bits go first; every astc_set_bits() call on bit_pos below continues
    // from where the previous one stopped.
    uint32_t config_bits = 0;
    if (!get_config_bits(log_block, config_bits))
        return false;
    uint32_t bit_pos = 0;
    astc_set_bits(&phys_block.m_vals[0], bit_pos, config_bits, 11);
    // Dual plane doubles the number of stored weights.
    const uint32_t total_grid_weights = (log_block.m_dual_plane ? 2 : 1) * (log_block.m_grid_width * log_block.m_grid_height);
    const uint32_t total_weight_bits = get_ise_sequence_bits(total_grid_weights, log_block.m_weight_ise_range);
    // 18.24 Illegal Encodings
    if ((!total_grid_weights) || (total_grid_weights > MAX_GRID_WEIGHTS) || (total_weight_bits < 24) || (total_weight_bits > 96))
        return false;
    // Counts the config bits that are stored just below the weight data instead of at bit_pos.
    uint32_t total_extra_bits = 0;
    astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_num_partitions - 1, 2);
    if (log_block.m_num_partitions > 1)
    {
        if (log_block.m_partition_id >= NUM_PARTITION_PATTERNS)
            return false;
        astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_partition_id, 10);
        uint32_t highest_cem = 0, lowest_cem = UINT32_MAX;
        for (uint32_t j = 0; j < log_block.m_num_partitions; j++)
        {
            highest_cem = my_max(highest_cem, log_block.m_color_endpoint_modes[j]);
            lowest_cem = my_min(lowest_cem, log_block.m_color_endpoint_modes[j]);
        }
        if (highest_cem > 15)
            return false;
        // Ensure CEM range is contiguous
        if (((highest_cem >> 2) > (1 + (lowest_cem >> 2))))
            return false;
        // See tables 79/80
        // All partitions share one CEM: low two bits are 00, followed by the 4-bit mode.
        uint32_t encoded_cem = log_block.m_color_endpoint_modes[0] << 2;
        if (lowest_cem != highest_cem)
        {
            // Partitions use different CEMs: the low two bits hold (base class + 1), capped at 3.
            encoded_cem = my_min<uint32_t>(3, 1 + (lowest_cem >> 2));
            // See tables at 23.11 Color Endpoint Mode
            for (uint32_t j = 0; j < log_block.m_num_partitions; j++)
            {
                // M: low two bits of the mode. C: the mode's class relative to the base class, must be 0 or 1.
                const int M = log_block.m_color_endpoint_modes[j] & 3;
                const int C = (log_block.m_color_endpoint_modes[j] >> 2) - ((encoded_cem & 3) - 1);
                if ((C & 1) != C)
                    return false;
                encoded_cem |= (C << (2 + j)) | (M << (2 + log_block.m_num_partitions + 2 * j));
            }
            // Only 6 CEM bits fit at bit_pos; the remainder is written directly below the weight bits.
            total_extra_bits = 3 * log_block.m_num_partitions - 4;
            if ((total_weight_bits + total_extra_bits) > 128)
                return false;
            uint32_t cem_bit_pos = 128 - total_weight_bits - total_extra_bits;
            astc_set_bits(&phys_block.m_vals[0], cem_bit_pos, encoded_cem >> 6, total_extra_bits);
        }
        astc_set_bits(&phys_block.m_vals[0], bit_pos, encoded_cem & 0x3f, 6);
    }
    else
    {
        // Single partition: the partition id must be 0 and the 4-bit CEM is stored directly.
        if (log_block.m_partition_id)
            return false;
        if (log_block.m_color_endpoint_modes[0] > 15)
            return false;
        astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_color_endpoint_modes[0], 4);
    }
    if (log_block.m_dual_plane)
    {
        if (log_block.m_num_partitions > 3)
            return false;
        // The 2-bit color component selector sits below the weight bits and any extra CEM bits.
        total_extra_bits += 2;
        uint32_t ccs_bit_pos = 128 - (int)total_weight_bits - (int)total_extra_bits;
        astc_set_bits(&phys_block.m_vals[0], ccs_bit_pos, log_block.m_color_component_selector, 2);
    }
    // Whatever is left of the 128 bits after config and weight bits is available for endpoints.
    const uint32_t total_config_bits = bit_pos + total_extra_bits;
    const int num_remaining_bits = 128 - (int)total_config_bits - (int)total_weight_bits;
    if (num_remaining_bits < 0)
        return false;
    // Each partition contributes 2 + 2 * (CEM >> 2) endpoint values.
    uint32_t total_cem_vals = 0;
    for (uint32_t j = 0; j < log_block.m_num_partitions; j++)
        total_cem_vals += 2 + 2 * (log_block.m_color_endpoint_modes[j] >> 2);
    if (total_cem_vals > MAX_ENDPOINTS)
        return false;
    // Pick the largest endpoint ISE range whose encoded size fits into the remaining bits.
    int endpoint_ise_range = -1;
    for (int k = 20; k > 0; k--)
    {
        int bits = get_ise_sequence_bits(total_cem_vals, k);
        if (bits <= num_remaining_bits)
        {
            endpoint_ise_range = k;
            break;
        }
    }
    // See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints
    if (endpoint_ise_range < (int)FIRST_VALID_ENDPOINT_ISE_RANGE)
        return false;
    // Ensure the caller utilized the right endpoint ISE range.
    if ((int)log_block.m_endpoint_ise_range != endpoint_ise_range)
    {
        if (pExpected_endpoint_range)
            *pExpected_endpoint_range = endpoint_ise_range;
        return false;
    }
    // Pack endpoints forwards
    encode_bise(&phys_block.m_vals[0], log_block.m_endpoints, bit_pos, total_cem_vals, endpoint_ise_range);
    // Pack weights backwards
    // The weights are encoded from bit 0 of a scratch block, then merged with both the word
    // order and the bit order within each word reversed.
    uint32_t weight_data[4] = { 0 };
    encode_bise(weight_data, log_block.m_weights, 0, total_grid_weights, log_block.m_weight_ise_range);
    for (uint32_t i = 0; i < 4; i++)
        phys_block.m_vals[i] |= rev_dword(weight_data[3 - i]);
    return true;
}
  658. static inline uint32_t bit_replication_scale(uint32_t src, int num_src_bits, int num_dst_bits)
  659. {
  660. assert(num_src_bits <= num_dst_bits);
  661. assert((src & ((1 << num_src_bits) - 1)) == src);
  662. uint32_t dst = 0;
  663. for (int shift = num_dst_bits - num_src_bits; shift > -num_src_bits; shift -= num_src_bits)
  664. dst |= (shift >= 0) ? (src << shift) : (src >> -shift);
  665. return dst;
  666. }
  667. uint32_t dequant_bise_endpoint(uint32_t val, uint32_t ise_range)
  668. {
  669. assert((ise_range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_range <= LAST_VALID_ENDPOINT_ISE_RANGE));
  670. assert(val < get_ise_levels(ise_range));
  671. uint32_t u = 0;
  672. switch (ise_range)
  673. {
  674. case 5:
  675. {
  676. u = bit_replication_scale(val, 3, 8);
  677. break;
  678. }
  679. case 8:
  680. {
  681. u = bit_replication_scale(val, 4, 8);
  682. break;
  683. }
  684. case 11:
  685. {
  686. u = bit_replication_scale(val, 5, 8);
  687. break;
  688. }
  689. case 14:
  690. {
  691. u = bit_replication_scale(val, 6, 8);
  692. break;
  693. }
  694. case 17:
  695. {
  696. u = bit_replication_scale(val, 7, 8);
  697. break;
  698. }
  699. case 20:
  700. {
  701. u = val;
  702. break;
  703. }
  704. case 4:
  705. case 6:
  706. case 7:
  707. case 9:
  708. case 10:
  709. case 12:
  710. case 13:
  711. case 15:
  712. case 16:
  713. case 18:
  714. case 19:
  715. {
  716. const uint32_t num_bits = g_ise_range_table[ise_range][0];
  717. const uint32_t num_trits = g_ise_range_table[ise_range][1]; BASISU_NOTE_UNUSED(num_trits);
  718. const uint32_t num_quints = g_ise_range_table[ise_range][2]; BASISU_NOTE_UNUSED(num_quints);
  719. // compute Table 103 row index
  720. const int range_index = (num_bits * 2 + (num_quints ? 1 : 0)) - 2;
  721. assert(range_index >= 0 && range_index <= 10);
  722. uint32_t bits = val & ((1 << num_bits) - 1);
  723. uint32_t tval = val >> num_bits;
  724. assert(tval < (num_trits ? 3U : 5U));
  725. uint32_t a = bits & 1;
  726. uint32_t b = (bits >> 1) & 1;
  727. uint32_t c = (bits >> 2) & 1;
  728. uint32_t d = (bits >> 3) & 1;
  729. uint32_t e = (bits >> 4) & 1;
  730. uint32_t f = (bits >> 5) & 1;
  731. uint32_t A = a ? 511 : 0;
  732. uint32_t B = 0;
  733. switch (range_index)
  734. {
  735. case 2:
  736. {
  737. // 876543210
  738. // b000b0bb0
  739. B = (b << 1) | (b << 2) | (b << 4) | (b << 8);
  740. break;
  741. }
  742. case 3:
  743. {
  744. // 876543210
  745. // b0000bb00
  746. B = (b << 2) | (b << 3) | (b << 8);
  747. break;
  748. }
  749. case 4:
  750. {
  751. // 876543210
  752. // cb000cbcb
  753. B = b | (c << 1) | (b << 2) | (c << 3) | (b << 7) | (c << 8);
  754. break;
  755. }
  756. case 5:
  757. {
  758. // 876543210
  759. // cb0000cbc
  760. B = c | (b << 1) | (c << 2) | (b << 7) | (c << 8);
  761. break;
  762. }
  763. case 6:
  764. {
  765. // 876543210
  766. // dcb000dcb
  767. B = b | (c << 1) | (d << 2) | (b << 6) | (c << 7) | (d << 8);
  768. break;
  769. }
  770. case 7:
  771. {
  772. // 876543210
  773. // dcb0000dc
  774. B = c | (d << 1) | (b << 6) | (c << 7) | (d << 8);
  775. break;
  776. }
  777. case 8:
  778. {
  779. // 876543210
  780. // edcb000ed
  781. B = d | (e << 1) | (b << 5) | (c << 6) | (d << 7) | (e << 8);
  782. break;
  783. }
  784. case 9:
  785. {
  786. // 876543210
  787. // edcb0000e
  788. B = e | (b << 5) | (c << 6) | (d << 7) | (e << 8);
  789. break;
  790. }
  791. case 10:
  792. {
  793. // 876543210
  794. // fedcb000f
  795. B = f | (b << 4) | (c << 5) | (d << 6) | (e << 7) | (f << 8);
  796. break;
  797. }
  798. default:
  799. break;
  800. }
  801. static uint8_t C_vals[11] = { 204, 113, 93, 54, 44, 26, 22, 13, 11, 6, 5 };
  802. uint32_t C = C_vals[range_index];
  803. uint32_t D = tval;
  804. u = D * C + B;
  805. u = u ^ A;
  806. u = (A & 0x80) | (u >> 2);
  807. break;
  808. }
  809. default:
  810. {
  811. assert(0);
  812. break;
  813. }
  814. }
  815. return u;
  816. }
  817. uint32_t dequant_bise_weight(uint32_t val, uint32_t ise_range)
  818. {
  819. assert(val < get_ise_levels(ise_range));
  820. uint32_t u = 0;
  821. switch (ise_range)
  822. {
  823. case 0:
  824. {
  825. u = val ? 63 : 0;
  826. break;
  827. }
  828. case 1: // 0-2
  829. {
  830. const uint8_t s_tab_0_2[3] = { 0, 32, 63 };
  831. u = s_tab_0_2[val];
  832. break;
  833. }
  834. case 2: // 0-3
  835. {
  836. u = bit_replication_scale(val, 2, 6);
  837. break;
  838. }
  839. case 3: // 0-4
  840. {
  841. const uint8_t s_tab_0_4[5] = { 0, 16, 32, 47, 63 };
  842. u = s_tab_0_4[val];
  843. break;
  844. }
  845. case 5: // 0-7
  846. {
  847. u = bit_replication_scale(val, 3, 6);
  848. break;
  849. }
  850. case 8: // 0-15
  851. {
  852. u = bit_replication_scale(val, 4, 6);
  853. break;
  854. }
  855. case 11: // 0-31
  856. {
  857. u = bit_replication_scale(val, 5, 6);
  858. break;
  859. }
  860. case 4: // 0-5
  861. case 6: // 0-9
  862. case 7: // 0-11
  863. case 9: // 0-19
  864. case 10: // 0-23
  865. {
  866. const uint32_t num_bits = g_ise_range_table[ise_range][0];
  867. const uint32_t num_trits = g_ise_range_table[ise_range][1]; BASISU_NOTE_UNUSED(num_trits);
  868. const uint32_t num_quints = g_ise_range_table[ise_range][2]; BASISU_NOTE_UNUSED(num_quints);
  869. // compute Table 103 row index
  870. const int range_index = num_bits * 2 + (num_quints ? 1 : 0);
  871. // Extract bits and tris/quints from value
  872. const uint32_t bits = val & ((1u << num_bits) - 1);
  873. const uint32_t D = val >> num_bits;
  874. assert(D < (num_trits ? 3U : 5U));
  875. // Now dequantize
  876. // See Table 103. ASTC weight unquantization parameters
  877. static const uint32_t C_table[5] = { 50, 28, 23, 13, 11 };
  878. const uint32_t a = bits & 1, b = (bits >> 1) & 1, c = (bits >> 2) & 1;
  879. const uint32_t A = (a == 0) ? 0 : 0x7F;
  880. uint32_t B = 0;
  881. if (range_index == 4)
  882. B = ((b << 6) | (b << 2) | (b << 0));
  883. else if (range_index == 5)
  884. B = ((b << 6) | (b << 1));
  885. else if (range_index == 6)
  886. B = ((c << 6) | (b << 5) | (c << 1) | (b << 0));
  887. const uint32_t C = C_table[range_index - 2];
  888. u = D * C + B;
  889. u = u ^ A;
  890. u = (A & 0x20) | (u >> 2);
  891. break;
  892. }
  893. default:
  894. assert(0);
  895. break;
  896. }
  897. if (u > 32)
  898. u++;
  899. return u;
  900. }
  901. // Returns the nearest ISE symbol given a [0,255] endpoint value.
  902. uint32_t find_nearest_bise_endpoint(int v, uint32_t ise_range)
  903. {
  904. assert(ise_range >= FIRST_VALID_ENDPOINT_ISE_RANGE && ise_range <= LAST_VALID_ENDPOINT_ISE_RANGE);
  905. const uint32_t total_levels = get_ise_levels(ise_range);
  906. int best_e = INT_MAX, best_index = 0;
  907. for (uint32_t i = 0; i < total_levels; i++)
  908. {
  909. const int qv = dequant_bise_endpoint(i, ise_range);
  910. int e = labs(v - qv);
  911. if (e < best_e)
  912. {
  913. best_e = e;
  914. best_index = i;
  915. if (!best_e)
  916. break;
  917. }
  918. }
  919. return best_index;
  920. }
  921. // Returns the nearest ISE weight given a [0,64] endpoint value.
  922. uint32_t find_nearest_bise_weight(int v, uint32_t ise_range)
  923. {
  924. assert(ise_range >= FIRST_VALID_WEIGHT_ISE_RANGE && ise_range <= LAST_VALID_WEIGHT_ISE_RANGE);
  925. assert(v <= (int)MAX_WEIGHT_VALUE);
  926. const uint32_t total_levels = get_ise_levels(ise_range);
  927. int best_e = INT_MAX, best_index = 0;
  928. for (uint32_t i = 0; i < total_levels; i++)
  929. {
  930. const int qv = dequant_bise_weight(i, ise_range);
  931. int e = labs(v - qv);
  932. if (e < best_e)
  933. {
  934. best_e = e;
  935. best_index = i;
  936. if (!best_e)
  937. break;
  938. }
  939. }
  940. return best_index;
  941. }
  942. void create_quant_tables(
  943. uint8_t* pVal_to_ise, // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65]
  944. uint8_t* pISE_to_val, // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels]
  945. uint8_t* pISE_to_rank, // returns the level rank index given an ISE symbol, [levels]
  946. uint8_t* pRank_to_ISE, // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels]
  947. uint32_t ise_range, // ise range, [4,20] for endpoints, [0,11] for weights
  948. bool weight_flag) // false if block endpoints, true if weights
  949. {
  950. const uint32_t num_dequant_vals = weight_flag ? (MAX_WEIGHT_VALUE + 1) : 256;
  951. for (uint32_t i = 0; i < num_dequant_vals; i++)
  952. {
  953. uint32_t bise_index = weight_flag ? astc_helpers::find_nearest_bise_weight(i, ise_range) : astc_helpers::find_nearest_bise_endpoint(i, ise_range);
  954. if (pVal_to_ise)
  955. pVal_to_ise[i] = (uint8_t)bise_index;
  956. if (pISE_to_val)
  957. pISE_to_val[bise_index] = weight_flag ? (uint8_t)astc_helpers::dequant_bise_weight(bise_index, ise_range) : (uint8_t)astc_helpers::dequant_bise_endpoint(bise_index, ise_range);
  958. }
  959. if (pISE_to_rank || pRank_to_ISE)
  960. {
  961. const uint32_t num_levels = get_ise_levels(ise_range);
  962. if (!g_ise_range_table[ise_range][1] && !g_ise_range_table[ise_range][2])
  963. {
  964. // Only bits
  965. for (uint32_t i = 0; i < num_levels; i++)
  966. {
  967. if (pISE_to_rank)
  968. pISE_to_rank[i] = (uint8_t)i;
  969. if (pRank_to_ISE)
  970. pRank_to_ISE[i] = (uint8_t)i;
  971. }
  972. }
  973. else
  974. {
  975. // Range has trits or quints
  976. uint32_t vals[256];
  977. for (uint32_t i = 0; i < num_levels; i++)
  978. {
  979. uint32_t v = weight_flag ? astc_helpers::dequant_bise_weight(i, ise_range) : astc_helpers::dequant_bise_endpoint(i, ise_range);
  980. // Low=ISE value
  981. // High=dequantized value
  982. vals[i] = (v << 16) | i;
  983. }
  984. // Sorts by dequantized value
  985. std::sort(vals, vals + num_levels);
  986. for (uint32_t rank = 0; rank < num_levels; rank++)
  987. {
  988. uint32_t ise_val = (uint8_t)vals[rank];
  989. if (pISE_to_rank)
  990. pISE_to_rank[ise_val] = (uint8_t)rank;
  991. if (pRank_to_ISE)
  992. pRank_to_ISE[rank] = (uint8_t)ise_val;
  993. }
  994. }
  995. }
  996. }
  997. void pack_void_extent_ldr(astc_block &blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah)
  998. {
  999. uint8_t* pDst = (uint8_t*)&blk.m_vals[0];
  1000. memset(pDst, 0xFF, 16);
  1001. pDst[0] = 0b11111100;
  1002. pDst[1] = 0b11111101;
  1003. pDst[8] = (uint8_t)rh;
  1004. pDst[9] = (uint8_t)(rh >> 8);
  1005. pDst[10] = (uint8_t)gh;
  1006. pDst[11] = (uint8_t)(gh >> 8);
  1007. pDst[12] = (uint8_t)bh;
  1008. pDst[13] = (uint8_t)(bh >> 8);
  1009. pDst[14] = (uint8_t)ah;
  1010. pDst[15] = (uint8_t)(ah >> 8);
  1011. }
  1012. // rh-ah are half-floats
  1013. void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah)
  1014. {
  1015. uint8_t* pDst = (uint8_t*)&blk.m_vals[0];
  1016. memset(pDst, 0xFF, 16);
  1017. pDst[0] = 0b11111100;
  1018. pDst[8] = (uint8_t)rh;
  1019. pDst[9] = (uint8_t)(rh >> 8);
  1020. pDst[10] = (uint8_t)gh;
  1021. pDst[11] = (uint8_t)(gh >> 8);
  1022. pDst[12] = (uint8_t)bh;
  1023. pDst[13] = (uint8_t)(bh >> 8);
  1024. pDst[14] = (uint8_t)ah;
  1025. pDst[15] = (uint8_t)(ah >> 8);
  1026. }
  1027. bool is_cem_ldr(uint32_t mode)
  1028. {
  1029. switch (mode)
  1030. {
  1031. case CEM_LDR_LUM_DIRECT:
  1032. case CEM_LDR_LUM_BASE_PLUS_OFS:
  1033. case CEM_LDR_LUM_ALPHA_DIRECT:
  1034. case CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS:
  1035. case CEM_LDR_RGB_BASE_SCALE:
  1036. case CEM_LDR_RGB_DIRECT:
  1037. case CEM_LDR_RGB_BASE_PLUS_OFFSET:
  1038. case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A:
  1039. case CEM_LDR_RGBA_DIRECT:
  1040. case CEM_LDR_RGBA_BASE_PLUS_OFFSET:
  1041. return true;
  1042. default:
  1043. break;
  1044. }
  1045. return false;
  1046. }
  1047. bool is_valid_block_size(uint32_t w, uint32_t h)
  1048. {
  1049. assert((w >= MIN_BLOCK_DIM) && (w <= MAX_BLOCK_DIM));
  1050. assert((h >= MIN_BLOCK_DIM) && (h <= MAX_BLOCK_DIM));
  1051. #define SIZECHK(x, y) if ((w == (x)) && (h == (y))) return true;
  1052. SIZECHK(4, 4);
  1053. SIZECHK(5, 4);
  1054. SIZECHK(5, 5);
  1055. SIZECHK(6, 5);
  1056. SIZECHK(6, 6);
  1057. SIZECHK(8, 5);
  1058. SIZECHK(8, 6);
  1059. SIZECHK(10, 5);
  1060. SIZECHK(10, 6);
  1061. SIZECHK(8, 8);
  1062. SIZECHK(10, 8);
  1063. SIZECHK(10, 10);
  1064. SIZECHK(12, 10);
  1065. SIZECHK(12, 12);
  1066. #undef SIZECHK
  1067. return false;
  1068. }
  1069. bool block_has_any_hdr_cems(const log_astc_block& log_blk)
  1070. {
  1071. assert((log_blk.m_num_partitions >= 1) && (log_blk.m_num_partitions <= MAX_PARTITIONS));
  1072. for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
  1073. if (is_cem_hdr(log_blk.m_color_endpoint_modes[i]))
  1074. return true;
  1075. return false;
  1076. }
  1077. bool block_has_any_ldr_cems(const log_astc_block& log_blk)
  1078. {
  1079. assert((log_blk.m_num_partitions >= 1) && (log_blk.m_num_partitions <= MAX_PARTITIONS));
  1080. for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
  1081. if (!is_cem_hdr(log_blk.m_color_endpoint_modes[i]))
  1082. return true;
  1083. return false;
  1084. }
// Global dequantization tables; initialized by init_tables().
dequant_tables g_dequant_tables;

// Forward declaration, so init_tables() can call it ahead of its definition further down.
void precompute_texel_partitions_4x4();

// One-time setup: initializes g_dequant_tables (init_rank_tabs is forwarded to its init())
// and then runs precompute_texel_partitions_4x4().
void init_tables(bool init_rank_tabs)
{
    g_dequant_tables.init(init_rank_tabs);
    precompute_texel_partitions_4x4();
}
// Bilinear sampling record for one destination texel, as produced by compute_upsample_weights()
// and consumed by upsample_weight_grid().
struct weighted_sample
{
    uint8_t m_src_x; // x of the top-left source grid weight of the 2x2 footprint
    uint8_t m_src_y; // y of the top-left source grid weight of the 2x2 footprint
    uint8_t m_weights[2][2]; // [y][x], scaled by 16, round by adding 8
};
  1098. static void compute_upsample_weights(
  1099. int block_width, int block_height,
  1100. int weight_grid_width, int weight_grid_height,
  1101. weighted_sample* pWeights) // there will be block_width * block_height bilinear samples
  1102. {
  1103. const uint32_t scaleX = (1024 + block_width / 2) / (block_width - 1);
  1104. const uint32_t scaleY = (1024 + block_height / 2) / (block_height - 1);
  1105. for (int texelY = 0; texelY < block_height; texelY++)
  1106. {
  1107. for (int texelX = 0; texelX < block_width; texelX++)
  1108. {
  1109. const uint32_t gX = (scaleX * texelX * (weight_grid_width - 1) + 32) >> 6;
  1110. const uint32_t gY = (scaleY * texelY * (weight_grid_height - 1) + 32) >> 6;
  1111. const uint32_t jX = gX >> 4;
  1112. const uint32_t jY = gY >> 4;
  1113. const uint32_t fX = gX & 0xf;
  1114. const uint32_t fY = gY & 0xf;
  1115. const uint32_t w11 = (fX * fY + 8) >> 4;
  1116. const uint32_t w10 = fY - w11;
  1117. const uint32_t w01 = fX - w11;
  1118. const uint32_t w00 = 16 - fX - fY + w11;
  1119. weighted_sample& s = pWeights[texelX + texelY * block_width];
  1120. s.m_src_x = (uint8_t)jX;
  1121. s.m_src_y = (uint8_t)jY;
  1122. s.m_weights[0][0] = (uint8_t)w00;
  1123. s.m_weights[0][1] = (uint8_t)w01;
  1124. s.m_weights[1][0] = (uint8_t)w10;
  1125. s.m_weights[1][1] = (uint8_t)w11;
  1126. }
  1127. }
  1128. }
  1129. // Should be dequantized [0,64] weights
  1130. static void upsample_weight_grid(
  1131. uint32_t bx, uint32_t by, // destination/to dimension
  1132. uint32_t wx, uint32_t wy, // source/from dimension
  1133. const uint8_t* pSrc_weights, // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx]
  1134. uint8_t* pDst_weights) // [by][bx]
  1135. {
  1136. assert((bx >= 2) && (by >= 2) && (bx <= 12) && (by <= 12));
  1137. assert((wx >= 2) && (wy >= 2) && (wx <= bx) && (wy <= by));
  1138. const uint32_t total_src_weights = wx * wy;
  1139. const uint32_t total_dst_weights = bx * by;
  1140. if (total_src_weights == total_dst_weights)
  1141. {
  1142. memcpy(pDst_weights, pSrc_weights, total_src_weights);
  1143. return;
  1144. }
  1145. weighted_sample weights[12 * 12];
  1146. compute_upsample_weights(bx, by, wx, wy, weights);
  1147. const weighted_sample* pS = weights;
  1148. for (uint32_t y = 0; y < by; y++)
  1149. {
  1150. for (uint32_t x = 0; x < bx; x++, ++pS)
  1151. {
  1152. const uint32_t w00 = pS->m_weights[0][0];
  1153. const uint32_t w01 = pS->m_weights[0][1];
  1154. const uint32_t w10 = pS->m_weights[1][0];
  1155. const uint32_t w11 = pS->m_weights[1][1];
  1156. assert(w00 || w01 || w10 || w11);
  1157. const uint32_t sx = pS->m_src_x, sy = pS->m_src_y;
  1158. uint32_t total = 8;
  1159. if (w00) total += pSrc_weights[bounds_check(sx + sy * wx, 0U, total_src_weights)] * w00;
  1160. if (w01) total += pSrc_weights[bounds_check(sx + 1 + sy * wx, 0U, total_src_weights)] * w01;
  1161. if (w10) total += pSrc_weights[bounds_check(sx + (sy + 1) * wx, 0U, total_src_weights)] * w10;
  1162. if (w11) total += pSrc_weights[bounds_check(sx + 1 + (sy + 1) * wx, 0U, total_src_weights)] * w11;
  1163. pDst_weights[x + y * bx] = (uint8_t)(total >> 4);
  1164. }
  1165. }
  1166. }
  1167. inline uint32_t hash52(uint32_t v)
  1168. {
  1169. uint32_t p = v;
  1170. p ^= p >> 15; p -= p << 17; p += p << 7; p += p << 4;
  1171. p ^= p >> 5; p += p << 16; p ^= p >> 7; p ^= p >> 3;
  1172. p ^= p << 6; p ^= p >> 17;
  1173. return p;
  1174. }
  1175. int compute_texel_partition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int num_partitions, bool small_block)
  1176. {
  1177. assert(zIn == 0);
  1178. const uint32_t x = small_block ? xIn << 1 : xIn;
  1179. const uint32_t y = small_block ? yIn << 1 : yIn;
  1180. const uint32_t z = small_block ? zIn << 1 : zIn;
  1181. const uint32_t seed = seedIn + 1024 * (num_partitions - 1);
  1182. const uint32_t rnum = hash52(seed);
  1183. uint8_t seed1 = (uint8_t)(rnum & 0xf);
  1184. uint8_t seed2 = (uint8_t)((rnum >> 4) & 0xf);
  1185. uint8_t seed3 = (uint8_t)((rnum >> 8) & 0xf);
  1186. uint8_t seed4 = (uint8_t)((rnum >> 12) & 0xf);
  1187. uint8_t seed5 = (uint8_t)((rnum >> 16) & 0xf);
  1188. uint8_t seed6 = (uint8_t)((rnum >> 20) & 0xf);
  1189. uint8_t seed7 = (uint8_t)((rnum >> 24) & 0xf);
  1190. uint8_t seed8 = (uint8_t)((rnum >> 28) & 0xf);
  1191. uint8_t seed9 = (uint8_t)((rnum >> 18) & 0xf);
  1192. uint8_t seed10 = (uint8_t)((rnum >> 22) & 0xf);
  1193. uint8_t seed11 = (uint8_t)((rnum >> 26) & 0xf);
  1194. uint8_t seed12 = (uint8_t)(((rnum >> 30) | (rnum << 2)) & 0xf);
  1195. seed1 = (uint8_t)(seed1 * seed1);
  1196. seed2 = (uint8_t)(seed2 * seed2);
  1197. seed3 = (uint8_t)(seed3 * seed3);
  1198. seed4 = (uint8_t)(seed4 * seed4);
  1199. seed5 = (uint8_t)(seed5 * seed5);
  1200. seed6 = (uint8_t)(seed6 * seed6);
  1201. seed7 = (uint8_t)(seed7 * seed7);
  1202. seed8 = (uint8_t)(seed8 * seed8);
  1203. seed9 = (uint8_t)(seed9 * seed9);
  1204. seed10 = (uint8_t)(seed10 * seed10);
  1205. seed11 = (uint8_t)(seed11 * seed11);
  1206. seed12 = (uint8_t)(seed12 * seed12);
  1207. const int shA = (seed & 2) != 0 ? 4 : 5;
  1208. const int shB = (num_partitions == 3) ? 6 : 5;
  1209. const int sh1 = (seed & 1) != 0 ? shA : shB;
  1210. const int sh2 = (seed & 1) != 0 ? shB : shA;
  1211. const int sh3 = (seed & 0x10) != 0 ? sh1 : sh2;
  1212. seed1 = (uint8_t)(seed1 >> sh1);
  1213. seed2 = (uint8_t)(seed2 >> sh2);
  1214. seed3 = (uint8_t)(seed3 >> sh1);
  1215. seed4 = (uint8_t)(seed4 >> sh2);
  1216. seed5 = (uint8_t)(seed5 >> sh1);
  1217. seed6 = (uint8_t)(seed6 >> sh2);
  1218. seed7 = (uint8_t)(seed7 >> sh1);
  1219. seed8 = (uint8_t)(seed8 >> sh2);
  1220. seed9 = (uint8_t)(seed9 >> sh3);
  1221. seed10 = (uint8_t)(seed10 >> sh3);
  1222. seed11 = (uint8_t)(seed11 >> sh3);
  1223. seed12 = (uint8_t)(seed12 >> sh3);
  1224. const int a = 0x3f & (seed1 * x + seed2 * y + seed11 * z + (rnum >> 14));
  1225. const int b = 0x3f & (seed3 * x + seed4 * y + seed12 * z + (rnum >> 10));
  1226. const int c = (num_partitions >= 3) ? 0x3f & (seed5 * x + seed6 * y + seed9 * z + (rnum >> 6)) : 0;
  1227. const int d = (num_partitions >= 4) ? 0x3f & (seed7 * x + seed8 * y + seed10 * z + (rnum >> 2)) : 0;
  1228. return (a >= b && a >= c && a >= d) ? 0
  1229. : (b >= c && b >= d) ? 1
  1230. : (c >= d) ? 2
  1231. : 3;
  1232. }
  1233. static uint32_t g_texel_partitions_4x4[1024][2];
  1234. void precompute_texel_partitions_4x4()
  1235. {
  1236. for (uint32_t p = 0; p < 1024; p++)
  1237. {
  1238. uint32_t v2 = 0, v3 = 0;
  1239. for (uint32_t y = 0; y < 4; y++)
  1240. {
  1241. for (uint32_t x = 0; x < 4; x++)
  1242. {
  1243. const uint32_t shift = x * 2 + y * 8;
  1244. v2 |= (compute_texel_partition(p, x, y, 0, 2, true) << shift);
  1245. v3 |= (compute_texel_partition(p, x, y, 0, 3, true) << shift);
  1246. }
  1247. }
  1248. g_texel_partitions_4x4[p][0] = v2;
  1249. g_texel_partitions_4x4[p][1] = v3;
  1250. }
  1251. }
  1252. static inline int get_precompute_texel_partitions_4x4(uint32_t seed, uint32_t x, uint32_t y, uint32_t num_partitions)
  1253. {
  1254. assert(g_texel_partitions_4x4[1][0]);
  1255. assert(seed < 1024);
  1256. assert((x <= 3) && (y <= 3));
  1257. assert((num_partitions >= 2) && (num_partitions <= 3));
  1258. const uint32_t shift = x * 2 + y * 8;
  1259. return (g_texel_partitions_4x4[seed][num_partitions - 2] >> shift) & 3;
  1260. }
  // Blue-contracts a color: red and green are each averaged with blue (sum shifted right by 1),
  // while blue and alpha are passed through unchanged. Results are written to dr/dg/db/da.
  void blue_contract(
      int r, int g, int b, int a,
      int &dr, int &dg, int &db, int &da)
  {
      const int contracted_r = (r + b) >> 1;
      const int contracted_g = (g + b) >> 1;

      dr = contracted_r;
      dg = contracted_g;
      db = b;
      da = a;
  }
  // Moves the top bit (0x80) of 'a' into the top bit of 'b' (after shifting b right by one),
  // then turns the remainder of 'a' into a signed 6-bit value in [-32, 31].
  inline void bit_transfer_signed(int& a, int& b)
  {
      b >>= 1;
      b |= (a & 0x80);

      a = (a >> 1) & 0x3F;

      // Sign-extend from bit 5: values with bit 5 set end up reduced by 0x40.
      a = (a ^ 0x20) - 0x20;
  }
  // Limits 'a' to the range [l, h]. The lower bound is tested first, so a value below 'l' yields 'l'.
  static inline int clamp(int a, int l, int h)
  {
      if (a < l)
          return l;

      if (a > h)
          return h;

      return a;
  }
  // Floating-point version of clamp(): limits 'a' to [l, h], testing the lower bound first.
  // A NaN input fails both comparisons and is returned unchanged.
  static inline float clampf(float a, float l, float h)
  {
      return (a < l) ? l : ((a > h) ? h : a);
  }
  // Sign-extends the low 'num_src_bits' bits of 'src' to a full int.
  //   src          - value whose low num_src_bits bits hold a two's complement number;
  //                  any higher bits are ignored.
  //   num_src_bits - width of the source field, in [2, 31].
  // Returns the field's value as a signed int.
  inline int sign_extend(int src, int num_src_bits)
  {
      assert((num_src_bits >= 2) && (num_src_bits <= 31));

      // Build the masks in unsigned arithmetic: with num_src_bits == 31 the signed
      // expression (1 << 31) - 1 would overflow int.
      const uint32_t field_mask = (1u << num_src_bits) - 1u;
      const uint32_t sign_bit = 1u << (num_src_bits - 1);
      const uint32_t bits = (uint32_t)src;

      if ((bits & sign_bit) != 0)
          return (int)(bits | ~field_mask);

      return (int)(bits & field_mask);
  }
  1304. // endpoints is [4][2]
  1305. void decode_endpoint(uint32_t cem_index, int (*pEndpoints)[2], const uint8_t *pE)
  1306. {
  1307. assert(cem_index <= CEM_HDR_RGB_HDR_ALPHA);
  1308. int v0 = pE[0], v1 = pE[1];
  1309. int& e0_r = pEndpoints[0][0], &e0_g = pEndpoints[1][0], &e0_b = pEndpoints[2][0], &e0_a = pEndpoints[3][0];
  1310. int& e1_r = pEndpoints[0][1], &e1_g = pEndpoints[1][1], &e1_b = pEndpoints[2][1], &e1_a = pEndpoints[3][1];
  1311. switch (cem_index)
  1312. {
  1313. case CEM_LDR_LUM_DIRECT:
  1314. {
  1315. e0_r = v0; e1_r = v1;
  1316. e0_g = v0; e1_g = v1;
  1317. e0_b = v0; e1_b = v1;
  1318. e0_a = 0xFF; e1_a = 0xFF;
  1319. break;
  1320. }
  1321. case CEM_LDR_LUM_BASE_PLUS_OFS:
  1322. {
  1323. int l0 = (v0 >> 2) | (v1 & 0xc0);
  1324. int l1 = l0 + (v1 & 0x3f);
  1325. if (l1 > 0xFF)
  1326. l1 = 0xFF;
  1327. e0_r = l0; e1_r = l1;
  1328. e0_g = l0; e1_g = l1;
  1329. e0_b = l0; e1_b = l1;
  1330. e0_a = 0xFF; e1_a = 0xFF;
  1331. break;
  1332. }
  1333. case CEM_LDR_LUM_ALPHA_DIRECT:
  1334. {
  1335. int v2 = pE[2], v3 = pE[3];
  1336. e0_r = v0; e1_r = v1;
  1337. e0_g = v0; e1_g = v1;
  1338. e0_b = v0; e1_b = v1;
  1339. e0_a = v2; e1_a = v3;
  1340. break;
  1341. }
  1342. case CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS:
  1343. {
  1344. int v2 = pE[2], v3 = pE[3];
  1345. bit_transfer_signed(v1, v0);
  1346. bit_transfer_signed(v3, v2);
  1347. e0_r = v0; e1_r = v0 + v1;
  1348. e0_g = v0; e1_g = v0 + v1;
  1349. e0_b = v0; e1_b = v0 + v1;
  1350. e0_a = v2; e1_a = v2 + v3;
  1351. for (uint32_t c = 0; c < 4; c++)
  1352. {
  1353. pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255);
  1354. pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255);
  1355. }
  1356. break;
  1357. }
  1358. case CEM_LDR_RGB_BASE_SCALE:
  1359. {
  1360. int v2 = pE[2], v3 = pE[3];
  1361. e0_r = (v0 * v3) >> 8; e1_r = v0;
  1362. e0_g = (v1 * v3) >> 8; e1_g = v1;
  1363. e0_b = (v2 * v3) >> 8; e1_b = v2;
  1364. e0_a = 0xFF; e1_a = 0xFF;
  1365. break;
  1366. }
  1367. case CEM_LDR_RGB_DIRECT:
  1368. {
  1369. int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
  1370. if ((v1 + v3 + v5) >= (v0 + v2 + v4))
  1371. {
  1372. e0_r = v0; e1_r = v1;
  1373. e0_g = v2; e1_g = v3;
  1374. e0_b = v4; e1_b = v5;
  1375. e0_a = 0xFF; e1_a = 0xFF;
  1376. }
  1377. else
  1378. {
  1379. blue_contract(v1, v3, v5, 0xFF, e0_r, e0_g, e0_b, e0_a);
  1380. blue_contract(v0, v2, v4, 0xFF, e1_r, e1_g, e1_b, e1_a);
  1381. }
  1382. break;
  1383. }
  1384. case CEM_LDR_RGB_BASE_PLUS_OFFSET:
  1385. {
  1386. int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
  1387. bit_transfer_signed(v1, v0);
  1388. bit_transfer_signed(v3, v2);
  1389. bit_transfer_signed(v5, v4);
  1390. if ((v1 + v3 + v5) >= 0)
  1391. {
  1392. e0_r = v0; e1_r = v0 + v1;
  1393. e0_g = v2; e1_g = v2 + v3;
  1394. e0_b = v4; e1_b = v4 + v5;
  1395. e0_a = 0xFF; e1_a = 0xFF;
  1396. }
  1397. else
  1398. {
  1399. blue_contract(v0 + v1, v2 + v3, v4 + v5, 0xFF, e0_r, e0_g, e0_b, e0_a);
  1400. blue_contract(v0, v2, v4, 0xFF, e1_r, e1_g, e1_b, e1_a);
  1401. }
  1402. for (uint32_t c = 0; c < 4; c++)
  1403. {
  1404. pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255);
  1405. pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255);
  1406. }
  1407. break;
  1408. }
  1409. case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A:
  1410. {
  1411. int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
  1412. e0_r = (v0 * v3) >> 8; e1_r = v0;
  1413. e0_g = (v1 * v3) >> 8; e1_g = v1;
  1414. e0_b = (v2 * v3) >> 8; e1_b = v2;
  1415. e0_a = v4; e1_a = v5;
  1416. break;
  1417. }
  1418. case CEM_LDR_RGBA_DIRECT:
  1419. {
  1420. int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5], v6 = pE[6], v7 = pE[7];
  1421. if ((v1 + v3 + v5) >= (v0 + v2 + v4))
  1422. {
  1423. e0_r = v0; e1_r = v1;
  1424. e0_g = v2; e1_g = v3;
  1425. e0_b = v4; e1_b = v5;
  1426. e0_a = v6; e1_a = v7;
  1427. }
  1428. else
  1429. {
  1430. blue_contract(v1, v3, v5, v7, e0_r, e0_g, e0_b, e0_a);
  1431. blue_contract(v0, v2, v4, v6, e1_r, e1_g, e1_b, e1_a);
  1432. }
  1433. break;
  1434. }
  1435. case CEM_LDR_RGBA_BASE_PLUS_OFFSET:
  1436. {
  1437. int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5], v6 = pE[6], v7 = pE[7];
  1438. bit_transfer_signed(v1, v0);
  1439. bit_transfer_signed(v3, v2);
  1440. bit_transfer_signed(v5, v4);
  1441. bit_transfer_signed(v7, v6);
  1442. if ((v1 + v3 + v5) >= 0)
  1443. {
  1444. e0_r = v0; e1_r = v0 + v1;
  1445. e0_g = v2; e1_g = v2 + v3;
  1446. e0_b = v4; e1_b = v4 + v5;
  1447. e0_a = v6; e1_a = v6 + v7;
  1448. }
  1449. else
  1450. {
  1451. blue_contract(v0 + v1, v2 + v3, v4 + v5, v6 + v7, e0_r, e0_g, e0_b, e0_a);
  1452. blue_contract(v0, v2, v4, v6, e1_r, e1_g, e1_b, e1_a);
  1453. }
  1454. for (uint32_t c = 0; c < 4; c++)
  1455. {
  1456. pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255);
  1457. pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255);
  1458. }
  1459. break;
  1460. }
  1461. case CEM_HDR_LUM_LARGE_RANGE:
  1462. {
  1463. int y0, y1;
  1464. if (v1 >= v0)
  1465. {
  1466. y0 = (v0 << 4);
  1467. y1 = (v1 << 4);
  1468. }
  1469. else
  1470. {
  1471. y0 = (v1 << 4) + 8;
  1472. y1 = (v0 << 4) - 8;
  1473. }
  1474. e0_r = y0; e1_r = y1;
  1475. e0_g = y0; e1_g = y1;
  1476. e0_b = y0; e1_b = y1;
  1477. e0_a = 0x780; e1_a = 0x780;
  1478. break;
  1479. }
  1480. case CEM_HDR_LUM_SMALL_RANGE:
  1481. {
  1482. int y0, y1, d;
  1483. if ((v0 & 0x80) != 0)
  1484. {
  1485. y0 = ((v1 & 0xE0) << 4) | ((v0 & 0x7F) << 2);
  1486. d = (v1 & 0x1F) << 2;
  1487. }
  1488. else
  1489. {
  1490. y0 = ((v1 & 0xF0) << 4) | ((v0 & 0x7F) << 1);
  1491. d = (v1 & 0x0F) << 1;
  1492. }
  1493. y1 = y0 + d;
  1494. if (y1 > 0xFFF)
  1495. y1 = 0xFFF;
  1496. e0_r = y0; e1_r = y1;
  1497. e0_g = y0; e1_g = y1;
  1498. e0_b = y0; e1_b = y1;
  1499. e0_a = 0x780; e1_a = 0x780;
  1500. break;
  1501. }
  1502. case CEM_HDR_RGB_BASE_SCALE:
  1503. {
  1504. int v2 = pE[2], v3 = pE[3];
  1505. int modeval = ((v0 & 0xC0) >> 6) | ((v1 & 0x80) >> 5) | ((v2 & 0x80) >> 4);
  1506. int majcomp, mode;
  1507. if ((modeval & 0xC) != 0xC)
  1508. {
  1509. majcomp = modeval >> 2;
  1510. mode = modeval & 3;
  1511. }
  1512. else if (modeval != 0xF)
  1513. {
  1514. majcomp = modeval & 3;
  1515. mode = 4;
  1516. }
  1517. else
  1518. {
  1519. majcomp = 0;
  1520. mode = 5;
  1521. }
  1522. int red = v0 & 0x3f;
  1523. int green = v1 & 0x1f;
  1524. int blue = v2 & 0x1f;
  1525. int scale = v3 & 0x1f;
  1526. int x0 = (v1 >> 6) & 1;
  1527. int x1 = (v1 >> 5) & 1;
  1528. int x2 = (v2 >> 6) & 1;
  1529. int x3 = (v2 >> 5) & 1;
  1530. int x4 = (v3 >> 7) & 1;
  1531. int x5 = (v3 >> 6) & 1;
  1532. int x6 = (v3 >> 5) & 1;
  1533. int ohm = 1 << mode;
  1534. if (ohm & 0x30) green |= x0 << 6;
  1535. if (ohm & 0x3A) green |= x1 << 5;
  1536. if (ohm & 0x30) blue |= x2 << 6;
  1537. if (ohm & 0x3A) blue |= x3 << 5;
  1538. if (ohm & 0x3D) scale |= x6 << 5;
  1539. if (ohm & 0x2D) scale |= x5 << 6;
  1540. if (ohm & 0x04) scale |= x4 << 7;
  1541. if (ohm & 0x3B) red |= x4 << 6;
  1542. if (ohm & 0x04) red |= x3 << 6;
  1543. if (ohm & 0x10) red |= x5 << 7;
  1544. if (ohm & 0x0F) red |= x2 << 7;
  1545. if (ohm & 0x05) red |= x1 << 8;
  1546. if (ohm & 0x0A) red |= x0 << 8;
  1547. if (ohm & 0x05) red |= x0 << 9;
  1548. if (ohm & 0x02) red |= x6 << 9;
  1549. if (ohm & 0x01) red |= x3 << 10;
  1550. if (ohm & 0x02) red |= x5 << 10;
  1551. static const int s_shamts[6] = { 1,1,2,3,4,5 };
  1552. const int shamt = s_shamts[mode];
  1553. red <<= shamt;
  1554. green <<= shamt;
  1555. blue <<= shamt;
  1556. scale <<= shamt;
  1557. if (mode != 5)
  1558. {
  1559. green = red - green;
  1560. blue = red - blue;
  1561. }
  1562. if (majcomp == 1)
  1563. std::swap(red, green);
  1564. if (majcomp == 2)
  1565. std::swap(red, blue);
  1566. e1_r = clamp(red, 0, 0xFFF);
  1567. e1_g = clamp(green, 0, 0xFFF);
  1568. e1_b = clamp(blue, 0, 0xFFF);
  1569. e1_a = 0x780;
  1570. e0_r = clamp(red - scale, 0, 0xFFF);
  1571. e0_g = clamp(green - scale, 0, 0xFFF);
  1572. e0_b = clamp(blue - scale, 0, 0xFFF);
  1573. e0_a = 0x780;
  1574. break;
  1575. }
  1576. case CEM_HDR_RGB_HDR_ALPHA:
  1577. case CEM_HDR_RGB_LDR_ALPHA:
  1578. case CEM_HDR_RGB:
  1579. {
  1580. int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
  1581. int majcomp = ((v4 & 0x80) >> 7) | ((v5 & 0x80) >> 6);
  1582. e0_a = 0x780;
  1583. e1_a = 0x780;
  1584. if (majcomp == 3)
  1585. {
  1586. e0_r = v0 << 4;
  1587. e0_g = v2 << 4;
  1588. e0_b = (v4 & 0x7f) << 5;
  1589. e1_r = v1 << 4;
  1590. e1_g = v3 << 4;
  1591. e1_b = (v5 & 0x7f) << 5;
  1592. }
  1593. else
  1594. {
  1595. int mode = ((v1 & 0x80) >> 7) | ((v2 & 0x80) >> 6) | ((v3 & 0x80) >> 5);
  1596. int va = v0 | ((v1 & 0x40) << 2);
  1597. int vb0 = v2 & 0x3f;
  1598. int vb1 = v3 & 0x3f;
  1599. int vc = v1 & 0x3f;
  1600. int vd0 = v4 & 0x7f;
  1601. int vd1 = v5 & 0x7f;
  1602. static const int s_dbitstab[8] = { 7,6,7,6,5,6,5,6 };
  1603. vd0 = sign_extend(vd0, s_dbitstab[mode]);
  1604. vd1 = sign_extend(vd1, s_dbitstab[mode]);
  1605. int x0 = (v2 >> 6) & 1;
  1606. int x1 = (v3 >> 6) & 1;
  1607. int x2 = (v4 >> 6) & 1;
  1608. int x3 = (v5 >> 6) & 1;
  1609. int x4 = (v4 >> 5) & 1;
  1610. int x5 = (v5 >> 5) & 1;
  1611. int ohm = 1 << mode;
  1612. if (ohm & 0xA4) va |= x0 << 9;
  1613. if (ohm & 0x08) va |= x2 << 9;
  1614. if (ohm & 0x50) va |= x4 << 9;
  1615. if (ohm & 0x50) va |= x5 << 10;
  1616. if (ohm & 0xA0) va |= x1 << 10;
  1617. if (ohm & 0xC0) va |= x2 << 11;
  1618. if (ohm & 0x04) vc |= x1 << 6;
  1619. if (ohm & 0xE8) vc |= x3 << 6;
  1620. if (ohm & 0x20) vc |= x2 << 7;
  1621. if (ohm & 0x5B) vb0 |= x0 << 6;
  1622. if (ohm & 0x5B) vb1 |= x1 << 6;
  1623. if (ohm & 0x12) vb0 |= x2 << 7;
  1624. if (ohm & 0x12) vb1 |= x3 << 7;
  1625. int shamt = (mode >> 1) ^ 3;
  1626. va = (uint32_t)va << shamt;
  1627. vb0 = (uint32_t)vb0 << shamt;
  1628. vb1 = (uint32_t)vb1 << shamt;
  1629. vc = (uint32_t)vc << shamt;
  1630. vd0 = (uint32_t)vd0 << shamt;
  1631. vd1 = (uint32_t)vd1 << shamt;
  1632. e1_r = clamp(va, 0, 0xFFF);
  1633. e1_g = clamp(va - vb0, 0, 0xFFF);
  1634. e1_b = clamp(va - vb1, 0, 0xFFF);
  1635. e0_r = clamp(va - vc, 0, 0xFFF);
  1636. e0_g = clamp(va - vb0 - vc - vd0, 0, 0xFFF);
  1637. e0_b = clamp(va - vb1 - vc - vd1, 0, 0xFFF);
  1638. if (majcomp == 1)
  1639. {
  1640. std::swap(e0_r, e0_g);
  1641. std::swap(e1_r, e1_g);
  1642. }
  1643. else if (majcomp == 2)
  1644. {
  1645. std::swap(e0_r, e0_b);
  1646. std::swap(e1_r, e1_b);
  1647. }
  1648. }
  1649. if (cem_index == CEM_HDR_RGB_LDR_ALPHA)
  1650. {
  1651. int v6 = pE[6], v7 = pE[7];
  1652. e0_a = v6;
  1653. e1_a = v7;
  1654. }
  1655. else if (cem_index == CEM_HDR_RGB_HDR_ALPHA)
  1656. {
  1657. int v6 = pE[6], v7 = pE[7];
  1658. // Extract mode bits
  1659. int mode = ((v6 >> 7) & 1) | ((v7 >> 6) & 2);
  1660. v6 &= 0x7F;
  1661. v7 &= 0x7F;
  1662. if (mode == 3)
  1663. {
  1664. e0_a = v6 << 5;
  1665. e1_a = v7 << 5;
  1666. }
  1667. else
  1668. {
  1669. v6 |= (v7 << (mode + 1)) & 0x780;
  1670. v7 &= (0x3F >> mode);
  1671. v7 ^= (0x20 >> mode);
  1672. v7 -= (0x20 >> mode);
  1673. v6 <<= (4 - mode);
  1674. v7 <<= (4 - mode);
  1675. v7 += v6;
  1676. v7 = clamp(v7, 0, 0xFFF);
  1677. e0_a = v6;
  1678. e1_a = v7;
  1679. }
  1680. }
  1681. break;
  1682. }
  1683. default:
  1684. {
  1685. assert(0);
  1686. for (uint32_t c = 0; c < 4; c++)
  1687. {
  1688. pEndpoints[c][0] = 0;
  1689. pEndpoints[c][1] = 0;
  1690. }
  1691. break;
  1692. }
  1693. }
  1694. }
  1695. static inline bool is_half_inf_or_nan(half_float v)
  1696. {
  1697. return get_bits(v, 10, 14) == 31;
  1698. }
  1699. // This float->half conversion matches how "F32TO16" works on Intel GPU's.
  1700. half_float float_to_half(float val, bool toward_zero)
  1701. {
  1702. union { float f; int32_t i; uint32_t u; } fi = { val };
  1703. const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF, flt_s = (fi.i >> 31) & 0x1;
  1704. int s = flt_s, e = 0, m = 0;
  1705. // inf/NaN
  1706. if (flt_e == 0xff)
  1707. {
  1708. e = 31;
  1709. if (flt_m != 0) // NaN
  1710. m = 1;
  1711. }
  1712. // not zero or denormal
  1713. else if (flt_e != 0)
  1714. {
  1715. int new_exp = flt_e - 127;
  1716. if (new_exp > 15)
  1717. e = 31;
  1718. else if (new_exp < -14)
  1719. {
  1720. if (toward_zero)
  1721. m = (int)truncf((1 << 24) * fabsf(fi.f));
  1722. else
  1723. m = lrintf((1 << 24) * fabsf(fi.f));
  1724. }
  1725. else
  1726. {
  1727. e = new_exp + 15;
  1728. if (toward_zero)
  1729. m = (int)truncf((float)flt_m * (1.0f / (float)(1 << 13)));
  1730. else
  1731. m = lrintf((float)flt_m * (1.0f / (float)(1 << 13)));
  1732. }
  1733. }
  1734. assert((0 <= m) && (m <= 1024));
  1735. if (m == 1024)
  1736. {
  1737. e++;
  1738. m = 0;
  1739. }
  1740. assert((s >= 0) && (s <= 1));
  1741. assert((e >= 0) && (e <= 31));
  1742. assert((m >= 0) && (m <= 1023));
  1743. half_float result = (half_float)((s << 15) | (e << 10) | m);
  1744. return result;
  1745. }
  1746. float half_to_float(half_float hval)
  1747. {
  1748. union { float f; uint32_t u; } x = { 0 };
  1749. uint32_t s = ((uint32_t)hval >> 15) & 1;
  1750. uint32_t e = ((uint32_t)hval >> 10) & 0x1F;
  1751. uint32_t m = (uint32_t)hval & 0x3FF;
  1752. if (!e)
  1753. {
  1754. if (!m)
  1755. {
  1756. // +- 0
  1757. x.u = s << 31;
  1758. return x.f;
  1759. }
  1760. else
  1761. {
  1762. // denormalized
  1763. while (!(m & 0x00000400))
  1764. {
  1765. m <<= 1;
  1766. --e;
  1767. }
  1768. ++e;
  1769. m &= ~0x00000400;
  1770. }
  1771. }
  1772. else if (e == 31)
  1773. {
  1774. if (m == 0)
  1775. {
  1776. // +/- INF
  1777. x.u = (s << 31) | 0x7f800000;
  1778. return x.f;
  1779. }
  1780. else
  1781. {
  1782. // +/- NaN
  1783. x.u = (s << 31) | 0x7f800000 | (m << 13);
  1784. return x.f;
  1785. }
  1786. }
  1787. e = e + (127 - 15);
  1788. m = m << 13;
  1789. assert(s <= 1);
  1790. assert(m <= 0x7FFFFF);
  1791. assert(e <= 255);
  1792. x.u = m | (e << 23) | (s << 31);
  1793. return x.f;
  1794. }
  1795. static inline half_float qlog16_to_half(int k)
  1796. {
  1797. assert((k >= 0) && (k <= 0xFFFF));
  1798. int E = (k & 0xF800) >> 11;
  1799. int M = k & 0x7FF;
  1800. int Mt;
  1801. if (M < 512)
  1802. Mt = 3 * M;
  1803. else if (M >= 1536)
  1804. Mt = 5 * M - 2048;
  1805. else
  1806. Mt = 4 * M - 512;
  1807. return (half_float)((E << 10) + (Mt >> 3));
  1808. }
  // RGB9E5 shared-exponent format parameters.
  // See https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt
  const int RGB9E5_EXPONENT_BITS = 5;
  const int RGB9E5_MANTISSA_BITS = 9;
  const int RGB9E5_EXP_BIAS = 15;
  const int RGB9E5_MAX_VALID_BIASED_EXP = 31;

  // Largest unbiased exponent.
  const int MAX_RGB9E5_EXP = RGB9E5_MAX_VALID_BIASED_EXP - RGB9E5_EXP_BIAS;

  // Number of distinct mantissa values, and the largest of them.
  const int RGB9E5_MANTISSA_VALUES = 1 << RGB9E5_MANTISSA_BITS;
  const int MAX_RGB9E5_MANTISSA = RGB9E5_MANTISSA_VALUES - 1;

  //const int MAX_RGB9E5 = (int)(((float)MAX_RGB9E5_MANTISSA) / RGB9E5_MANTISSA_VALUES * (1 << MAX_RGB9E5_EXP));

  // Note: the quotient is far below 1, so the int cast makes this constant 0.
  const int EPSILON_RGB9E5 = (int)((1.0f / (float)RGB9E5_MANTISSA_VALUES) / (float)(1 << RGB9E5_EXP_BIAS));
  1816. void unpack_rgb9e5(uint32_t packed, float& r, float& g, float& b)
  1817. {
  1818. int x = packed & 511;
  1819. int y = (packed >> 9) & 511;
  1820. int z = (packed >> 18) & 511;
  1821. int w = (packed >> 27) & 31;
  1822. const float scale = powf(2.0f, static_cast<float>(w - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS));
  1823. r = x * scale;
  1824. g = y * scale;
  1825. b = z * scale;
  1826. }
  // Returns the unbiased exponent field of 'x' (the 8 exponent bits of the float, minus 127).
  // This is not a correct floor(log2) for zero and denormal inputs (they give -127); the caller
  // takes a max with the minimum rgb9e5 exponent, which hides those cases.
  static inline int floor_log2(float x)
  {
      union
      {
          float value;
          unsigned int raw;
      } pun;

      pun.value = x;

      // Extract float exponent
      const int biased_exponent = (int)((pun.raw >> 23) & 0xFF);
      return biased_exponent - 127;
  }
  // Returns the larger of two ints ('b' when they are equal).
  static inline int maximumi(int a, int b)
  {
      if (a > b)
          return a;
      return b;
  }
  // Returns the larger of two floats; 'b' is returned whenever (a > b) is false.
  static inline float maximumf(float a, float b)
  {
      if (a > b)
          return a;
      return b;
  }
  1842. uint32_t pack_rgb9e5(float r, float g, float b)
  1843. {
  1844. r = clampf(r, 0.0f, MAX_RGB9E5);
  1845. g = clampf(g, 0.0f, MAX_RGB9E5);
  1846. b = clampf(b, 0.0f, MAX_RGB9E5);
  1847. float maxrgb = maximumf(maximumf(r, g), b);
  1848. int exp_shared = maximumi(-RGB9E5_EXP_BIAS - 1, floor_log2(maxrgb)) + 1 + RGB9E5_EXP_BIAS;
  1849. assert((exp_shared >= 0) && (exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP));
  1850. float denom = powf(2.0f, (float)(exp_shared - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS));
  1851. int maxm = (int)floorf((maxrgb / denom) + 0.5f);
  1852. if (maxm == (MAX_RGB9E5_MANTISSA + 1))
  1853. {
  1854. denom *= 2;
  1855. exp_shared += 1;
  1856. assert(exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP);
  1857. }
  1858. else
  1859. {
  1860. assert(maxm <= MAX_RGB9E5_MANTISSA);
  1861. }
  1862. int rm = (int)floorf((r / denom) + 0.5f);
  1863. int gm = (int)floorf((g / denom) + 0.5f);
  1864. int bm = (int)floorf((b / denom) + 0.5f);
  1865. assert((rm >= 0) && (rm <= MAX_RGB9E5_MANTISSA));
  1866. assert((gm >= 0) && (gm <= MAX_RGB9E5_MANTISSA));
  1867. assert((bm >= 0) && (bm <= MAX_RGB9E5_MANTISSA));
  1868. return rm | (gm << 9) | (bm << 18) | (exp_shared << 27);
  1869. }
  // Counts the leading zero bits of a 17-bit value (bit 16 is the most significant bit).
  // Returns 17 for zero. 'x' must be <= 0x1FFFF.
  static inline int clz17(uint32_t x)
  {
      assert(x <= 0x1FFFF);
      x &= 0x1FFFF;

      // Walk from bit 16 downwards until a set bit is found (or all 17 bits were clear).
      int leading_zeros = 0;
      for (uint32_t probe = 0x10000; (probe != 0) && ((x & probe) == 0); probe >>= 1)
          leading_zeros++;

      return leading_zeros;
  }
  1884. static inline uint32_t pack_rgb9e5_ldr_astc(int Cr, int Cg, int Cb)
  1885. {
  1886. int lz = clz17(Cr | Cg | Cb | 1);
  1887. if (Cr == 65535) { Cr = 65536; lz = 0; }
  1888. if (Cg == 65535) { Cg = 65536; lz = 0; }
  1889. if (Cb == 65535) { Cb = 65536; lz = 0; }
  1890. Cr <<= lz; Cg <<= lz; Cb <<= lz;
  1891. Cr = (Cr >> 8) & 0x1FF;
  1892. Cg = (Cg >> 8) & 0x1FF;
  1893. Cb = (Cb >> 8) & 0x1FF;
  1894. uint32_t exponent = 16 - lz;
  1895. uint32_t texel = (exponent << 27) | (Cb << 18) | (Cg << 9) | Cr;
  1896. return texel;
  1897. }
  // Packs three half-float bit patterns (Cr, Cg, Cb) into an RGB9E5 texel using integer operations only.
  // Inputs above 0x7c00 become 0 and an input of exactly 0x7c00 becomes 0x7bff before packing.
  // The shared exponent follows the channel with the largest half exponent; the other channels'
  // mantissas are shifted right to match it.
  static inline uint32_t pack_rgb9e5_hdr_astc(int Cr, int Cg, int Cb)
  {
      if (Cr > 0x7c00) Cr = 0; else if (Cr == 0x7c00) Cr = 0x7bff;
      if (Cg > 0x7c00) Cg = 0; else if (Cg == 0x7c00) Cg = 0x7bff;
      if (Cb > 0x7c00) Cb = 0; else if (Cb == 0x7c00) Cb = 0x7bff;

      // Half exponent fields.
      const int Re = (Cr >> 10) & 0x1F;
      const int Ge = (Cg >> 10) & 0x1F;
      const int Be = (Cb >> 10) & 0x1F;

      uint32_t rshift, gshift, bshift, expo;

      if ((Re | Ge | Be) == 0)
      {
          // All three channels are zero or denormal: exponent and shifts are 1 only when some
          // channel has mantissa bit 9 set.
          const int top_mantissa_bit = ((Cr | Cg | Cb) & 0x200) >> 9;
          expo = rshift = gshift = bshift = top_mantissa_bit;
      }
      else
      {
          // A zero exponent field counts as exponent 1.
          const int Rex = (Re == 0) ? 1 : Re;
          const int Gex = (Ge == 0) ? 1 : Ge;
          const int Bex = (Be == 0) ? 1 : Be;

          // The largest exponent sets the shared exponent; that channel is shifted by 2 and
          // each other channel by 2 plus its exponent difference.
          int max_ex = Rex;
          if (Gex > max_ex)
              max_ex = Gex;
          if (Bex > max_ex)
              max_ex = Bex;

          expo = max_ex + 1;
          rshift = max_ex - Rex + 2;
          gshift = max_ex - Gex + 2;
          bshift = max_ex - Bex + 2;
      }

      // 10-bit mantissas, with the implicit leading one added for non-zero exponent fields.
      int Rm = (Cr & 0x3FF) | ((Re != 0) ? 0x400 : 0);
      int Gm = (Cg & 0x3FF) | ((Ge != 0) ? 0x400 : 0);
      int Bm = (Cb & 0x3FF) | ((Be != 0) ? 0x400 : 0);

      Rm = (Rm >> rshift) & 0x1FF;
      Gm = (Gm >> gshift) & 0x1FF;
      Bm = (Bm >> bshift) & 0x1FF;

      return (expo << 27) | (Bm << 18) | (Gm << 9) | (Rm << 0);
  }
  1946. // Important: pPixels is either 32-bit/texel or 64-bit/texel.
  1947. bool decode_block(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode)
  1948. {
  1949. assert(is_valid_block_size(blk_width, blk_height));
  1950. assert(g_dequant_tables.m_endpoints[0].m_ISE_to_val.size());
  1951. if (!g_dequant_tables.m_endpoints[0].m_ISE_to_val.size())
  1952. return false;
  1953. const uint32_t num_blk_pixels = blk_width * blk_height;
  1954. // Write block error color
  1955. if (dec_mode == cDecodeModeHDR16)
  1956. {
  1957. // NaN's
  1958. memset(pPixels, 0xFF, num_blk_pixels * sizeof(half_float) * 4);
  1959. }
  1960. else if (dec_mode == cDecodeModeRGB9E5)
  1961. {
  1962. const uint32_t purple_9e5 = pack_rgb9e5(1.0f, 0.0f, 1.0f);
  1963. for (uint32_t i = 0; i < num_blk_pixels; i++)
  1964. ((uint32_t*)pPixels)[i] = purple_9e5;
  1965. }
  1966. else
  1967. {
  1968. for (uint32_t i = 0; i < num_blk_pixels; i++)
  1969. ((uint32_t*)pPixels)[i] = 0xFFFF00FF;
  1970. }
  1971. if (log_blk.m_error_flag)
  1972. {
  1973. // Should this return false? It's not an invalid logical block config, though.
  1974. return false;
  1975. }
  1976. // Handle solid color blocks
  1977. if (log_blk.m_solid_color_flag_ldr)
  1978. {
  1979. // LDR solid block
  1980. if (dec_mode == cDecodeModeHDR16)
  1981. {
  1982. // Convert LDR pixels to half-float
  1983. half_float h[4];
  1984. for (uint32_t c = 0; c < 4; c++)
  1985. h[c] = (log_blk.m_solid_color[c] == 0xFFFF) ? 0x3C00 : float_to_half((float)log_blk.m_solid_color[c] * (1.0f / 65536.0f), true);
  1986. for (uint32_t i = 0; i < num_blk_pixels; i++)
  1987. memcpy((uint16_t*)pPixels + i * 4, h, sizeof(half_float) * 4);
  1988. }
  1989. else if (dec_mode == cDecodeModeRGB9E5)
  1990. {
  1991. float r = (log_blk.m_solid_color[0] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[0] * (1.0f / 65536.0f));
  1992. float g = (log_blk.m_solid_color[1] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[1] * (1.0f / 65536.0f));
  1993. float b = (log_blk.m_solid_color[2] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[2] * (1.0f / 65536.0f));
  1994. const uint32_t packed = pack_rgb9e5(r, g, b);
  1995. for (uint32_t i = 0; i < num_blk_pixels; i++)
  1996. ((uint32_t*)pPixels)[i] = packed;
  1997. }
  1998. else
  1999. {
  2000. // Convert LDR pixels to 8-bits
  2001. for (uint32_t i = 0; i < num_blk_pixels; i++)
  2002. for (uint32_t c = 0; c < 4; c++)
  2003. ((uint8_t*)pPixels)[i * 4 + c] = (log_blk.m_solid_color[c] >> 8);
  2004. }
  2005. return true;
  2006. }
  2007. else if (log_blk.m_solid_color_flag_hdr)
  2008. {
  2009. // HDR solid block, decode mode must be half-float or RGB9E5
  2010. if (dec_mode == cDecodeModeHDR16)
  2011. {
  2012. for (uint32_t i = 0; i < num_blk_pixels; i++)
  2013. memcpy((uint16_t*)pPixels + i * 4, log_blk.m_solid_color, sizeof(half_float) * 4);
  2014. }
  2015. else if (dec_mode == cDecodeModeRGB9E5)
  2016. {
  2017. float r = half_to_float(log_blk.m_solid_color[0]);
  2018. float g = half_to_float(log_blk.m_solid_color[1]);
  2019. float b = half_to_float(log_blk.m_solid_color[2]);
  2020. const uint32_t packed = pack_rgb9e5(r, g, b);
  2021. for (uint32_t i = 0; i < num_blk_pixels; i++)
  2022. ((uint32_t*)pPixels)[i] = packed;
  2023. }
  2024. else
  2025. {
  2026. return false;
  2027. }
  2028. return true;
  2029. }
  2030. // Sanity check block's config
  2031. if ((log_blk.m_grid_width < 2) || (log_blk.m_grid_height < 2))
  2032. return false;
  2033. if ((log_blk.m_grid_width > blk_width) || (log_blk.m_grid_height > blk_height))
  2034. return false;
  2035. if ((log_blk.m_endpoint_ise_range < FIRST_VALID_ENDPOINT_ISE_RANGE) || (log_blk.m_endpoint_ise_range > LAST_VALID_ENDPOINT_ISE_RANGE))
  2036. return false;
  2037. if ((log_blk.m_weight_ise_range < FIRST_VALID_WEIGHT_ISE_RANGE) || (log_blk.m_weight_ise_range > LAST_VALID_WEIGHT_ISE_RANGE))
  2038. return false;
  2039. if ((log_blk.m_num_partitions < 1) || (log_blk.m_num_partitions > MAX_PARTITIONS))
  2040. return false;
  2041. if ((log_blk.m_dual_plane) && (log_blk.m_num_partitions > MAX_DUAL_PLANE_PARTITIONS))
  2042. return false;
  2043. if (log_blk.m_partition_id >= NUM_PARTITION_PATTERNS)
  2044. return false;
  2045. if ((log_blk.m_num_partitions == 1) && (log_blk.m_partition_id > 0))
  2046. return false;
  2047. if (log_blk.m_color_component_selector > 3)
  2048. return false;
  2049. const uint32_t total_endpoint_levels = get_ise_levels(log_blk.m_endpoint_ise_range);
  2050. const uint32_t total_weight_levels = get_ise_levels(log_blk.m_weight_ise_range);
  2051. bool is_ldr_endpoints[MAX_PARTITIONS];
  2052. // Check CEM's
  2053. uint32_t total_cem_vals = 0;
  2054. for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
  2055. {
  2056. if (log_blk.m_color_endpoint_modes[i] > 15)
  2057. return false;
  2058. total_cem_vals += get_num_cem_values(log_blk.m_color_endpoint_modes[i]);
  2059. is_ldr_endpoints[i] = is_cem_ldr(log_blk.m_color_endpoint_modes[i]);
  2060. }
  2061. if (total_cem_vals > MAX_ENDPOINTS)
  2062. return false;
  2063. const dequant_table& endpoint_dequant_tab = g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range);
  2064. const uint8_t* pEndpoint_dequant = endpoint_dequant_tab.m_ISE_to_val.data();
  2065. // Dequantized endpoints to [0,255]
  2066. uint8_t dequantized_endpoints[MAX_ENDPOINTS];
  2067. for (uint32_t i = 0; i < total_cem_vals; i++)
  2068. {
  2069. if (log_blk.m_endpoints[i] >= total_endpoint_levels)
  2070. return false;
  2071. dequantized_endpoints[i] = pEndpoint_dequant[log_blk.m_endpoints[i]];
  2072. }
  2073. // Dequantize weights to [0,64]
  2074. uint8_t dequantized_weights[2][12 * 12];
  2075. const dequant_table& weight_dequant_tab = g_dequant_tables.get_weight_tab(log_blk.m_weight_ise_range);
  2076. const uint8_t* pWeight_dequant = weight_dequant_tab.m_ISE_to_val.data();
  2077. const uint32_t total_weight_vals = (log_blk.m_dual_plane ? 2 : 1) * log_blk.m_grid_width * log_blk.m_grid_height;
  2078. for (uint32_t i = 0; i < total_weight_vals; i++)
  2079. {
  2080. if (log_blk.m_weights[i] >= total_weight_levels)
  2081. return false;
  2082. const uint32_t plane_index = log_blk.m_dual_plane ? (i & 1) : 0;
  2083. const uint32_t grid_index = log_blk.m_dual_plane ? (i >> 1) : i;
  2084. dequantized_weights[plane_index][grid_index] = pWeight_dequant[log_blk.m_weights[i]];
  2085. }
  2086. // Upsample weight grid. [0,64] weights
  2087. uint8_t upsampled_weights[2][12 * 12];
  2088. upsample_weight_grid(blk_width, blk_height, log_blk.m_grid_width, log_blk.m_grid_height, &dequantized_weights[0][0], &upsampled_weights[0][0]);
  2089. if (log_blk.m_dual_plane)
  2090. upsample_weight_grid(blk_width, blk_height, log_blk.m_grid_width, log_blk.m_grid_height, &dequantized_weights[1][0], &upsampled_weights[1][0]);
  2091. // Decode CEM's
  2092. int endpoints[4][4][2]; // [subset][comp][l/h]
  2093. uint32_t endpoint_val_index = 0;
  2094. for (uint32_t subset = 0; subset < log_blk.m_num_partitions; subset++)
  2095. {
  2096. const uint32_t cem_index = log_blk.m_color_endpoint_modes[subset];
  2097. decode_endpoint(cem_index, &endpoints[subset][0], &dequantized_endpoints[endpoint_val_index]);
  2098. endpoint_val_index += get_num_cem_values(cem_index);
  2099. }
  2100. // Decode texels
  2101. const bool small_block = num_blk_pixels < 31;
  2102. const bool use_precomputed_texel_partitions = (blk_width == 4) && (blk_height == 4) && (log_blk.m_num_partitions >= 2) && (log_blk.m_num_partitions <= 3);
  2103. const uint32_t ccs = log_blk.m_dual_plane ? log_blk.m_color_component_selector : UINT32_MAX;
  2104. bool success = true;
  2105. if (dec_mode == cDecodeModeRGB9E5)
  2106. {
  2107. // returns uint32_t's
  2108. for (uint32_t y = 0; y < blk_height; y++)
  2109. {
  2110. for (uint32_t x = 0; x < blk_width; x++)
  2111. {
  2112. const uint32_t pixel_index = x + y * blk_width;
  2113. const uint32_t subset = (log_blk.m_num_partitions > 1) ?
  2114. (use_precomputed_texel_partitions ? get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions) : compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block))
  2115. : 0;
  2116. int comp[3];
  2117. for (uint32_t c = 0; c < 3; c++)
  2118. {
  2119. const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index];
  2120. if (is_ldr_endpoints[subset])
  2121. {
  2122. assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFF));
  2123. assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFF));
  2124. int le = endpoints[subset][c][0];
  2125. int he = endpoints[subset][c][1];
  2126. le = (le << 8) | le;
  2127. he = (he << 8) | he;
  2128. int k = weight_interpolate(le, he, w);
  2129. assert((k >= 0) && (k <= 0xFFFF));
  2130. comp[c] = k; // 1.0
  2131. }
  2132. else
  2133. {
  2134. assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFFF));
  2135. assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFFF));
  2136. int le = endpoints[subset][c][0] << 4;
  2137. int he = endpoints[subset][c][1] << 4;
  2138. int qlog16 = weight_interpolate(le, he, w);
  2139. comp[c] = qlog16_to_half(qlog16);
  2140. if (is_half_inf_or_nan((half_float)comp[c]))
  2141. comp[c] = 0x7BFF;
  2142. }
  2143. } // c
  2144. uint32_t packed;
  2145. if (is_ldr_endpoints[subset])
  2146. packed = pack_rgb9e5_ldr_astc(comp[0], comp[1], comp[2]);
  2147. else
  2148. packed = pack_rgb9e5_hdr_astc(comp[0], comp[1], comp[2]);
  2149. ((uint32_t*)pPixels)[pixel_index] = packed;
  2150. } // x
  2151. } // y
  2152. }
  2153. else if (dec_mode == cDecodeModeHDR16)
  2154. {
  2155. // Note: must round towards zero when converting float to half for ASTC (18.19 Weight Application)
  2156. // returns half floats
  2157. for (uint32_t y = 0; y < blk_height; y++)
  2158. {
  2159. for (uint32_t x = 0; x < blk_width; x++)
  2160. {
  2161. const uint32_t pixel_index = x + y * blk_width;
  2162. const uint32_t subset = (log_blk.m_num_partitions > 1) ?
  2163. (use_precomputed_texel_partitions ? get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions) : compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block))
  2164. : 0;
  2165. for (uint32_t c = 0; c < 4; c++)
  2166. {
  2167. const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index];
  2168. half_float o;
  2169. if ( (is_ldr_endpoints[subset]) ||
  2170. ((log_blk.m_color_endpoint_modes[subset] == CEM_HDR_RGB_LDR_ALPHA) && (c == 3)) )
  2171. {
  2172. assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFF));
  2173. assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFF));
  2174. int le = endpoints[subset][c][0];
  2175. int he = endpoints[subset][c][1];
  2176. le = (le << 8) | le;
  2177. he = (he << 8) | he;
  2178. int k = weight_interpolate(le, he, w);
  2179. assert((k >= 0) && (k <= 0xFFFF));
  2180. if (k == 0xFFFF)
  2181. o = 0x3C00; // 1.0
  2182. else
  2183. o = float_to_half((float)k * (1.0f / 65536.0f), true);
  2184. }
  2185. else
  2186. {
  2187. assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFFF));
  2188. assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFFF));
  2189. int le = endpoints[subset][c][0] << 4;
  2190. int he = endpoints[subset][c][1] << 4;
  2191. int qlog16 = weight_interpolate(le, he, w);
  2192. o = qlog16_to_half(qlog16);
  2193. if (is_half_inf_or_nan(o))
  2194. o = 0x7BFF;
  2195. }
  2196. ((half_float*)pPixels)[pixel_index * 4 + c] = o;
  2197. }
  2198. } // x
  2199. } // y
  2200. }
  2201. else
  2202. {
  2203. // returns uint8_t's
  2204. for (uint32_t y = 0; y < blk_height; y++)
  2205. {
  2206. for (uint32_t x = 0; x < blk_width; x++)
  2207. {
  2208. const uint32_t pixel_index = x + y * blk_width;
  2209. const uint32_t subset = (log_blk.m_num_partitions > 1) ?
  2210. (use_precomputed_texel_partitions ? get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions) : compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block))
  2211. : 0;
  2212. if (!is_ldr_endpoints[subset])
  2213. {
  2214. ((uint32_t*)pPixels)[pixel_index * 4] = 0xFFFF00FF;
  2215. success = false;
  2216. }
  2217. else
  2218. {
  2219. for (uint32_t c = 0; c < 4; c++)
  2220. {
  2221. const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index];
  2222. int le = endpoints[subset][c][0];
  2223. int he = endpoints[subset][c][1];
  2224. // FIXME: the spec is apparently wrong? this matches ARM's and Google's decoder
  2225. //if ((dec_mode == cDecodeModeSRGB8) && (c <= 2))
  2226. // See https://github.com/ARM-software/astc-encoder/issues/447
  2227. if (dec_mode == cDecodeModeSRGB8)
  2228. {
  2229. le = (le << 8) | 0x80;
  2230. he = (he << 8) | 0x80;
  2231. }
  2232. else
  2233. {
  2234. le = (le << 8) | le;
  2235. he = (he << 8) | he;
  2236. }
  2237. uint32_t k = weight_interpolate(le, he, w);
  2238. // FIXME: This is what the spec says to do in LDR mode, but this is not what ARM's decoder does
  2239. // See decompress_symbolic_block(), decode_texel() and unorm16_to_sf16.
  2240. // It seems to effectively divide by 65535.0 and convert to FP16, then back to float, mul by 255.0, add .5 and then convert to 8-bit.
  2241. ((uint8_t*)pPixels)[pixel_index * 4 + c] = (uint8_t)(k >> 8);
  2242. }
  2243. }
  2244. } // x
  2245. } // y
  2246. }
  2247. return success;
  2248. }
  2249. //------------------------------------------------
  2250. // Physical to logical block decoding
  2251. // unsigned 128-bit int, with some signed helpers
  2252. class uint128
  2253. {
  2254. uint64_t m_lo, m_hi;
  2255. public:
  2256. uint128() = default;
  2257. inline uint128(uint64_t lo) : m_lo(lo), m_hi(0) { }
  2258. inline uint128(uint64_t lo, uint64_t hi) : m_lo(lo), m_hi(hi) { }
  2259. inline uint128(const uint128& other) : m_lo(other.m_lo), m_hi(other.m_hi) { }
  2260. inline uint128& set_signed(int64_t lo) { m_lo = lo; m_hi = (lo < 0) ? UINT64_MAX : 0; return *this; }
  2261. inline uint128& set(uint64_t lo) { m_lo = lo; m_hi = 0; return *this; }
  2262. inline explicit operator uint8_t () const { return (uint8_t)m_lo; }
  2263. inline explicit operator uint16_t () const { return (uint16_t)m_lo; }
  2264. inline explicit operator uint32_t () const { return (uint32_t)m_lo; }
  2265. inline explicit operator uint64_t () const { return m_lo; }
  2266. inline uint128& operator= (const uint128& rhs) { m_lo = rhs.m_lo; m_hi = rhs.m_hi; return *this; }
  2267. inline uint128& operator= (const uint64_t val) { m_lo = val; m_hi = 0; return *this; }
  2268. inline uint64_t get_low() const { return m_lo; }
  2269. inline uint64_t& get_low() { return m_lo; }
  2270. inline uint64_t get_high() const { return m_hi; }
  2271. inline uint64_t& get_high() { return m_hi; }
  2272. inline bool operator== (const uint128& rhs) const { return (m_lo == rhs.m_lo) && (m_hi == rhs.m_hi); }
  2273. inline bool operator!= (const uint128& rhs) const { return (m_lo != rhs.m_lo) || (m_hi != rhs.m_hi); }
  2274. inline bool operator< (const uint128& rhs) const
  2275. {
  2276. if (m_hi < rhs.m_hi)
  2277. return true;
  2278. if (m_hi == rhs.m_hi)
  2279. {
  2280. if (m_lo < rhs.m_lo)
  2281. return true;
  2282. }
  2283. return false;
  2284. }
  2285. inline bool operator> (const uint128& rhs) const { return (rhs < *this); }
  2286. inline bool operator<= (const uint128& rhs) const { return (*this == rhs) || (*this < rhs); }
  2287. inline bool operator>= (const uint128& rhs) const { return (*this == rhs) || (*this > rhs); }
  2288. inline bool is_zero() const { return (m_lo == 0) && (m_hi == 0); }
  2289. inline bool is_all_ones() const { return (m_lo == UINT64_MAX) && (m_hi == UINT64_MAX); }
  2290. inline bool is_non_zero() const { return (m_lo != 0) || (m_hi != 0); }
  2291. inline explicit operator bool() const { return is_non_zero(); }
  2292. inline bool is_signed() const { return ((int64_t)m_hi) < 0; }
  2293. inline bool signed_less(const uint128& rhs) const
  2294. {
  2295. const bool l_signed = is_signed(), r_signed = rhs.is_signed();
  2296. if (l_signed == r_signed)
  2297. return *this < rhs;
  2298. if (l_signed && !r_signed)
  2299. return true;
  2300. assert(!l_signed && r_signed);
  2301. return false;
  2302. }
  2303. inline bool signed_greater(const uint128& rhs) const { return rhs.signed_less(*this); }
  2304. inline bool signed_less_equal(const uint128& rhs) const { return !rhs.signed_less(*this); }
  2305. inline bool signed_greater_equal(const uint128& rhs) const { return !signed_less(rhs); }
  2306. double get_double() const
  2307. {
  2308. double res = 0;
  2309. if (m_hi)
  2310. res = (double)m_hi * pow(2.0f, 64.0f);
  2311. res += (double)m_lo;
  2312. return res;
  2313. }
  2314. double get_signed_double() const
  2315. {
  2316. if (is_signed())
  2317. return -(uint128(*this).abs().get_double());
  2318. else
  2319. return get_double();
  2320. }
  2321. inline uint128 abs() const
  2322. {
  2323. uint128 res(*this);
  2324. if (res.is_signed())
  2325. res = -res;
  2326. return res;
  2327. }
  2328. inline uint128& operator<<= (int shift)
  2329. {
  2330. assert(shift >= 0);
  2331. if (shift < 0)
  2332. return *this;
  2333. m_hi = (shift >= 64) ? ((shift >= 128) ? 0 : (m_lo << (shift - 64))) : (m_hi << shift);
  2334. if ((shift) && (shift < 64))
  2335. m_hi |= (m_lo >> (64 - shift));
  2336. m_lo = (shift >= 64) ? 0 : (m_lo << shift);
  2337. return *this;
  2338. }
  2339. inline uint128 operator<< (int shift) const { uint128 res(*this); res <<= shift; return res; }
  2340. inline uint128& operator>>= (int shift)
  2341. {
  2342. assert(shift >= 0);
  2343. if (shift < 0)
  2344. return *this;
  2345. m_lo = (shift >= 64) ? ((shift >= 128) ? 0 : (m_hi >> (shift - 64))) : (m_lo >> shift);
  2346. if ((shift) && (shift < 64))
  2347. m_lo |= (m_hi << (64 - shift));
  2348. m_hi = (shift >= 64) ? 0 : (m_hi >> shift);
  2349. return *this;
  2350. }
  2351. inline uint128 operator>> (int shift) const { uint128 res(*this); res >>= shift; return res; }
  2352. inline uint128 signed_shift_right(int shift) const
  2353. {
  2354. uint128 res(*this);
  2355. res >>= shift;
  2356. if (is_signed())
  2357. {
  2358. uint128 x(0U);
  2359. x = ~x;
  2360. x >>= shift;
  2361. res |= (~x);
  2362. }
  2363. return res;
  2364. }
  2365. inline uint128& operator |= (const uint128& rhs) { m_lo |= rhs.m_lo; m_hi |= rhs.m_hi; return *this; }
  2366. inline uint128 operator | (const uint128& rhs) const { uint128 res(*this); res |= rhs; return res; }
  2367. inline uint128& operator &= (const uint128& rhs) { m_lo &= rhs.m_lo; m_hi &= rhs.m_hi; return *this; }
  2368. inline uint128 operator & (const uint128& rhs) const { uint128 res(*this); res &= rhs; return res; }
  2369. inline uint128& operator ^= (const uint128& rhs) { m_lo ^= rhs.m_lo; m_hi ^= rhs.m_hi; return *this; }
  2370. inline uint128 operator ^ (const uint128& rhs) const { uint128 res(*this); res ^= rhs; return res; }
  2371. inline uint128 operator ~() const { return uint128(~m_lo, ~m_hi); }
  2372. inline uint128 operator -() const { uint128 res(~*this); if (++res.m_lo == 0) ++res.m_hi; return res; }
  2373. // prefix
  2374. inline uint128 operator ++()
  2375. {
  2376. if (++m_lo == 0)
  2377. ++m_hi;
  2378. return *this;
  2379. }
  2380. // postfix
  2381. inline uint128 operator ++(int)
  2382. {
  2383. uint128 res(*this);
  2384. if (++m_lo == 0)
  2385. ++m_hi;
  2386. return res;
  2387. }
  2388. // prefix
  2389. inline uint128 operator --()
  2390. {
  2391. const uint64_t t = m_lo;
  2392. if (--m_lo > t)
  2393. --m_hi;
  2394. return *this;
  2395. }
  2396. // postfix
  2397. inline uint128 operator --(int)
  2398. {
  2399. const uint64_t t = m_lo;
  2400. uint128 res(*this);
  2401. if (--m_lo > t)
  2402. --m_hi;
  2403. return res;
  2404. }
  2405. inline uint128& operator+= (const uint128& rhs)
  2406. {
  2407. const uint64_t t = m_lo + rhs.m_lo;
  2408. m_hi = m_hi + rhs.m_hi + (t < m_lo);
  2409. m_lo = t;
  2410. return *this;
  2411. }
  2412. inline uint128 operator+ (const uint128& rhs) const { uint128 res(*this); res += rhs; return res; }
  2413. inline uint128& operator-= (const uint128& rhs)
  2414. {
  2415. const uint64_t t = m_lo - rhs.m_lo;
  2416. m_hi = m_hi - rhs.m_hi - (t > m_lo);
  2417. m_lo = t;
  2418. return *this;
  2419. }
  2420. inline uint128 operator- (const uint128& rhs) const { uint128 res(*this); res -= rhs; return res; }
  2421. // computes bit by bit, very slow
  2422. uint128& operator*=(const uint128& rhs)
  2423. {
  2424. uint128 temp(*this), result(0U);
  2425. for (uint128 bitmask(rhs); bitmask; bitmask >>= 1, temp <<= 1)
  2426. if (bitmask.get_low() & 1)
  2427. result += temp;
  2428. *this = result;
  2429. return *this;
  2430. }
  2431. uint128 operator*(const uint128& rhs) const { uint128 res(*this); res *= rhs; return res; }
  2432. // computes bit by bit, very slow
  2433. friend uint128 divide(const uint128& dividend, const uint128& divisor, uint128& remainder)
  2434. {
  2435. remainder = 0;
  2436. if (!divisor)
  2437. {
  2438. assert(0);
  2439. return ~uint128(0U);
  2440. }
  2441. uint128 quotient(0), one(1);
  2442. for (int i = 127; i >= 0; i--)
  2443. {
  2444. remainder = (remainder << 1) | ((dividend >> i) & one);
  2445. if (remainder >= divisor)
  2446. {
  2447. remainder -= divisor;
  2448. quotient |= (one << i);
  2449. }
  2450. }
  2451. return quotient;
  2452. }
  2453. uint128 operator/(const uint128& rhs) const { uint128 remainder, res; res = divide(*this, rhs, remainder); return res; }
  2454. uint128 operator/=(const uint128& rhs) { uint128 remainder; *this = divide(*this, rhs, remainder); return *this; }
  2455. uint128 operator%(const uint128& rhs) const { uint128 remainder; divide(*this, rhs, remainder); return remainder; }
  2456. uint128 operator%=(const uint128& rhs) { uint128 remainder; divide(*this, rhs, remainder); *this = remainder; return *this; }
  2457. void print_hex(FILE* pFile) const
  2458. {
  2459. fprintf(pFile, "0x%016llx%016llx", (unsigned long long int)m_hi, (unsigned long long int)m_lo);
  2460. }
  2461. void format_unsigned(std::string& res) const
  2462. {
  2463. basisu::vector<uint8_t> digits;
  2464. digits.reserve(39 + 1);
  2465. uint128 k(*this), ten(10);
  2466. do
  2467. {
  2468. uint128 r;
  2469. k = divide(k, ten, r);
  2470. digits.push_back((uint8_t)r);
  2471. } while (k);
  2472. for (int i = (int)digits.size() - 1; i >= 0; i--)
  2473. res += ('0' + digits[i]);
  2474. }
  2475. void format_signed(std::string& res) const
  2476. {
  2477. uint128 val(*this);
  2478. if (val.is_signed())
  2479. {
  2480. res.push_back('-');
  2481. val = -val;
  2482. }
  2483. val.format_unsigned(res);
  2484. }
  2485. void print_unsigned(FILE* pFile)
  2486. {
  2487. std::string str;
  2488. format_unsigned(str);
  2489. fprintf(pFile, "%s", str.c_str());
  2490. }
  2491. void print_signed(FILE* pFile)
  2492. {
  2493. std::string str;
  2494. format_signed(str);
  2495. fprintf(pFile, "%s", str.c_str());
  2496. }
  2497. uint128 get_reversed_bits() const
  2498. {
  2499. uint128 res;
  2500. const uint32_t* pSrc = (const uint32_t*)this;
  2501. uint32_t* pDst = (uint32_t*)&res;
  2502. pDst[0] = rev_dword(pSrc[3]);
  2503. pDst[1] = rev_dword(pSrc[2]);
  2504. pDst[2] = rev_dword(pSrc[1]);
  2505. pDst[3] = rev_dword(pSrc[0]);
  2506. return res;
  2507. }
  2508. uint128 get_byteswapped() const
  2509. {
  2510. uint128 res;
  2511. const uint8_t* pSrc = (const uint8_t*)this;
  2512. uint8_t* pDst = (uint8_t*)&res;
  2513. for (uint32_t i = 0; i < 16; i++)
  2514. pDst[i] = pSrc[15 - i];
  2515. return res;
  2516. }
  2517. inline uint64_t get_bits64(uint32_t bit_ofs, uint32_t bit_len) const
  2518. {
  2519. assert(bit_ofs < 128);
  2520. assert(bit_len && (bit_len <= 64) && ((bit_ofs + bit_len) <= 128));
  2521. uint128 res(*this);
  2522. res >>= bit_ofs;
  2523. const uint64_t bitmask = (bit_len == 64) ? UINT64_MAX : ((1ull << bit_len) - 1);
  2524. return res.get_low() & bitmask;
  2525. }
  2526. inline uint32_t get_bits(uint32_t bit_ofs, uint32_t bit_len) const
  2527. {
  2528. assert(bit_len <= 32);
  2529. return (uint32_t)get_bits64(bit_ofs, bit_len);
  2530. }
  2531. inline uint32_t next_bits(uint32_t& bit_ofs, uint32_t len) const
  2532. {
  2533. assert(len && (len <= 32));
  2534. uint32_t x = get_bits(bit_ofs, len);
  2535. bit_ofs += len;
  2536. return x;
  2537. }
  2538. inline uint128& set_bits(uint64_t val, uint32_t bit_ofs, uint32_t num_bits)
  2539. {
  2540. assert(bit_ofs < 128);
  2541. assert(num_bits && (num_bits <= 64) && ((bit_ofs + num_bits) <= 128));
  2542. uint128 bitmask(1);
  2543. bitmask = (bitmask << num_bits) - 1;
  2544. assert(uint128(val) <= bitmask);
  2545. bitmask <<= bit_ofs;
  2546. *this &= ~bitmask;
  2547. *this = *this | (uint128(val) << bit_ofs);
  2548. return *this;
  2549. }
  2550. };
  2551. static bool decode_void_extent(const uint128& bits, log_astc_block& log_blk)
  2552. {
  2553. if (bits.get_bits(10, 2) != 0b11)
  2554. return false;
  2555. uint32_t bit_ofs = 12;
  2556. const uint32_t min_s = bits.next_bits(bit_ofs, 13);
  2557. const uint32_t max_s = bits.next_bits(bit_ofs, 13);
  2558. const uint32_t min_t = bits.next_bits(bit_ofs, 13);
  2559. const uint32_t max_t = bits.next_bits(bit_ofs, 13);
  2560. assert(bit_ofs == 64);
  2561. const bool all_extents_all_ones = (min_s == 0x1FFF) && (max_s == 0x1FFF) && (min_t == 0x1FFF) && (max_t == 0x1FFF);
  2562. if (!all_extents_all_ones && ((min_s >= max_s) || (min_t >= max_t)))
  2563. return false;
  2564. const bool hdr_flag = bits.get_bits(9, 1) != 0;
  2565. if (hdr_flag)
  2566. log_blk.m_solid_color_flag_hdr = true;
  2567. else
  2568. log_blk.m_solid_color_flag_ldr = true;
  2569. log_blk.m_solid_color[0] = (uint16_t)bits.get_bits(64, 16);
  2570. log_blk.m_solid_color[1] = (uint16_t)bits.get_bits(80, 16);
  2571. log_blk.m_solid_color[2] = (uint16_t)bits.get_bits(96, 16);
  2572. log_blk.m_solid_color[3] = (uint16_t)bits.get_bits(112, 16);
  2573. if (log_blk.m_solid_color_flag_hdr)
  2574. {
  2575. for (uint32_t c = 0; c < 4; c++)
  2576. if (is_half_inf_or_nan(log_blk.m_solid_color[c]))
  2577. return false;
  2578. }
  2579. return true;
  2580. }
  2581. struct astc_dec_row
  2582. {
  2583. int8_t Dp_ofs, P_ofs, W_ofs, W_size, H_ofs, H_size, W_bias, H_bias, p0_ofs, p1_ofs, p2_ofs;
  2584. };
// Field layouts for the ten block mode encodings recognized by decode_config(), which selects the
// row index from the low block mode bits. The trailing comment on each row is "W_bias H_bias".
// A negative offset means the layout has no such flag; a zero size means the dimension is just the bias.
static const astc_dec_row s_dec_rows[10] =
{
    // Dp_ofs, P_ofs, W_ofs, W_size, H_ofs, H_size, W_bias, H_bias, p0_ofs, p1_ofs, p2_ofs;
    { 10, 9, 7, 2, 5, 2, 4, 2, 4, 0, 1 }, // 4 2
    { 10, 9, 7, 2, 5, 2, 8, 2, 4, 0, 1 }, // 8 2
    { 10, 9, 5, 2, 7, 2, 2, 8, 4, 0, 1 }, // 2 8
    { 10, 9, 5, 2, 7, 1, 2, 6, 4, 0, 1 }, // 2 6
    { 10, 9, 7, 1, 5, 2, 2, 2, 4, 0, 1 }, // 2 2
    { 10, 9, 0, 0, 5, 2, 12, 2, 4, 2, 3 }, // 12 2
    { 10, 9, 5, 2, 0, 0, 2, 12, 4, 2, 3 }, // 2 12
    { 10, 9, 0, 0, 0, 0, 6, 10, 4, 2, 3 }, // 6 10
    { 10, 9, 0, 0, 0, 0, 10, 6, 4, 2, 3 }, // 10 6
    { -1, -1, 5, 2, 9, 2, 6, 6, 4, 2, 3 }, // 6 6 (no Dp/P flags in this layout)
};
  2599. static bool decode_config(const uint128& bits, log_astc_block& log_blk)
  2600. {
  2601. // Reserved
  2602. if (bits.get_bits(0, 4) == 0)
  2603. return false;
  2604. // Reserved
  2605. if ((bits.get_bits(0, 2) == 0) && (bits.get_bits(6, 3) == 0b111))
  2606. {
  2607. if (bits.get_bits(2, 4) != 0b1111)
  2608. return false;
  2609. }
  2610. // Void extent
  2611. if (bits.get_bits(0, 9) == 0b111111100)
  2612. return decode_void_extent(bits, log_blk);
  2613. // Check rows
  2614. const uint32_t x0_2 = bits.get_bits(0, 2), x2_2 = bits.get_bits(2, 2);
  2615. const uint32_t x5_4 = bits.get_bits(5, 4), x8_1 = bits.get_bits(8, 1);
  2616. const uint32_t x7_2 = bits.get_bits(7, 2);
  2617. int row_index = -1;
  2618. if (x0_2 == 0)
  2619. {
  2620. if (x7_2 == 0b00)
  2621. row_index = 5;
  2622. else if (x7_2 == 0b01)
  2623. row_index = 6;
  2624. else if (x5_4 == 0b1100)
  2625. row_index = 7;
  2626. else if (x5_4 == 0b1101)
  2627. row_index = 8;
  2628. else if (x7_2 == 0b10)
  2629. row_index = 9;
  2630. }
  2631. else
  2632. {
  2633. if (x2_2 == 0b00)
  2634. row_index = 0;
  2635. else if (x2_2 == 0b01)
  2636. row_index = 1;
  2637. else if (x2_2 == 0b10)
  2638. row_index = 2;
  2639. else if ((x2_2 == 0b11) && (x8_1 == 0))
  2640. row_index = 3;
  2641. else if ((x2_2 == 0b11) && (x8_1 == 1))
  2642. row_index = 4;
  2643. }
  2644. if (row_index < 0)
  2645. return false;
  2646. const astc_dec_row& r = s_dec_rows[row_index];
  2647. bool P = false, Dp = false;
  2648. uint32_t W = r.W_bias, H = r.H_bias;
  2649. if (r.P_ofs >= 0)
  2650. P = bits.get_bits(r.P_ofs, 1) != 0;
  2651. if (r.Dp_ofs >= 0)
  2652. Dp = bits.get_bits(r.Dp_ofs, 1) != 0;
  2653. if (r.W_size)
  2654. W += bits.get_bits(r.W_ofs, r.W_size);
  2655. if (r.H_size)
  2656. H += bits.get_bits(r.H_ofs, r.H_size);
  2657. assert((W >= MIN_GRID_DIM) && (W <= MAX_BLOCK_DIM));
  2658. assert((H >= MIN_GRID_DIM) && (H <= MAX_BLOCK_DIM));
  2659. int p0 = bits.get_bits(r.p0_ofs, 1);
  2660. int p1 = bits.get_bits(r.p1_ofs, 1);
  2661. int p2 = bits.get_bits(r.p2_ofs, 1);
  2662. uint32_t p = p0 | (p1 << 1) | (p2 << 2);
  2663. if (p < 2)
  2664. return false;
  2665. log_blk.m_grid_width = W;
  2666. log_blk.m_grid_height = H;
  2667. log_blk.m_weight_ise_range = (p - 2) + (P * BISE_10_LEVELS);
  2668. assert(log_blk.m_weight_ise_range <= LAST_VALID_WEIGHT_ISE_RANGE);
  2669. log_blk.m_dual_plane = Dp;
  2670. return true;
  2671. }
  2672. static inline uint32_t read_le_dword(const uint8_t* pBytes)
  2673. {
  2674. return (pBytes[0]) | (pBytes[1] << 8U) | (pBytes[2] << 16U) | (pBytes[3] << 24U);
  2675. }
// See 18.12. Integer Sequence Encoding - tables computed by executing the decoder functions with all possible 8/7-bit inputs.
// s_trit_decode is indexed by the 8-bit packed trit value T that decode_trit_block() assembles
// from the bitstream; each entry holds the five trit values of that block, first value first.
static const uint8_t s_trit_decode[256][5] =
{
    {0,0,0,0,0},{1,0,0,0,0},{2,0,0,0,0},{0,0,2,0,0},{0,1,0,0,0},{1,1,0,0,0},{2,1,0,0,0},{1,0,2,0,0},
    {0,2,0,0,0},{1,2,0,0,0},{2,2,0,0,0},{2,0,2,0,0},{0,2,2,0,0},{1,2,2,0,0},{2,2,2,0,0},{2,0,2,0,0},
    {0,0,1,0,0},{1,0,1,0,0},{2,0,1,0,0},{0,1,2,0,0},{0,1,1,0,0},{1,1,1,0,0},{2,1,1,0,0},{1,1,2,0,0},
    {0,2,1,0,0},{1,2,1,0,0},{2,2,1,0,0},{2,1,2,0,0},{0,0,0,2,2},{1,0,0,2,2},{2,0,0,2,2},{0,0,2,2,2},
    {0,0,0,1,0},{1,0,0,1,0},{2,0,0,1,0},{0,0,2,1,0},{0,1,0,1,0},{1,1,0,1,0},{2,1,0,1,0},{1,0,2,1,0},
    {0,2,0,1,0},{1,2,0,1,0},{2,2,0,1,0},{2,0,2,1,0},{0,2,2,1,0},{1,2,2,1,0},{2,2,2,1,0},{2,0,2,1,0},
    {0,0,1,1,0},{1,0,1,1,0},{2,0,1,1,0},{0,1,2,1,0},{0,1,1,1,0},{1,1,1,1,0},{2,1,1,1,0},{1,1,2,1,0},
    {0,2,1,1,0},{1,2,1,1,0},{2,2,1,1,0},{2,1,2,1,0},{0,1,0,2,2},{1,1,0,2,2},{2,1,0,2,2},{1,0,2,2,2},
    {0,0,0,2,0},{1,0,0,2,0},{2,0,0,2,0},{0,0,2,2,0},{0,1,0,2,0},{1,1,0,2,0},{2,1,0,2,0},{1,0,2,2,0},
    {0,2,0,2,0},{1,2,0,2,0},{2,2,0,2,0},{2,0,2,2,0},{0,2,2,2,0},{1,2,2,2,0},{2,2,2,2,0},{2,0,2,2,0},
    {0,0,1,2,0},{1,0,1,2,0},{2,0,1,2,0},{0,1,2,2,0},{0,1,1,2,0},{1,1,1,2,0},{2,1,1,2,0},{1,1,2,2,0},
    {0,2,1,2,0},{1,2,1,2,0},{2,2,1,2,0},{2,1,2,2,0},{0,2,0,2,2},{1,2,0,2,2},{2,2,0,2,2},{2,0,2,2,2},
    {0,0,0,0,2},{1,0,0,0,2},{2,0,0,0,2},{0,0,2,0,2},{0,1,0,0,2},{1,1,0,0,2},{2,1,0,0,2},{1,0,2,0,2},
    {0,2,0,0,2},{1,2,0,0,2},{2,2,0,0,2},{2,0,2,0,2},{0,2,2,0,2},{1,2,2,0,2},{2,2,2,0,2},{2,0,2,0,2},
    {0,0,1,0,2},{1,0,1,0,2},{2,0,1,0,2},{0,1,2,0,2},{0,1,1,0,2},{1,1,1,0,2},{2,1,1,0,2},{1,1,2,0,2},
    {0,2,1,0,2},{1,2,1,0,2},{2,2,1,0,2},{2,1,2,0,2},{0,2,2,2,2},{1,2,2,2,2},{2,2,2,2,2},{2,0,2,2,2},
    {0,0,0,0,1},{1,0,0,0,1},{2,0,0,0,1},{0,0,2,0,1},{0,1,0,0,1},{1,1,0,0,1},{2,1,0,0,1},{1,0,2,0,1},
    {0,2,0,0,1},{1,2,0,0,1},{2,2,0,0,1},{2,0,2,0,1},{0,2,2,0,1},{1,2,2,0,1},{2,2,2,0,1},{2,0,2,0,1},
    {0,0,1,0,1},{1,0,1,0,1},{2,0,1,0,1},{0,1,2,0,1},{0,1,1,0,1},{1,1,1,0,1},{2,1,1,0,1},{1,1,2,0,1},
    {0,2,1,0,1},{1,2,1,0,1},{2,2,1,0,1},{2,1,2,0,1},{0,0,1,2,2},{1,0,1,2,2},{2,0,1,2,2},{0,1,2,2,2},
    {0,0,0,1,1},{1,0,0,1,1},{2,0,0,1,1},{0,0,2,1,1},{0,1,0,1,1},{1,1,0,1,1},{2,1,0,1,1},{1,0,2,1,1},
    {0,2,0,1,1},{1,2,0,1,1},{2,2,0,1,1},{2,0,2,1,1},{0,2,2,1,1},{1,2,2,1,1},{2,2,2,1,1},{2,0,2,1,1},
    {0,0,1,1,1},{1,0,1,1,1},{2,0,1,1,1},{0,1,2,1,1},{0,1,1,1,1},{1,1,1,1,1},{2,1,1,1,1},{1,1,2,1,1},
    {0,2,1,1,1},{1,2,1,1,1},{2,2,1,1,1},{2,1,2,1,1},{0,1,1,2,2},{1,1,1,2,2},{2,1,1,2,2},{1,1,2,2,2},
    {0,0,0,2,1},{1,0,0,2,1},{2,0,0,2,1},{0,0,2,2,1},{0,1,0,2,1},{1,1,0,2,1},{2,1,0,2,1},{1,0,2,2,1},
    {0,2,0,2,1},{1,2,0,2,1},{2,2,0,2,1},{2,0,2,2,1},{0,2,2,2,1},{1,2,2,2,1},{2,2,2,2,1},{2,0,2,2,1},
    {0,0,1,2,1},{1,0,1,2,1},{2,0,1,2,1},{0,1,2,2,1},{0,1,1,2,1},{1,1,1,2,1},{2,1,1,2,1},{1,1,2,2,1},
    {0,2,1,2,1},{1,2,1,2,1},{2,2,1,2,1},{2,1,2,2,1},{0,2,1,2,2},{1,2,1,2,2},{2,2,1,2,2},{2,1,2,2,2},
    {0,0,0,1,2},{1,0,0,1,2},{2,0,0,1,2},{0,0,2,1,2},{0,1,0,1,2},{1,1,0,1,2},{2,1,0,1,2},{1,0,2,1,2},
    {0,2,0,1,2},{1,2,0,1,2},{2,2,0,1,2},{2,0,2,1,2},{0,2,2,1,2},{1,2,2,1,2},{2,2,2,1,2},{2,0,2,1,2},
    {0,0,1,1,2},{1,0,1,1,2},{2,0,1,1,2},{0,1,2,1,2},{0,1,1,1,2},{1,1,1,1,2},{2,1,1,1,2},{1,1,2,1,2},
    {0,2,1,1,2},{1,2,1,1,2},{2,2,1,1,2},{2,1,2,1,2},{0,2,2,2,2},{1,2,2,2,2},{2,2,2,2,2},{2,1,2,2,2}
};
// s_quint_decode is indexed by the 7-bit packed quint value that decode_quint_block() assembles
// from the bitstream; each entry holds the three quint values of that block, first value first.
static const uint8_t s_quint_decode[128][3] =
{
    {0,0,0},{1,0,0},{2,0,0},{3,0,0},{4,0,0},{0,4,0},{4,4,0},{4,4,4},
    {0,1,0},{1,1,0},{2,1,0},{3,1,0},{4,1,0},{1,4,0},{4,4,1},{4,4,4},
    {0,2,0},{1,2,0},{2,2,0},{3,2,0},{4,2,0},{2,4,0},{4,4,2},{4,4,4},
    {0,3,0},{1,3,0},{2,3,0},{3,3,0},{4,3,0},{3,4,0},{4,4,3},{4,4,4},
    {0,0,1},{1,0,1},{2,0,1},{3,0,1},{4,0,1},{0,4,1},{4,0,4},{0,4,4},
    {0,1,1},{1,1,1},{2,1,1},{3,1,1},{4,1,1},{1,4,1},{4,1,4},{1,4,4},
    {0,2,1},{1,2,1},{2,2,1},{3,2,1},{4,2,1},{2,4,1},{4,2,4},{2,4,4},
    {0,3,1},{1,3,1},{2,3,1},{3,3,1},{4,3,1},{3,4,1},{4,3,4},{3,4,4},
    {0,0,2},{1,0,2},{2,0,2},{3,0,2},{4,0,2},{0,4,2},{2,0,4},{3,0,4},
    {0,1,2},{1,1,2},{2,1,2},{3,1,2},{4,1,2},{1,4,2},{2,1,4},{3,1,4},
    {0,2,2},{1,2,2},{2,2,2},{3,2,2},{4,2,2},{2,4,2},{2,2,4},{3,2,4},
    {0,3,2},{1,3,2},{2,3,2},{3,3,2},{4,3,2},{3,4,2},{2,3,4},{3,3,4},
    {0,0,3},{1,0,3},{2,0,3},{3,0,3},{4,0,3},{0,4,3},{0,0,4},{1,0,4},
    {0,1,3},{1,1,3},{2,1,3},{3,1,3},{4,1,3},{1,4,3},{0,1,4},{1,1,4},
    {0,2,3},{1,2,3},{2,2,3},{3,2,3},{4,2,3},{2,4,3},{0,2,4},{1,2,4},
    {0,3,3},{1,3,3},{2,3,3},{3,3,3},{4,3,3},{3,4,3},{0,3,4},{1,3,4}
};
  2731. static void decode_trit_block(uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t& bit_ofs, uint32_t bits_per_val)
  2732. {
  2733. assert((num_vals >= 1) && (num_vals <= 5));
  2734. uint32_t m[5] = { 0 }, T = 0;
  2735. static const uint8_t s_t_bits[5] = { 2, 2, 1, 2, 1 };
  2736. for (uint32_t T_ofs = 0, c = 0; c < num_vals; c++)
  2737. {
  2738. if (bits_per_val)
  2739. m[c] = bits.next_bits(bit_ofs, bits_per_val);
  2740. T |= (bits.next_bits(bit_ofs, s_t_bits[c]) << T_ofs);
  2741. T_ofs += s_t_bits[c];
  2742. }
  2743. const uint8_t (&p_trits)[5] = s_trit_decode[T];
  2744. for (uint32_t i = 0; i < num_vals; i++)
  2745. pVals[i] = (uint8_t)((p_trits[i] << bits_per_val) | m[i]);
  2746. }
  2747. static void decode_quint_block(uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t& bit_ofs, uint32_t bits_per_val)
  2748. {
  2749. assert((num_vals >= 1) && (num_vals <= 3));
  2750. uint32_t m[3] = { 0 }, T = 0;
  2751. static const uint8_t s_t_bits[3] = { 3, 2, 2 };
  2752. for (uint32_t T_ofs = 0, c = 0; c < num_vals; c++)
  2753. {
  2754. if (bits_per_val)
  2755. m[c] = bits.next_bits(bit_ofs, bits_per_val);
  2756. T |= (bits.next_bits(bit_ofs, s_t_bits[c]) << T_ofs);
  2757. T_ofs += s_t_bits[c];
  2758. }
  2759. const uint8_t (&p_quints)[3] = s_quint_decode[T];
  2760. for (uint32_t i = 0; i < num_vals; i++)
  2761. pVals[i] = (uint8_t)((p_quints[i] << bits_per_val) | m[i]);
  2762. }
  2763. static void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t bit_ofs)
  2764. {
  2765. assert(num_vals && (ise_range < TOTAL_ISE_RANGES));
  2766. const uint32_t bits_per_val = g_ise_range_table[ise_range][0];
  2767. if (g_ise_range_table[ise_range][1])
  2768. {
  2769. // Trits+bits, 5 vals per block, 7 bits extra per block
  2770. const uint32_t total_blocks = (num_vals + 4) / 5;
  2771. for (uint32_t b = 0; b < total_blocks; b++)
  2772. {
  2773. const uint32_t num_vals_in_block = std::min<int>(num_vals - 5 * b, 5);
  2774. decode_trit_block(pVals + 5 * b, num_vals_in_block, bits, bit_ofs, bits_per_val);
  2775. }
  2776. }
  2777. else if (g_ise_range_table[ise_range][2])
  2778. {
  2779. // Quints+bits, 3 vals per block, 8 bits extra per block
  2780. const uint32_t total_blocks = (num_vals + 2) / 3;
  2781. for (uint32_t b = 0; b < total_blocks; b++)
  2782. {
  2783. const uint32_t num_vals_in_block = std::min<int>(num_vals - 3 * b, 3);
  2784. decode_quint_block(pVals + 3 * b, num_vals_in_block, bits, bit_ofs, bits_per_val);
  2785. }
  2786. }
  2787. else
  2788. {
  2789. assert(bits_per_val);
  2790. // Only bits
  2791. for (uint32_t i = 0; i < num_vals; i++)
  2792. pVals[i] = (uint8_t)bits.next_bits(bit_ofs, bits_per_val);
  2793. }
  2794. }
  2795. void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint8_t* pBits128, uint32_t bit_ofs)
  2796. {
  2797. const uint128 bits(
  2798. (uint64_t)read_le_dword(pBits128) | (((uint64_t)read_le_dword(pBits128 + sizeof(uint32_t))) << 32),
  2799. (uint64_t)read_le_dword(pBits128 + sizeof(uint32_t) * 2) | (((uint64_t)read_le_dword(pBits128 + sizeof(uint32_t) * 3)) << 32));
  2800. return decode_bise(ise_range, pVals, num_vals, bits, bit_ofs);
  2801. }
  2802. // Decodes a physical ASTC block to a logical ASTC block.
  2803. // blk_width/blk_height are only used to validate the weight grid's dimensions.
  2804. bool unpack_block(const void* pASTC_block, log_astc_block& log_blk, uint32_t blk_width, uint32_t blk_height)
  2805. {
  2806. assert(is_valid_block_size(blk_width, blk_height));
  2807. const uint8_t* pS = (uint8_t*)pASTC_block;
  2808. log_blk.clear();
  2809. log_blk.m_error_flag = true;
  2810. const uint128 bits(
  2811. (uint64_t)read_le_dword(pS) | (((uint64_t)read_le_dword(pS + sizeof(uint32_t))) << 32),
  2812. (uint64_t)read_le_dword(pS + sizeof(uint32_t) * 2) | (((uint64_t)read_le_dword(pS + sizeof(uint32_t) * 3)) << 32));
  2813. const uint128 rev_bits(bits.get_reversed_bits());
  2814. if (!decode_config(bits, log_blk))
  2815. return false;
  2816. if (log_blk.m_solid_color_flag_hdr || log_blk.m_solid_color_flag_ldr)
  2817. {
  2818. // Void extent
  2819. log_blk.m_error_flag = false;
  2820. return true;
  2821. }
  2822. // Check grid dimensions
  2823. if ((log_blk.m_grid_width > blk_width) || (log_blk.m_grid_height > blk_height))
  2824. return false;
  2825. // Now we have the grid width/height, dual plane, weight ISE range
  2826. const uint32_t total_grid_weights = (log_blk.m_dual_plane ? 2 : 1) * (log_blk.m_grid_width * log_blk.m_grid_height);
  2827. const uint32_t total_weight_bits = get_ise_sequence_bits(total_grid_weights, log_blk.m_weight_ise_range);
  2828. // 18.24 Illegal Encodings
  2829. if ((!total_grid_weights) || (total_grid_weights > MAX_GRID_WEIGHTS) || (total_weight_bits < 24) || (total_weight_bits > 96))
  2830. return false;
  2831. const uint32_t end_of_weight_bit_ofs = 128 - total_weight_bits;
  2832. uint32_t total_extra_bits = 0;
  2833. // Right before the weight bits, there may be extra CEM bits, then the 2 CCS bits if dual plane.
  2834. log_blk.m_num_partitions = bits.get_bits(11, 2) + 1;
  2835. if (log_blk.m_num_partitions == 1)
  2836. log_blk.m_color_endpoint_modes[0] = bits.get_bits(13, 4); // read CEM bits
  2837. else
  2838. {
  2839. // 2 or more partitions
  2840. if (log_blk.m_dual_plane && (log_blk.m_num_partitions == 4))
  2841. return false;
  2842. log_blk.m_partition_id = bits.get_bits(13, 10);
  2843. uint32_t cem_bits = bits.get_bits(23, 6);
  2844. if ((cem_bits & 3) == 0)
  2845. {
  2846. // All CEM's the same
  2847. for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
  2848. log_blk.m_color_endpoint_modes[i] = cem_bits >> 2;
  2849. }
  2850. else
  2851. {
  2852. // CEM's different, but within up to 2 adjacent classes
  2853. const uint32_t first_cem_index = ((cem_bits & 3) - 1) * 4;
  2854. total_extra_bits = 3 * log_blk.m_num_partitions - 4;
  2855. if ((total_weight_bits + total_extra_bits) > 128)
  2856. return false;
  2857. uint32_t cem_bit_pos = end_of_weight_bit_ofs - total_extra_bits;
  2858. uint32_t c[4] = { 0 }, m[4] = { 0 };
  2859. cem_bits >>= 2;
  2860. for (uint32_t i = 0; i < log_blk.m_num_partitions; i++, cem_bits >>= 1)
  2861. c[i] = cem_bits & 1;
  2862. switch (log_blk.m_num_partitions)
  2863. {
  2864. case 2:
  2865. {
  2866. m[0] = cem_bits & 3;
  2867. m[1] = bits.next_bits(cem_bit_pos, 2);
  2868. break;
  2869. }
  2870. case 3:
  2871. {
  2872. m[0] = cem_bits & 1;
  2873. m[0] |= (bits.next_bits(cem_bit_pos, 1) << 1);
  2874. m[1] = bits.next_bits(cem_bit_pos, 2);
  2875. m[2] = bits.next_bits(cem_bit_pos, 2);
  2876. break;
  2877. }
  2878. case 4:
  2879. {
  2880. for (uint32_t i = 0; i < 4; i++)
  2881. m[i] = bits.next_bits(cem_bit_pos, 2);
  2882. break;
  2883. }
  2884. default:
  2885. {
  2886. assert(0);
  2887. break;
  2888. }
  2889. }
  2890. assert(cem_bit_pos == end_of_weight_bit_ofs);
  2891. for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
  2892. {
  2893. log_blk.m_color_endpoint_modes[i] = first_cem_index + (c[i] * 4) + m[i];
  2894. assert(log_blk.m_color_endpoint_modes[i] <= 15);
  2895. }
  2896. }
  2897. }
  2898. // Now we have all the CEM indices.
  2899. if (log_blk.m_dual_plane)
  2900. {
  2901. // Read CCS bits, beneath any CEM bits
  2902. total_extra_bits += 2;
  2903. if (total_extra_bits > end_of_weight_bit_ofs)
  2904. return false;
  2905. uint32_t ccs_bit_pos = end_of_weight_bit_ofs - total_extra_bits;
  2906. log_blk.m_color_component_selector = bits.get_bits(ccs_bit_pos, 2);
  2907. }
  2908. uint32_t config_bit_pos = 11 + 2; // config+num_parts
  2909. if (log_blk.m_num_partitions == 1)
  2910. config_bit_pos += 4; // CEM bits
  2911. else
  2912. config_bit_pos += 10 + 6; // part_id+CEM bits
  2913. // config+num_parts+total_extra_bits (CEM extra+CCS)
  2914. uint32_t total_config_bits = config_bit_pos + total_extra_bits;
  2915. // Compute number of remaining bits in block
  2916. const int num_remaining_bits = 128 - (int)total_config_bits - (int)total_weight_bits;
  2917. if (num_remaining_bits < 0)
  2918. return false;
  2919. // Compute total number of ISE encoded color endpoint mode values
  2920. uint32_t total_cem_vals = 0;
  2921. for (uint32_t j = 0; j < log_blk.m_num_partitions; j++)
  2922. total_cem_vals += get_num_cem_values(log_blk.m_color_endpoint_modes[j]);
  2923. if (total_cem_vals > MAX_ENDPOINTS)
  2924. return false;
  2925. // Infer endpoint ISE range based off the # of values we need to encode, and the # of remaining bits in the block
  2926. int endpoint_ise_range = -1;
  2927. for (int k = 20; k > 0; k--)
  2928. {
  2929. int b = get_ise_sequence_bits(total_cem_vals, k);
  2930. if (b <= num_remaining_bits)
  2931. {
  2932. endpoint_ise_range = k;
  2933. break;
  2934. }
  2935. }
  2936. // See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints
  2937. if (endpoint_ise_range < (int)FIRST_VALID_ENDPOINT_ISE_RANGE)
  2938. return false;
  2939. log_blk.m_endpoint_ise_range = endpoint_ise_range;
  2940. // Decode endpoints forwards in block
  2941. decode_bise(log_blk.m_endpoint_ise_range, log_blk.m_endpoints, total_cem_vals, bits, config_bit_pos);
  2942. // Decode grid weights backwards in block
  2943. decode_bise(log_blk.m_weight_ise_range, log_blk.m_weights, total_grid_weights, rev_bits, 0);
  2944. log_blk.m_error_flag = false;
  2945. return true;
  2946. }
  2947. } // namespace astc_helpers
  2948. #endif //BASISU_ASTC_HELPERS_IMPLEMENTATION