ucol_swp.cpp 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. *
  6. * Copyright (C) 2003-2015, International Business Machines
  7. * Corporation and others. All Rights Reserved.
  8. *
  9. *******************************************************************************
  10. * file name: ucol_swp.cpp
  11. * encoding: UTF-8
  12. * tab size: 8 (not used)
  13. * indentation:4
  14. *
  15. * created on: 2003sep10
  16. * created by: Markus W. Scherer
  17. *
  18. * Swap collation binaries.
  19. */
  20. #include "unicode/udata.h" /* UDataInfo */
  21. #include "utrie.h"
  22. #include "utrie2.h"
  23. #include "udataswp.h"
  24. #include "cmemory.h"
  25. #include "ucol_data.h"
  26. #include "ucol_swp.h"
  27. /* swapping ----------------------------------------------------------------- */
  28. #if !UCONFIG_NO_COLLATION
  29. U_CAPI UBool U_EXPORT2
  30. ucol_looksLikeCollationBinary(const UDataSwapper *ds,
  31. const void *inData, int32_t length) {
  32. if(ds==nullptr || inData==nullptr || length<-1) {
  33. return false;
  34. }
  35. // First check for format version 4+ which has a standard data header.
  36. UErrorCode errorCode=U_ZERO_ERROR;
  37. (void)udata_swapDataHeader(ds, inData, -1, nullptr, &errorCode);
  38. if(U_SUCCESS(errorCode)) {
  39. const UDataInfo &info=*(const UDataInfo *)((const char *)inData+4);
  40. if(info.dataFormat[0]==0x55 && // dataFormat="UCol"
  41. info.dataFormat[1]==0x43 &&
  42. info.dataFormat[2]==0x6f &&
  43. info.dataFormat[3]==0x6c) {
  44. return true;
  45. }
  46. }
  47. // Else check for format version 3.
  48. const UCATableHeader *inHeader=(const UCATableHeader *)inData;
  49. /*
  50. * The collation binary must contain at least the UCATableHeader,
  51. * starting with its size field.
  52. * sizeof(UCATableHeader)==42*4 in ICU 2.8
  53. * check the length against the header size before reading the size field
  54. */
  55. UCATableHeader header;
  56. uprv_memset(&header, 0, sizeof(header));
  57. if(length<0) {
  58. header.size=udata_readInt32(ds, inHeader->size);
  59. } else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) {
  60. return false;
  61. }
  62. header.magic=ds->readUInt32(inHeader->magic);
  63. if(!(
  64. header.magic==UCOL_HEADER_MAGIC &&
  65. inHeader->formatVersion[0]==3 /*&&
  66. inHeader->formatVersion[1]>=0*/
  67. )) {
  68. return false;
  69. }
  70. if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) {
  71. return false;
  72. }
  73. return true;
  74. }
  75. namespace {
  76. /* swap a header-less collation formatVersion=3 binary, inside a resource bundle or ucadata.icu */
  77. int32_t
  78. swapFormatVersion3(const UDataSwapper *ds,
  79. const void *inData, int32_t length, void *outData,
  80. UErrorCode *pErrorCode) {
  81. const uint8_t *inBytes;
  82. uint8_t *outBytes;
  83. const UCATableHeader *inHeader;
  84. UCATableHeader *outHeader;
  85. UCATableHeader header;
  86. uint32_t count;
  87. /* argument checking in case we were not called from ucol_swap() */
  88. if(U_FAILURE(*pErrorCode)) {
  89. return 0;
  90. }
  91. if(ds==nullptr || inData==nullptr || length<-1 || (length>0 && outData==nullptr)) {
  92. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  93. return 0;
  94. }
  95. inBytes=(const uint8_t *)inData;
  96. outBytes=(uint8_t *)outData;
  97. inHeader=(const UCATableHeader *)inData;
  98. outHeader=(UCATableHeader *)outData;
  99. /*
  100. * The collation binary must contain at least the UCATableHeader,
  101. * starting with its size field.
  102. * sizeof(UCATableHeader)==42*4 in ICU 2.8
  103. * check the length against the header size before reading the size field
  104. */
  105. uprv_memset(&header, 0, sizeof(header));
  106. if(length<0) {
  107. header.size=udata_readInt32(ds, inHeader->size);
  108. } else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) {
  109. udata_printError(ds, "ucol_swap(formatVersion=3): too few bytes (%d after header) for collation data\n",
  110. length);
  111. *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  112. return 0;
  113. }
  114. header.magic=ds->readUInt32(inHeader->magic);
  115. if(!(
  116. header.magic==UCOL_HEADER_MAGIC &&
  117. inHeader->formatVersion[0]==3 /*&&
  118. inHeader->formatVersion[1]>=0*/
  119. )) {
  120. udata_printError(ds, "ucol_swap(formatVersion=3): magic 0x%08x or format version %02x.%02x is not a collation binary\n",
  121. header.magic,
  122. inHeader->formatVersion[0], inHeader->formatVersion[1]);
  123. *pErrorCode=U_UNSUPPORTED_ERROR;
  124. return 0;
  125. }
  126. if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) {
  127. udata_printError(ds, "ucol_swap(formatVersion=3): endianness %d or charset %d does not match the swapper\n",
  128. inHeader->isBigEndian, inHeader->charSetFamily);
  129. *pErrorCode=U_INVALID_FORMAT_ERROR;
  130. return 0;
  131. }
  132. if(length>=0) {
  133. /* copy everything, takes care of data that needs no swapping */
  134. if(inBytes!=outBytes) {
  135. uprv_memcpy(outBytes, inBytes, header.size);
  136. }
  137. /* swap the necessary pieces in the order of their occurrence in the data */
  138. /* read more of the UCATableHeader (the size field was read above) */
  139. header.options= ds->readUInt32(inHeader->options);
  140. header.UCAConsts= ds->readUInt32(inHeader->UCAConsts);
  141. header.contractionUCACombos= ds->readUInt32(inHeader->contractionUCACombos);
  142. header.mappingPosition= ds->readUInt32(inHeader->mappingPosition);
  143. header.expansion= ds->readUInt32(inHeader->expansion);
  144. header.contractionIndex= ds->readUInt32(inHeader->contractionIndex);
  145. header.contractionCEs= ds->readUInt32(inHeader->contractionCEs);
  146. header.contractionSize= ds->readUInt32(inHeader->contractionSize);
  147. header.endExpansionCE= ds->readUInt32(inHeader->endExpansionCE);
  148. header.expansionCESize= ds->readUInt32(inHeader->expansionCESize);
  149. header.endExpansionCECount= udata_readInt32(ds, inHeader->endExpansionCECount);
  150. header.contractionUCACombosSize=udata_readInt32(ds, inHeader->contractionUCACombosSize);
  151. header.scriptToLeadByte= ds->readUInt32(inHeader->scriptToLeadByte);
  152. header.leadByteToScript= ds->readUInt32(inHeader->leadByteToScript);
  153. /* swap the 32-bit integers in the header */
  154. ds->swapArray32(ds, inHeader, (int32_t)((const char *)&inHeader->jamoSpecial-(const char *)inHeader),
  155. outHeader, pErrorCode);
  156. ds->swapArray32(ds, &(inHeader->scriptToLeadByte), sizeof(header.scriptToLeadByte) + sizeof(header.leadByteToScript),
  157. &(outHeader->scriptToLeadByte), pErrorCode);
  158. /* set the output platform properties */
  159. outHeader->isBigEndian=ds->outIsBigEndian;
  160. outHeader->charSetFamily=ds->outCharset;
  161. /* swap the options */
  162. if(header.options!=0) {
  163. ds->swapArray32(ds, inBytes+header.options, header.expansion-header.options,
  164. outBytes+header.options, pErrorCode);
  165. }
  166. /* swap the expansions */
  167. if(header.mappingPosition!=0 && header.expansion!=0) {
  168. if(header.contractionIndex!=0) {
  169. /* expansions bounded by contractions */
  170. count=header.contractionIndex-header.expansion;
  171. } else {
  172. /* no contractions: expansions bounded by the main trie */
  173. count=header.mappingPosition-header.expansion;
  174. }
  175. ds->swapArray32(ds, inBytes+header.expansion, (int32_t)count,
  176. outBytes+header.expansion, pErrorCode);
  177. }
  178. /* swap the contractions */
  179. if(header.contractionSize!=0) {
  180. /* contractionIndex: char16_t[] */
  181. ds->swapArray16(ds, inBytes+header.contractionIndex, header.contractionSize*2,
  182. outBytes+header.contractionIndex, pErrorCode);
  183. /* contractionCEs: CEs[] */
  184. ds->swapArray32(ds, inBytes+header.contractionCEs, header.contractionSize*4,
  185. outBytes+header.contractionCEs, pErrorCode);
  186. }
  187. /* swap the main trie */
  188. if(header.mappingPosition!=0) {
  189. count=header.endExpansionCE-header.mappingPosition;
  190. utrie_swap(ds, inBytes+header.mappingPosition, (int32_t)count,
  191. outBytes+header.mappingPosition, pErrorCode);
  192. }
  193. /* swap the max expansion table */
  194. if(header.endExpansionCECount!=0) {
  195. ds->swapArray32(ds, inBytes+header.endExpansionCE, header.endExpansionCECount*4,
  196. outBytes+header.endExpansionCE, pErrorCode);
  197. }
  198. /* expansionCESize, unsafeCP, contrEndCP: uint8_t[], no need to swap */
  199. /* swap UCA constants */
  200. if(header.UCAConsts!=0) {
  201. /*
  202. * if UCAConsts!=0 then contractionUCACombos because we are swapping
  203. * the UCA data file, and we know that the UCA contains contractions
  204. */
  205. ds->swapArray32(ds, inBytes+header.UCAConsts, header.contractionUCACombos-header.UCAConsts,
  206. outBytes+header.UCAConsts, pErrorCode);
  207. }
  208. /* swap UCA contractions */
  209. if(header.contractionUCACombosSize!=0) {
  210. count=header.contractionUCACombosSize*inHeader->contractionUCACombosWidth*U_SIZEOF_UCHAR;
  211. ds->swapArray16(ds, inBytes+header.contractionUCACombos, (int32_t)count,
  212. outBytes+header.contractionUCACombos, pErrorCode);
  213. }
  214. /* swap the script to lead bytes */
  215. if(header.scriptToLeadByte!=0) {
  216. int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte))); // each entry = 2 * uint16
  217. int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte + 2))); // each entry = uint16
  218. ds->swapArray16(ds, inBytes+header.scriptToLeadByte,
  219. 4 + (4 * indexCount) + (2 * dataCount),
  220. outBytes+header.scriptToLeadByte, pErrorCode);
  221. }
  222. /* swap the lead byte to scripts */
  223. if(header.leadByteToScript!=0) {
  224. int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript))); // each entry = uint16
  225. int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript + 2))); // each entry = uint16
  226. ds->swapArray16(ds, inBytes+header.leadByteToScript,
  227. 4 + (2 * indexCount) + (2 * dataCount),
  228. outBytes+header.leadByteToScript, pErrorCode);
  229. }
  230. }
  231. return header.size;
  232. }
  // swap formatVersion 4 or 5 ----------------------------------------------- ***

  // Slot numbers of the int32_t indexes[] array at the start of the collation data.
  // These are copied from CollationDataReader, trading an awkward copy of constants
  // for an awkward relocation of the i18n collationdatareader.h file into the common library.
  // Keep them in sync!
  enum {
      IX_INDEXES_LENGTH = 0,
      IX_OPTIONS = 1,
      IX_RESERVED2 = 2,
      IX_RESERVED3 = 3,

      IX_JAMO_CE32S_START = 4,
      IX_REORDER_CODES_OFFSET = 5,
      IX_REORDER_TABLE_OFFSET = 6,
      IX_TRIE_OFFSET = 7,

      IX_RESERVED8_OFFSET = 8,
      IX_CES_OFFSET = 9,
      IX_RESERVED10_OFFSET = 10,
      IX_CE32S_OFFSET = 11,

      IX_ROOT_ELEMENTS_OFFSET = 12,
      IX_CONTEXTS_OFFSET = 13,
      IX_UNSAFE_BWD_OFFSET = 14,
      IX_FAST_LATIN_TABLE_OFFSET = 15,

      IX_SCRIPTS_OFFSET = 16,
      IX_COMPRESSIBLE_BYTES_OFFSET = 17,
      IX_RESERVED18_OFFSET = 18,
      IX_TOTAL_SIZE = 19
  };
  259. int32_t
  260. swapFormatVersion4(const UDataSwapper *ds,
  261. const void *inData, int32_t length, void *outData,
  262. UErrorCode &errorCode) {
  263. if(U_FAILURE(errorCode)) { return 0; }
  264. const uint8_t *inBytes=(const uint8_t *)inData;
  265. uint8_t *outBytes=(uint8_t *)outData;
  266. const int32_t *inIndexes=(const int32_t *)inBytes;
  267. int32_t indexes[IX_TOTAL_SIZE+1];
  268. // Need at least IX_INDEXES_LENGTH and IX_OPTIONS.
  269. if(0<=length && length<8) {
  270. udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
  271. "(%d after header) for collation data\n",
  272. length);
  273. errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  274. return 0;
  275. }
  276. int32_t indexesLength=indexes[0]=udata_readInt32(ds, inIndexes[0]);
  277. if(0<=length && length<(indexesLength*4)) {
  278. udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
  279. "(%d after header) for collation data\n",
  280. length);
  281. errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  282. return 0;
  283. }
  284. for(int32_t i=1; i<=IX_TOTAL_SIZE && i<indexesLength; ++i) {
  285. indexes[i]=udata_readInt32(ds, inIndexes[i]);
  286. }
  287. for(int32_t i=indexesLength; i<=IX_TOTAL_SIZE; ++i) {
  288. indexes[i]=-1;
  289. }
  290. inIndexes=nullptr; // Make sure we do not accidentally use these instead of indexes[].
  291. // Get the total length of the data.
  292. int32_t size;
  293. if(indexesLength>IX_TOTAL_SIZE) {
  294. size=indexes[IX_TOTAL_SIZE];
  295. } else if(indexesLength>IX_REORDER_CODES_OFFSET) {
  296. size=indexes[indexesLength-1];
  297. } else {
  298. size=indexesLength*4;
  299. }
  300. if(length<0) { return size; }
  301. if(length<size) {
  302. udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
  303. "(%d after header) for collation data\n",
  304. length);
  305. errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  306. return 0;
  307. }
  308. // Copy the data for inaccessible bytes and arrays of bytes.
  309. if(inBytes!=outBytes) {
  310. uprv_memcpy(outBytes, inBytes, size);
  311. }
  312. // Swap the int32_t indexes[].
  313. ds->swapArray32(ds, inBytes, indexesLength * 4, outBytes, &errorCode);
  314. // The following is a modified version of CollationDataReader::read().
  315. // Here we use indexes[] not inIndexes[] because
  316. // the inIndexes[] may not be in this machine's endianness.
  317. int32_t index; // one of the indexes[] slots
  318. int32_t offset; // byte offset for the index part
  319. // int32_t length; // number of bytes in the index part
  320. index = IX_REORDER_CODES_OFFSET;
  321. offset = indexes[index];
  322. length = indexes[index + 1] - offset;
  323. if(length > 0) {
  324. ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode);
  325. }
  326. // Skip the IX_REORDER_TABLE_OFFSET byte array.
  327. index = IX_TRIE_OFFSET;
  328. offset = indexes[index];
  329. length = indexes[index + 1] - offset;
  330. if(length > 0) {
  331. utrie2_swap(ds, inBytes + offset, length, outBytes + offset, &errorCode);
  332. }
  333. index = IX_RESERVED8_OFFSET;
  334. offset = indexes[index];
  335. length = indexes[index + 1] - offset;
  336. if(length > 0) {
  337. udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED8_OFFSET\n", length);
  338. errorCode = U_UNSUPPORTED_ERROR;
  339. return 0;
  340. }
  341. index = IX_CES_OFFSET;
  342. offset = indexes[index];
  343. length = indexes[index + 1] - offset;
  344. if(length > 0) {
  345. ds->swapArray64(ds, inBytes + offset, length, outBytes + offset, &errorCode);
  346. }
  347. index = IX_RESERVED10_OFFSET;
  348. offset = indexes[index];
  349. length = indexes[index + 1] - offset;
  350. if(length > 0) {
  351. udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED10_OFFSET\n", length);
  352. errorCode = U_UNSUPPORTED_ERROR;
  353. return 0;
  354. }
  355. index = IX_CE32S_OFFSET;
  356. offset = indexes[index];
  357. length = indexes[index + 1] - offset;
  358. if(length > 0) {
  359. ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode);
  360. }
  361. index = IX_ROOT_ELEMENTS_OFFSET;
  362. offset = indexes[index];
  363. length = indexes[index + 1] - offset;
  364. if(length > 0) {
  365. ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode);
  366. }
  367. index = IX_CONTEXTS_OFFSET;
  368. offset = indexes[index];
  369. length = indexes[index + 1] - offset;
  370. if(length > 0) {
  371. ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode);
  372. }
  373. index = IX_UNSAFE_BWD_OFFSET;
  374. offset = indexes[index];
  375. length = indexes[index + 1] - offset;
  376. if(length > 0) {
  377. ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode);
  378. }
  379. index = IX_FAST_LATIN_TABLE_OFFSET;
  380. offset = indexes[index];
  381. length = indexes[index + 1] - offset;
  382. if(length > 0) {
  383. ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode);
  384. }
  385. index = IX_SCRIPTS_OFFSET;
  386. offset = indexes[index];
  387. length = indexes[index + 1] - offset;
  388. if(length > 0) {
  389. ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode);
  390. }
  391. // Skip the IX_COMPRESSIBLE_BYTES_OFFSET byte array.
  392. index = IX_RESERVED18_OFFSET;
  393. offset = indexes[index];
  394. length = indexes[index + 1] - offset;
  395. if(length > 0) {
  396. udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED18_OFFSET\n", length);
  397. errorCode = U_UNSUPPORTED_ERROR;
  398. return 0;
  399. }
  400. return size;
  401. }
  402. } // namespace
  403. /* swap ICU collation data like ucadata.icu */
  404. U_CAPI int32_t U_EXPORT2
  405. ucol_swap(const UDataSwapper *ds,
  406. const void *inData, int32_t length, void *outData,
  407. UErrorCode *pErrorCode) {
  408. if(U_FAILURE(*pErrorCode)) { return 0; }
  409. /* udata_swapDataHeader checks the arguments */
  410. int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
  411. if(U_FAILURE(*pErrorCode)) {
  412. // Try to swap the old format version which did not have a standard data header.
  413. *pErrorCode=U_ZERO_ERROR;
  414. return swapFormatVersion3(ds, inData, length, outData, pErrorCode);
  415. }
  416. /* check data format and format version */
  417. const UDataInfo &info=*(const UDataInfo *)((const char *)inData+4);
  418. if(!(
  419. info.dataFormat[0]==0x55 && // dataFormat="UCol"
  420. info.dataFormat[1]==0x43 &&
  421. info.dataFormat[2]==0x6f &&
  422. info.dataFormat[3]==0x6c &&
  423. (3<=info.formatVersion[0] && info.formatVersion[0]<=5)
  424. )) {
  425. udata_printError(ds, "ucol_swap(): data format %02x.%02x.%02x.%02x "
  426. "(format version %02x.%02x) is not recognized as collation data\n",
  427. info.dataFormat[0], info.dataFormat[1],
  428. info.dataFormat[2], info.dataFormat[3],
  429. info.formatVersion[0], info.formatVersion[1]);
  430. *pErrorCode=U_UNSUPPORTED_ERROR;
  431. return 0;
  432. }
  433. inData=(const char *)inData+headerSize;
  434. if(length>=0) { length-=headerSize; }
  435. outData=(outData == nullptr) ? nullptr : (char *)outData+headerSize;
  436. int32_t collationSize;
  437. if(info.formatVersion[0]>=4) {
  438. collationSize=swapFormatVersion4(ds, inData, length, outData, *pErrorCode);
  439. } else {
  440. collationSize=swapFormatVersion3(ds, inData, length, outData, pErrorCode);
  441. }
  442. if(U_SUCCESS(*pErrorCode)) {
  443. return headerSize+collationSize;
  444. } else {
  445. return 0;
  446. }
  447. }
  448. /* swap inverse UCA collation data (invuca.icu) */
  449. U_CAPI int32_t U_EXPORT2
  450. ucol_swapInverseUCA(const UDataSwapper *ds,
  451. const void *inData, int32_t length, void *outData,
  452. UErrorCode *pErrorCode) {
  453. const UDataInfo *pInfo;
  454. int32_t headerSize;
  455. const uint8_t *inBytes;
  456. uint8_t *outBytes;
  457. const InverseUCATableHeader *inHeader;
  458. InverseUCATableHeader *outHeader;
  459. InverseUCATableHeader header={ 0,0,0,0,0,{0,0,0,0},{0,0,0,0,0,0,0,0} };
  460. /* udata_swapDataHeader checks the arguments */
  461. headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
  462. if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
  463. return 0;
  464. }
  465. /* check data format and format version */
  466. pInfo=(const UDataInfo *)((const char *)inData+4);
  467. if(!(
  468. pInfo->dataFormat[0]==0x49 && /* dataFormat="InvC" */
  469. pInfo->dataFormat[1]==0x6e &&
  470. pInfo->dataFormat[2]==0x76 &&
  471. pInfo->dataFormat[3]==0x43 &&
  472. pInfo->formatVersion[0]==2 &&
  473. pInfo->formatVersion[1]>=1
  474. )) {
  475. udata_printError(ds, "ucol_swapInverseUCA(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not an inverse UCA collation file\n",
  476. pInfo->dataFormat[0], pInfo->dataFormat[1],
  477. pInfo->dataFormat[2], pInfo->dataFormat[3],
  478. pInfo->formatVersion[0], pInfo->formatVersion[1]);
  479. *pErrorCode=U_UNSUPPORTED_ERROR;
  480. return 0;
  481. }
  482. inBytes=(const uint8_t *)inData+headerSize;
  483. outBytes=(uint8_t *)outData+headerSize;
  484. inHeader=(const InverseUCATableHeader *)inBytes;
  485. outHeader=(InverseUCATableHeader *)outBytes;
  486. /*
  487. * The inverse UCA collation binary must contain at least the InverseUCATableHeader,
  488. * starting with its size field.
  489. * sizeof(UCATableHeader)==8*4 in ICU 2.8
  490. * check the length against the header size before reading the size field
  491. */
  492. if(length<0) {
  493. header.byteSize=udata_readInt32(ds, inHeader->byteSize);
  494. } else if(
  495. ((length-headerSize)<(8*4) ||
  496. (uint32_t)(length-headerSize)<(header.byteSize=udata_readInt32(ds, inHeader->byteSize)))
  497. ) {
  498. udata_printError(ds, "ucol_swapInverseUCA(): too few bytes (%d after header) for inverse UCA collation data\n",
  499. length);
  500. *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  501. return 0;
  502. }
  503. if(length>=0) {
  504. /* copy everything, takes care of data that needs no swapping */
  505. if(inBytes!=outBytes) {
  506. uprv_memcpy(outBytes, inBytes, header.byteSize);
  507. }
  508. /* swap the necessary pieces in the order of their occurrence in the data */
  509. /* read more of the InverseUCATableHeader (the byteSize field was read above) */
  510. header.tableSize= ds->readUInt32(inHeader->tableSize);
  511. header.contsSize= ds->readUInt32(inHeader->contsSize);
  512. header.table= ds->readUInt32(inHeader->table);
  513. header.conts= ds->readUInt32(inHeader->conts);
  514. /* swap the 32-bit integers in the header */
  515. ds->swapArray32(ds, inHeader, 5*4, outHeader, pErrorCode);
  516. /* swap the inverse table; tableSize counts uint32_t[3] rows */
  517. ds->swapArray32(ds, inBytes+header.table, header.tableSize*3*4,
  518. outBytes+header.table, pErrorCode);
  519. /* swap the continuation table; contsSize counts UChars */
  520. ds->swapArray16(ds, inBytes+header.conts, header.contsSize*U_SIZEOF_UCHAR,
  521. outBytes+header.conts, pErrorCode);
  522. }
  523. return headerSize+header.byteSize;
  524. }
  525. #endif /* #if !UCONFIG_NO_COLLATION */