udataswp.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. *
  6. * Copyright (C) 2003-2014, International Business Machines
  7. * Corporation and others. All Rights Reserved.
  8. *
  9. *******************************************************************************
  10. * file name: udataswp.cpp
  11. * encoding: UTF-8
  12. * tab size: 8 (not used)
  13. * indentation:4
  14. *
  15. * created on: 2003jun05
  16. * created by: Markus W. Scherer
  17. *
  18. * Definitions for ICU data transformations for different platforms,
  19. * changing between big- and little-endian data and/or between
  20. * charset families (ASCII<->EBCDIC).
  21. */
  22. #include <stdarg.h>
  23. #include "unicode/utypes.h"
  24. #include "unicode/udata.h" /* UDataInfo */
  25. #include "ucmndata.h" /* DataHeader */
  26. #include "cmemory.h"
  27. #include "udataswp.h"
  28. /* swapping primitives ------------------------------------------------------ */
  29. static int32_t U_CALLCONV
  30. uprv_swapArray16(const UDataSwapper *ds,
  31. const void *inData, int32_t length, void *outData,
  32. UErrorCode *pErrorCode) {
  33. const uint16_t *p;
  34. uint16_t *q;
  35. int32_t count;
  36. uint16_t x;
  37. if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
  38. return 0;
  39. }
  40. if(ds==nullptr || inData==nullptr || length<0 || (length&1)!=0 || outData==nullptr) {
  41. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  42. return 0;
  43. }
  44. /* setup and swapping */
  45. p=(const uint16_t *)inData;
  46. q=(uint16_t *)outData;
  47. count=length/2;
  48. while(count>0) {
  49. x=*p++;
  50. *q++=(uint16_t)((x<<8)|(x>>8));
  51. --count;
  52. }
  53. return length;
  54. }
  55. static int32_t U_CALLCONV
  56. uprv_copyArray16(const UDataSwapper *ds,
  57. const void *inData, int32_t length, void *outData,
  58. UErrorCode *pErrorCode) {
  59. if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
  60. return 0;
  61. }
  62. if(ds==nullptr || inData==nullptr || length<0 || (length&1)!=0 || outData==nullptr) {
  63. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  64. return 0;
  65. }
  66. if(length>0 && inData!=outData) {
  67. uprv_memcpy(outData, inData, length);
  68. }
  69. return length;
  70. }
  71. static int32_t U_CALLCONV
  72. uprv_swapArray32(const UDataSwapper *ds,
  73. const void *inData, int32_t length, void *outData,
  74. UErrorCode *pErrorCode) {
  75. const uint32_t *p;
  76. uint32_t *q;
  77. int32_t count;
  78. uint32_t x;
  79. if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
  80. return 0;
  81. }
  82. if(ds==nullptr || inData==nullptr || length<0 || (length&3)!=0 || outData==nullptr) {
  83. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  84. return 0;
  85. }
  86. /* setup and swapping */
  87. p=(const uint32_t *)inData;
  88. q=(uint32_t *)outData;
  89. count=length/4;
  90. while(count>0) {
  91. x=*p++;
  92. *q++=(uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24));
  93. --count;
  94. }
  95. return length;
  96. }
  97. static int32_t U_CALLCONV
  98. uprv_copyArray32(const UDataSwapper *ds,
  99. const void *inData, int32_t length, void *outData,
  100. UErrorCode *pErrorCode) {
  101. if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
  102. return 0;
  103. }
  104. if(ds==nullptr || inData==nullptr || length<0 || (length&3)!=0 || outData==nullptr) {
  105. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  106. return 0;
  107. }
  108. if(length>0 && inData!=outData) {
  109. uprv_memcpy(outData, inData, length);
  110. }
  111. return length;
  112. }
  113. static int32_t U_CALLCONV
  114. uprv_swapArray64(const UDataSwapper *ds,
  115. const void *inData, int32_t length, void *outData,
  116. UErrorCode *pErrorCode) {
  117. const uint64_t *p;
  118. uint64_t *q;
  119. int32_t count;
  120. if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
  121. return 0;
  122. }
  123. if(ds==nullptr || inData==nullptr || length<0 || (length&7)!=0 || outData==nullptr) {
  124. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  125. return 0;
  126. }
  127. /* setup and swapping */
  128. p=(const uint64_t *)inData;
  129. q=(uint64_t *)outData;
  130. count=length/8;
  131. while(count>0) {
  132. uint64_t x=*p++;
  133. x=(x<<56)|((x&0xff00)<<40)|((x&0xff0000)<<24)|((x&0xff000000)<<8)|
  134. ((x>>8)&0xff000000)|((x>>24)&0xff0000)|((x>>40)&0xff00)|(x>>56);
  135. *q++=x;
  136. --count;
  137. }
  138. return length;
  139. }
  140. static int32_t U_CALLCONV
  141. uprv_copyArray64(const UDataSwapper *ds,
  142. const void *inData, int32_t length, void *outData,
  143. UErrorCode *pErrorCode) {
  144. if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
  145. return 0;
  146. }
  147. if(ds==nullptr || inData==nullptr || length<0 || (length&7)!=0 || outData==nullptr) {
  148. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  149. return 0;
  150. }
  151. if(length>0 && inData!=outData) {
  152. uprv_memcpy(outData, inData, length);
  153. }
  154. return length;
  155. }
  /**
   * Return x with its two bytes exchanged.
   */
  static uint16_t U_CALLCONV
  uprv_readSwapUInt16(uint16_t x) {
      const uint16_t highByte=(uint16_t)(x>>8);
      const uint16_t lowByte=(uint16_t)(x&0xff);
      return (uint16_t)((lowByte<<8)|highByte);
  }
  /**
   * Return x unchanged; the no-swap alternative to uprv_readSwapUInt16()
   * with the same signature.
   */
  static uint16_t U_CALLCONV
  uprv_readDirectUInt16(uint16_t x) {
      return x;
  }
  /**
   * Return x with its four bytes in reverse order.
   */
  static uint32_t U_CALLCONV
  uprv_readSwapUInt32(uint32_t x) {
      const uint32_t byte0=x&0xff;
      const uint32_t byte1=(x>>8)&0xff;
      const uint32_t byte2=(x>>16)&0xff;
      const uint32_t byte3=x>>24;
      return (uint32_t)((byte0<<24)|(byte1<<16)|(byte2<<8)|byte3);
  }
  /**
   * Return x unchanged; the no-swap alternative to uprv_readSwapUInt32()
   * with the same signature.
   */
  static uint32_t U_CALLCONV
  uprv_readDirectUInt32(uint32_t x) {
      return x;
  }
  /**
   * Store x at *p with its two bytes exchanged.
   */
  static void U_CALLCONV
  uprv_writeSwapUInt16(uint16_t *p, uint16_t x) {
      const uint16_t highByte=(uint16_t)(x>>8);
      const uint16_t lowByte=(uint16_t)(x&0xff);
      *p=(uint16_t)((lowByte<<8)|highByte);
  }
  /**
   * Store x at *p unchanged; the no-swap alternative to
   * uprv_writeSwapUInt16() with the same signature.
   */
  static void U_CALLCONV
  uprv_writeDirectUInt16(uint16_t *p, uint16_t x) {
      *p=x;
  }
  /**
   * Store x at *p with its four bytes in reverse order.
   */
  static void U_CALLCONV
  uprv_writeSwapUInt32(uint32_t *p, uint32_t x) {
      const uint32_t byte0=x&0xff;
      const uint32_t byte1=(x>>8)&0xff;
      const uint32_t byte2=(x>>16)&0xff;
      const uint32_t byte3=x>>24;
      *p=(uint32_t)((byte0<<24)|(byte1<<16)|(byte2<<8)|byte3);
  }
  /**
   * Store x at *p unchanged; the no-swap alternative to
   * uprv_writeSwapUInt32() with the same signature.
   */
  static void U_CALLCONV
  uprv_writeDirectUInt32(uint32_t *p, uint32_t x) {
      *p=x;
  }
  188. U_CAPI int16_t U_EXPORT2
  189. udata_readInt16(const UDataSwapper *ds, int16_t x) {
  190. return (int16_t)ds->readUInt16((uint16_t)x);
  191. }
  192. U_CAPI int32_t U_EXPORT2
  193. udata_readInt32(const UDataSwapper *ds, int32_t x) {
  194. return (int32_t)ds->readUInt32((uint32_t)x);
  195. }
  196. /**
  197. * Swap a block of invariant, NUL-terminated strings, but not padding
  198. * bytes after the last string.
  199. * @internal
  200. */
  201. U_CAPI int32_t U_EXPORT2
  202. udata_swapInvStringBlock(const UDataSwapper *ds,
  203. const void *inData, int32_t length, void *outData,
  204. UErrorCode *pErrorCode) {
  205. const char *inChars;
  206. int32_t stringsLength;
  207. if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
  208. return 0;
  209. }
  210. if(ds==nullptr || inData==nullptr || length<0 || (length>0 && outData==nullptr)) {
  211. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  212. return 0;
  213. }
  214. /* reduce the strings length to not include bytes after the last NUL */
  215. inChars=(const char *)inData;
  216. stringsLength=length;
  217. while(stringsLength>0 && inChars[stringsLength-1]!=0) {
  218. --stringsLength;
  219. }
  220. /* swap up to the last NUL */
  221. ds->swapInvChars(ds, inData, stringsLength, outData, pErrorCode);
  222. /* copy the bytes after the last NUL */
  223. if(inData!=outData && length>stringsLength) {
  224. uprv_memcpy((char *)outData+stringsLength, inChars+stringsLength, length-stringsLength);
  225. }
  226. /* return the length including padding bytes */
  227. if(U_SUCCESS(*pErrorCode)) {
  228. return length;
  229. } else {
  230. return 0;
  231. }
  232. }
  233. U_CAPI void U_EXPORT2
  234. udata_printError(const UDataSwapper *ds,
  235. const char *fmt,
  236. ...) {
  237. va_list args;
  238. if(ds->printError!=nullptr) {
  239. va_start(args, fmt);
  240. ds->printError(ds->printErrorContext, fmt, args);
  241. va_end(args);
  242. }
  243. }
  244. /* swap a data header ------------------------------------------------------- */
  245. U_CAPI int32_t U_EXPORT2
  246. udata_swapDataHeader(const UDataSwapper *ds,
  247. const void *inData, int32_t length, void *outData,
  248. UErrorCode *pErrorCode) {
  249. const DataHeader *pHeader;
  250. uint16_t headerSize, infoSize;
  251. /* argument checking */
  252. if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
  253. return 0;
  254. }
  255. if(ds==nullptr || inData==nullptr || length<-1 || (length>0 && outData==nullptr)) {
  256. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  257. return 0;
  258. }
  259. /* check minimum length and magic bytes */
  260. pHeader=(const DataHeader *)inData;
  261. if( (length>=0 && length<(int32_t)sizeof(DataHeader)) ||
  262. pHeader->dataHeader.magic1!=0xda ||
  263. pHeader->dataHeader.magic2!=0x27 ||
  264. pHeader->info.sizeofUChar!=2
  265. ) {
  266. udata_printError(ds, "udata_swapDataHeader(): initial bytes do not look like ICU data\n");
  267. *pErrorCode=U_UNSUPPORTED_ERROR;
  268. return 0;
  269. }
  270. headerSize=ds->readUInt16(pHeader->dataHeader.headerSize);
  271. infoSize=ds->readUInt16(pHeader->info.size);
  272. if( headerSize<sizeof(DataHeader) ||
  273. infoSize<sizeof(UDataInfo) ||
  274. headerSize<(sizeof(pHeader->dataHeader)+infoSize) ||
  275. (length>=0 && length<headerSize)
  276. ) {
  277. udata_printError(ds, "udata_swapDataHeader(): header size mismatch - headerSize %d infoSize %d length %d\n",
  278. headerSize, infoSize, length);
  279. *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  280. return 0;
  281. }
  282. if(length>0) {
  283. DataHeader *outHeader;
  284. const char *s;
  285. int32_t maxLength;
  286. /* Most of the fields are just bytes and need no swapping. */
  287. if(inData!=outData) {
  288. uprv_memcpy(outData, inData, headerSize);
  289. }
  290. outHeader=(DataHeader *)outData;
  291. outHeader->info.isBigEndian = ds->outIsBigEndian;
  292. outHeader->info.charsetFamily = ds->outCharset;
  293. /* swap headerSize */
  294. ds->swapArray16(ds, &pHeader->dataHeader.headerSize, 2, &outHeader->dataHeader.headerSize, pErrorCode);
  295. /* swap UDataInfo size and reservedWord */
  296. ds->swapArray16(ds, &pHeader->info.size, 4, &outHeader->info.size, pErrorCode);
  297. /* swap copyright statement after the UDataInfo */
  298. infoSize+=sizeof(pHeader->dataHeader);
  299. s=(const char *)inData+infoSize;
  300. maxLength=headerSize-infoSize;
  301. /* get the length of the string */
  302. for(length=0; length<maxLength && s[length]!=0; ++length) {}
  303. /* swap the string contents */
  304. ds->swapInvChars(ds, s, length, (char *)outData+infoSize, pErrorCode);
  305. }
  306. return headerSize;
  307. }
  308. /* API functions ------------------------------------------------------------ */
  309. U_CAPI UDataSwapper * U_EXPORT2
  310. udata_openSwapper(UBool inIsBigEndian, uint8_t inCharset,
  311. UBool outIsBigEndian, uint8_t outCharset,
  312. UErrorCode *pErrorCode) {
  313. UDataSwapper *swapper;
  314. if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
  315. return nullptr;
  316. }
  317. if(inCharset>U_EBCDIC_FAMILY || outCharset>U_EBCDIC_FAMILY) {
  318. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  319. return nullptr;
  320. }
  321. /* allocate the swapper */
  322. swapper=(UDataSwapper *)uprv_malloc(sizeof(UDataSwapper));
  323. if(swapper==nullptr) {
  324. *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
  325. return nullptr;
  326. }
  327. uprv_memset(swapper, 0, sizeof(UDataSwapper));
  328. /* set values and functions pointers according to in/out parameters */
  329. swapper->inIsBigEndian=inIsBigEndian;
  330. swapper->inCharset=inCharset;
  331. swapper->outIsBigEndian=outIsBigEndian;
  332. swapper->outCharset=outCharset;
  333. swapper->readUInt16= inIsBigEndian==U_IS_BIG_ENDIAN ? uprv_readDirectUInt16 : uprv_readSwapUInt16;
  334. swapper->readUInt32= inIsBigEndian==U_IS_BIG_ENDIAN ? uprv_readDirectUInt32 : uprv_readSwapUInt32;
  335. swapper->writeUInt16= outIsBigEndian==U_IS_BIG_ENDIAN ? uprv_writeDirectUInt16 : uprv_writeSwapUInt16;
  336. swapper->writeUInt32= outIsBigEndian==U_IS_BIG_ENDIAN ? uprv_writeDirectUInt32 : uprv_writeSwapUInt32;
  337. swapper->compareInvChars= outCharset==U_ASCII_FAMILY ? uprv_compareInvAscii : uprv_compareInvEbcdic;
  338. if(inIsBigEndian==outIsBigEndian) {
  339. swapper->swapArray16=uprv_copyArray16;
  340. swapper->swapArray32=uprv_copyArray32;
  341. swapper->swapArray64=uprv_copyArray64;
  342. } else {
  343. swapper->swapArray16=uprv_swapArray16;
  344. swapper->swapArray32=uprv_swapArray32;
  345. swapper->swapArray64=uprv_swapArray64;
  346. }
  347. if(inCharset==U_ASCII_FAMILY) {
  348. swapper->swapInvChars= outCharset==U_ASCII_FAMILY ? uprv_copyAscii : uprv_ebcdicFromAscii;
  349. } else /* U_EBCDIC_FAMILY */ {
  350. swapper->swapInvChars= outCharset==U_EBCDIC_FAMILY ? uprv_copyEbcdic : uprv_asciiFromEbcdic;
  351. }
  352. return swapper;
  353. }
  354. U_CAPI UDataSwapper * U_EXPORT2
  355. udata_openSwapperForInputData(const void *data, int32_t length,
  356. UBool outIsBigEndian, uint8_t outCharset,
  357. UErrorCode *pErrorCode) {
  358. const DataHeader *pHeader;
  359. uint16_t headerSize, infoSize;
  360. UBool inIsBigEndian;
  361. int8_t inCharset;
  362. if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
  363. return nullptr;
  364. }
  365. if( data==nullptr ||
  366. (length>=0 && length<(int32_t)sizeof(DataHeader)) ||
  367. outCharset>U_EBCDIC_FAMILY
  368. ) {
  369. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  370. return nullptr;
  371. }
  372. pHeader=(const DataHeader *)data;
  373. if( (length>=0 && length<(int32_t)sizeof(DataHeader)) ||
  374. pHeader->dataHeader.magic1!=0xda ||
  375. pHeader->dataHeader.magic2!=0x27 ||
  376. pHeader->info.sizeofUChar!=2
  377. ) {
  378. *pErrorCode=U_UNSUPPORTED_ERROR;
  379. return 0;
  380. }
  381. inIsBigEndian=(UBool)pHeader->info.isBigEndian;
  382. inCharset=pHeader->info.charsetFamily;
  383. if(inIsBigEndian==U_IS_BIG_ENDIAN) {
  384. headerSize=pHeader->dataHeader.headerSize;
  385. infoSize=pHeader->info.size;
  386. } else {
  387. headerSize=uprv_readSwapUInt16(pHeader->dataHeader.headerSize);
  388. infoSize=uprv_readSwapUInt16(pHeader->info.size);
  389. }
  390. if( headerSize<sizeof(DataHeader) ||
  391. infoSize<sizeof(UDataInfo) ||
  392. headerSize<(sizeof(pHeader->dataHeader)+infoSize) ||
  393. (length>=0 && length<headerSize)
  394. ) {
  395. *pErrorCode=U_UNSUPPORTED_ERROR;
  396. return 0;
  397. }
  398. return udata_openSwapper(inIsBigEndian, inCharset, outIsBigEndian, outCharset, pErrorCode);
  399. }
  400. U_CAPI void U_EXPORT2
  401. udata_closeSwapper(UDataSwapper *ds) {
  402. uprv_free(ds);
  403. }