ucnvhz.cpp 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. **********************************************************************
  5. * Copyright (C) 2000-2015, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. **********************************************************************
  8. * file name: ucnvhz.c
  9. * encoding: UTF-8
  10. * tab size: 8 (not used)
  11. * indentation:4
  12. *
  13. * created on: 2000oct16
  14. * created by: Ram Viswanadha
  15. * 10/31/2000 Ram Implemented offsets logic function
  16. *
  17. */
  18. #include "unicode/utypes.h"
  19. #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
  20. #include "cmemory.h"
  21. #include "unicode/ucnv.h"
  22. #include "unicode/ucnv_cb.h"
  23. #include "unicode/uset.h"
  24. #include "unicode/utf16.h"
  25. #include "ucnv_bld.h"
  26. #include "ucnv_cnv.h"
  27. #include "ucnv_imp.h"
  28. #define UCNV_TILDE 0x7E /* ~ */
  29. #define UCNV_OPEN_BRACE 0x7B /* { */
  30. #define UCNV_CLOSE_BRACE 0x7D /* } */
  31. #define SB_ESCAPE "\x7E\x7D"
  32. #define DB_ESCAPE "\x7E\x7B"
  33. #define TILDE_ESCAPE "\x7E\x7E"
  34. #define ESC_LEN 2
  35. #define CONCAT_ESCAPE_MACRO(args, targetIndex,targetLength,strToAppend, err, len,sourceIndex) UPRV_BLOCK_MACRO_BEGIN { \
  36. while(len-->0){ \
  37. if(targetIndex < targetLength){ \
  38. args->target[targetIndex] = (unsigned char) *strToAppend; \
  39. if(args->offsets!=nullptr){ \
  40. *(offsets++) = sourceIndex-1; \
  41. } \
  42. targetIndex++; \
  43. } \
  44. else{ \
  45. args->converter->charErrorBuffer[(int)args->converter->charErrorBufferLength++] = (unsigned char) *strToAppend; \
  46. *err =U_BUFFER_OVERFLOW_ERROR; \
  47. } \
  48. strToAppend++; \
  49. } \
  50. } UPRV_BLOCK_MACRO_END
  51. typedef struct{
  52. UConverter* gbConverter;
  53. int32_t targetIndex;
  54. int32_t sourceIndex;
  55. UBool isEscapeAppended;
  56. UBool isStateDBCS;
  57. UBool isTargetUCharDBCS;
  58. UBool isEmptySegment;
  59. }UConverterDataHZ;
  60. U_CDECL_BEGIN
  61. static void U_CALLCONV
  62. _HZOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
  63. UConverter *gbConverter;
  64. if(pArgs->onlyTestIsLoadable) {
  65. ucnv_canCreateConverter("GBK", errorCode); /* errorCode carries result */
  66. return;
  67. }
  68. gbConverter = ucnv_open("GBK", errorCode);
  69. if(U_FAILURE(*errorCode)) {
  70. return;
  71. }
  72. cnv->toUnicodeStatus = 0;
  73. cnv->fromUnicodeStatus= 0;
  74. cnv->mode=0;
  75. cnv->fromUChar32=0x0000;
  76. cnv->extraInfo = uprv_calloc(1, sizeof(UConverterDataHZ));
  77. if(cnv->extraInfo != nullptr){
  78. ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = gbConverter;
  79. }
  80. else {
  81. ucnv_close(gbConverter);
  82. *errorCode = U_MEMORY_ALLOCATION_ERROR;
  83. return;
  84. }
  85. }
  86. static void U_CALLCONV
  87. _HZClose(UConverter *cnv){
  88. if(cnv->extraInfo != nullptr) {
  89. ucnv_close (((UConverterDataHZ *) (cnv->extraInfo))->gbConverter);
  90. if(!cnv->isExtraLocal) {
  91. uprv_free(cnv->extraInfo);
  92. }
  93. cnv->extraInfo = nullptr;
  94. }
  95. }
  96. static void U_CALLCONV
  97. _HZReset(UConverter *cnv, UConverterResetChoice choice){
  98. if(choice<=UCNV_RESET_TO_UNICODE) {
  99. cnv->toUnicodeStatus = 0;
  100. cnv->mode=0;
  101. if(cnv->extraInfo != nullptr){
  102. ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = false;
  103. ((UConverterDataHZ*)cnv->extraInfo)->isEmptySegment = false;
  104. }
  105. }
  106. if(choice!=UCNV_RESET_TO_UNICODE) {
  107. cnv->fromUnicodeStatus= 0;
  108. cnv->fromUChar32=0x0000;
  109. if(cnv->extraInfo != nullptr){
  110. ((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = false;
  111. ((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0;
  112. ((UConverterDataHZ*)cnv->extraInfo)->sourceIndex = 0;
  113. ((UConverterDataHZ*)cnv->extraInfo)->isTargetUCharDBCS = false;
  114. }
  115. }
  116. }
  117. /**************************************HZ Encoding*************************************************
  118. * Rules for HZ encoding
  119. *
  120. * In ASCII mode, a byte is interpreted as an ASCII character, unless a
  121. * '~' is encountered. The character '~' is an escape character. By
  122. * convention, it must be immediately followed ONLY by '~', '{' or '\n'
  123. * (<LF>), with the following special meaning.
  124. * 1. The escape sequence '~~' is interpreted as a '~'.
  125. * 2. The escape-to-GB sequence '~{' switches the mode from ASCII to GB.
  126. * 3. The escape sequence '~\n' is a line-continuation marker to be
  127. * consumed with no output produced.
  128. * In GB mode, characters are interpreted two bytes at a time as (pure)
  129. * GB codes until the escape-from-GB code '~}' is read. This code
  130. * switches the mode from GB back to ASCII. (Note that the escape-
  131. * from-GB code '~}' ($7E7D) is outside the defined GB range.)
  132. *
  133. * Source: RFC 1842
  134. *
  135. * Note that the formal syntax in RFC 1842 is invalid. I assume that the
  136. * intended definition of single-byte-segment is as follows (pedberg):
  137. * single-byte-segment = single-byte-seq 1*single-byte-char
  138. */
  139. static void U_CALLCONV
  140. UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
  141. UErrorCode* err){
  142. char tempBuf[2];
  143. const char *mySource = ( char *) args->source;
  144. char16_t *myTarget = args->target;
  145. const char *mySourceLimit = args->sourceLimit;
  146. UChar32 targetUniChar = 0x0000;
  147. int32_t mySourceChar = 0x0000;
  148. UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo);
  149. tempBuf[0]=0;
  150. tempBuf[1]=0;
  151. /* Calling code already handles this situation. */
  152. /*if ((args->converter == nullptr) || (args->targetLimit < args->target) || (mySourceLimit < args->source)){
  153. *err = U_ILLEGAL_ARGUMENT_ERROR;
  154. return;
  155. }*/
  156. while(mySource< mySourceLimit){
  157. if(myTarget < args->targetLimit){
  158. mySourceChar= (unsigned char) *mySource++;
  159. if(args->converter->mode == UCNV_TILDE) {
  160. /* second byte after ~ */
  161. args->converter->mode=0;
  162. switch(mySourceChar) {
  163. case 0x0A:
  164. /* no output for ~\n (line-continuation marker) */
  165. continue;
  166. case UCNV_TILDE:
  167. if(args->offsets) {
  168. args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 2);
  169. }
  170. *(myTarget++)=(char16_t)mySourceChar;
  171. myData->isEmptySegment = false;
  172. continue;
  173. case UCNV_OPEN_BRACE:
  174. case UCNV_CLOSE_BRACE:
  175. myData->isStateDBCS = (mySourceChar == UCNV_OPEN_BRACE);
  176. if (myData->isEmptySegment) {
  177. myData->isEmptySegment = false; /* we are handling it, reset to avoid future spurious errors */
  178. *err = U_ILLEGAL_ESCAPE_SEQUENCE;
  179. args->converter->toUCallbackReason = UCNV_IRREGULAR;
  180. args->converter->toUBytes[0] = UCNV_TILDE;
  181. args->converter->toUBytes[1] = static_cast<uint8_t>(mySourceChar);
  182. args->converter->toULength = 2;
  183. args->target = myTarget;
  184. args->source = mySource;
  185. return;
  186. }
  187. myData->isEmptySegment = true;
  188. continue;
  189. default:
  190. /* if the first byte is equal to TILDE and the trail byte
  191. * is not a valid byte then it is an error condition
  192. */
  193. /*
  194. * Ticket 5691: consistent illegal sequences:
  195. * - We include at least the first byte in the illegal sequence.
  196. * - If any of the non-initial bytes could be the start of a character,
  197. * we stop the illegal sequence before the first one of those.
  198. */
  199. myData->isEmptySegment = false; /* different error here, reset this to avoid spurious future error */
  200. *err = U_ILLEGAL_ESCAPE_SEQUENCE;
  201. args->converter->toUBytes[0] = UCNV_TILDE;
  202. if( myData->isStateDBCS ?
  203. (0x21 <= mySourceChar && mySourceChar <= 0x7e) :
  204. mySourceChar <= 0x7f
  205. ) {
  206. /* The current byte could be the start of a character: Back it out. */
  207. args->converter->toULength = 1;
  208. --mySource;
  209. } else {
  210. /* Include the current byte in the illegal sequence. */
  211. args->converter->toUBytes[1] = static_cast<uint8_t>(mySourceChar);
  212. args->converter->toULength = 2;
  213. }
  214. args->target = myTarget;
  215. args->source = mySource;
  216. return;
  217. }
  218. } else if(myData->isStateDBCS) {
  219. if(args->converter->toUnicodeStatus == 0x00){
  220. /* lead byte */
  221. if(mySourceChar == UCNV_TILDE) {
  222. args->converter->mode = UCNV_TILDE;
  223. } else {
  224. /* add another bit to distinguish a 0 byte from not having seen a lead byte */
  225. args->converter->toUnicodeStatus = (uint32_t) (mySourceChar | 0x100);
  226. myData->isEmptySegment = false; /* the segment has something, either valid or will produce a different error, so reset this */
  227. }
  228. continue;
  229. }
  230. else{
  231. /* trail byte */
  232. int leadIsOk, trailIsOk;
  233. uint32_t leadByte = args->converter->toUnicodeStatus & 0xff;
  234. targetUniChar = 0xffff;
  235. /*
  236. * Ticket 5691: consistent illegal sequences:
  237. * - We include at least the first byte in the illegal sequence.
  238. * - If any of the non-initial bytes could be the start of a character,
  239. * we stop the illegal sequence before the first one of those.
  240. *
  241. * In HZ DBCS, if the second byte is in the 21..7e range,
  242. * we report only the first byte as the illegal sequence.
  243. * Otherwise we convert or report the pair of bytes.
  244. */
  245. leadIsOk = (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21);
  246. trailIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
  247. if (leadIsOk && trailIsOk) {
  248. tempBuf[0] = (char) (leadByte+0x80) ;
  249. tempBuf[1] = (char) (mySourceChar+0x80);
  250. targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
  251. tempBuf, 2, args->converter->useFallback);
  252. mySourceChar= (leadByte << 8) | mySourceChar;
  253. } else if (trailIsOk) {
  254. /* report a single illegal byte and continue with the following DBCS starter byte */
  255. --mySource;
  256. mySourceChar = (int32_t)leadByte;
  257. } else {
  258. /* report a pair of illegal bytes if the second byte is not a DBCS starter */
  259. /* add another bit so that the code below writes 2 bytes in case of error */
  260. mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar;
  261. }
  262. args->converter->toUnicodeStatus =0x00;
  263. }
  264. }
  265. else{
  266. if(mySourceChar == UCNV_TILDE) {
  267. args->converter->mode = UCNV_TILDE;
  268. continue;
  269. } else if(mySourceChar <= 0x7f) {
  270. targetUniChar = (char16_t)mySourceChar; /* ASCII */
  271. myData->isEmptySegment = false; /* the segment has something valid */
  272. } else {
  273. targetUniChar = 0xffff;
  274. myData->isEmptySegment = false; /* different error here, reset this to avoid spurious future error */
  275. }
  276. }
  277. if(targetUniChar < 0xfffe){
  278. if(args->offsets) {
  279. args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 1-(myData->isStateDBCS));
  280. }
  281. *(myTarget++)=(char16_t)targetUniChar;
  282. }
  283. else /* targetUniChar>=0xfffe */ {
  284. if(targetUniChar == 0xfffe){
  285. *err = U_INVALID_CHAR_FOUND;
  286. }
  287. else{
  288. *err = U_ILLEGAL_CHAR_FOUND;
  289. }
  290. if(mySourceChar > 0xff){
  291. args->converter->toUBytes[0] = (uint8_t)(mySourceChar >> 8);
  292. args->converter->toUBytes[1] = (uint8_t)mySourceChar;
  293. args->converter->toULength=2;
  294. }
  295. else{
  296. args->converter->toUBytes[0] = (uint8_t)mySourceChar;
  297. args->converter->toULength=1;
  298. }
  299. break;
  300. }
  301. }
  302. else{
  303. *err =U_BUFFER_OVERFLOW_ERROR;
  304. break;
  305. }
  306. }
  307. args->target = myTarget;
  308. args->source = mySource;
  309. }
  310. static void U_CALLCONV
  311. UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
  312. UErrorCode * err){
  313. const char16_t *mySource = args->source;
  314. char *myTarget = args->target;
  315. int32_t* offsets = args->offsets;
  316. int32_t mySourceIndex = 0;
  317. int32_t myTargetIndex = 0;
  318. int32_t targetLength = (int32_t)(args->targetLimit - myTarget);
  319. int32_t mySourceLength = (int32_t)(args->sourceLimit - args->source);
  320. uint32_t targetUniChar = 0x0000;
  321. UChar32 mySourceChar = 0x0000;
  322. UConverterDataHZ *myConverterData=(UConverterDataHZ*)args->converter->extraInfo;
  323. UBool isTargetUCharDBCS = (UBool) myConverterData->isTargetUCharDBCS;
  324. UBool oldIsTargetUCharDBCS;
  325. int len =0;
  326. const char* escSeq=nullptr;
  327. /* Calling code already handles this situation. */
  328. /*if ((args->converter == nullptr) || (args->targetLimit < myTarget) || (args->sourceLimit < args->source)){
  329. *err = U_ILLEGAL_ARGUMENT_ERROR;
  330. return;
  331. }*/
  332. if(args->converter->fromUChar32!=0 && myTargetIndex < targetLength) {
  333. goto getTrail;
  334. }
  335. /*writing the char to the output stream */
  336. while (mySourceIndex < mySourceLength){
  337. targetUniChar = missingCharMarker;
  338. if (myTargetIndex < targetLength){
  339. mySourceChar = (char16_t) mySource[mySourceIndex++];
  340. oldIsTargetUCharDBCS = isTargetUCharDBCS;
  341. if(mySourceChar ==UCNV_TILDE){
  342. /*concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",err,2,&mySourceIndex);*/
  343. len = ESC_LEN;
  344. escSeq = TILDE_ESCAPE;
  345. CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
  346. continue;
  347. } else if(mySourceChar <= 0x7f) {
  348. targetUniChar = mySourceChar;
  349. } else {
  350. int32_t length= ucnv_MBCSFromUChar32(myConverterData->gbConverter->sharedData,
  351. mySourceChar,&targetUniChar,args->converter->useFallback);
  352. /* we can only use lead bytes 21..7D and trail bytes 21..7E */
  353. if( length == 2 &&
  354. (uint16_t)(targetUniChar - 0xa1a1) <= (0xfdfe - 0xa1a1) &&
  355. (uint8_t)(targetUniChar - 0xa1) <= (0xfe - 0xa1)
  356. ) {
  357. targetUniChar -= 0x8080;
  358. } else {
  359. targetUniChar = missingCharMarker;
  360. }
  361. }
  362. if (targetUniChar != missingCharMarker){
  363. myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF);
  364. if(oldIsTargetUCharDBCS != isTargetUCharDBCS || !myConverterData->isEscapeAppended ){
  365. /*Shifting from a double byte to single byte mode*/
  366. if(!isTargetUCharDBCS){
  367. len =ESC_LEN;
  368. escSeq = SB_ESCAPE;
  369. CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
  370. myConverterData->isEscapeAppended = true;
  371. }
  372. else{ /* Shifting from a single byte to double byte mode*/
  373. len =ESC_LEN;
  374. escSeq = DB_ESCAPE;
  375. CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
  376. myConverterData->isEscapeAppended = true;
  377. }
  378. }
  379. if(isTargetUCharDBCS){
  380. if( myTargetIndex <targetLength){
  381. myTarget[myTargetIndex++] =(char) (targetUniChar >> 8);
  382. if(offsets){
  383. *(offsets++) = mySourceIndex-1;
  384. }
  385. if(myTargetIndex < targetLength){
  386. myTarget[myTargetIndex++] =(char) targetUniChar;
  387. if(offsets){
  388. *(offsets++) = mySourceIndex-1;
  389. }
  390. }else{
  391. args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
  392. *err = U_BUFFER_OVERFLOW_ERROR;
  393. }
  394. }else{
  395. args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) (targetUniChar >> 8);
  396. args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
  397. *err = U_BUFFER_OVERFLOW_ERROR;
  398. }
  399. }else{
  400. if( myTargetIndex <targetLength){
  401. myTarget[myTargetIndex++] = (char) (targetUniChar );
  402. if(offsets){
  403. *(offsets++) = mySourceIndex-1;
  404. }
  405. }else{
  406. args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
  407. *err = U_BUFFER_OVERFLOW_ERROR;
  408. }
  409. }
  410. }
  411. else{
  412. /* oops.. the code point is unassigned */
  413. /*Handle surrogates */
  414. /*check if the char is a First surrogate*/
  415. if(U16_IS_SURROGATE(mySourceChar)) {
  416. if(U16_IS_SURROGATE_LEAD(mySourceChar)) {
  417. args->converter->fromUChar32=mySourceChar;
  418. getTrail:
  419. /*look ahead to find the trail surrogate*/
  420. if(mySourceIndex < mySourceLength) {
  421. /* test the following code unit */
  422. char16_t trail=(char16_t) args->source[mySourceIndex];
  423. if(U16_IS_TRAIL(trail)) {
  424. ++mySourceIndex;
  425. mySourceChar=U16_GET_SUPPLEMENTARY(args->converter->fromUChar32, trail);
  426. args->converter->fromUChar32=0x00;
  427. /* there are no surrogates in GB2312*/
  428. *err = U_INVALID_CHAR_FOUND;
  429. /* exit this condition tree */
  430. } else {
  431. /* this is an unmatched lead code unit (1st surrogate) */
  432. /* callback(illegal) */
  433. *err=U_ILLEGAL_CHAR_FOUND;
  434. }
  435. } else {
  436. /* no more input */
  437. *err = U_ZERO_ERROR;
  438. }
  439. } else {
  440. /* this is an unmatched trail code unit (2nd surrogate) */
  441. /* callback(illegal) */
  442. *err=U_ILLEGAL_CHAR_FOUND;
  443. }
  444. } else {
  445. /* callback(unassigned) for a BMP code point */
  446. *err = U_INVALID_CHAR_FOUND;
  447. }
  448. args->converter->fromUChar32=mySourceChar;
  449. break;
  450. }
  451. }
  452. else{
  453. *err = U_BUFFER_OVERFLOW_ERROR;
  454. break;
  455. }
  456. targetUniChar=missingCharMarker;
  457. }
  458. args->target += myTargetIndex;
  459. args->source += mySourceIndex;
  460. myConverterData->isTargetUCharDBCS = isTargetUCharDBCS;
  461. }
  462. static void U_CALLCONV
  463. _HZ_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
  464. UConverter *cnv = args->converter;
  465. UConverterDataHZ *convData=(UConverterDataHZ *) cnv->extraInfo;
  466. char *p;
  467. char buffer[4];
  468. p = buffer;
  469. if( convData->isTargetUCharDBCS){
  470. *p++= UCNV_TILDE;
  471. *p++= UCNV_CLOSE_BRACE;
  472. convData->isTargetUCharDBCS=false;
  473. }
  474. *p++= (char)cnv->subChars[0];
  475. ucnv_cbFromUWriteBytes(args,
  476. buffer, (int32_t)(p - buffer),
  477. offsetIndex, err);
  478. }
  479. /*
  480. * Structure for cloning an HZ converter into a single memory block.
  481. */
  482. struct cloneHZStruct
  483. {
  484. UConverter cnv;
  485. UConverter subCnv;
  486. UConverterDataHZ mydata;
  487. };
  488. static UConverter * U_CALLCONV
  489. _HZ_SafeClone(const UConverter *cnv,
  490. void *stackBuffer,
  491. int32_t *pBufferSize,
  492. UErrorCode *status)
  493. {
  494. struct cloneHZStruct * localClone;
  495. int32_t size, bufferSizeNeeded = sizeof(struct cloneHZStruct);
  496. if (U_FAILURE(*status)){
  497. return nullptr;
  498. }
  499. if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */
  500. *pBufferSize = bufferSizeNeeded;
  501. return nullptr;
  502. }
  503. localClone = (struct cloneHZStruct *)stackBuffer;
  504. /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
  505. uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataHZ));
  506. localClone->cnv.extraInfo = &localClone->mydata;
  507. localClone->cnv.isExtraLocal = true;
  508. /* deep-clone the sub-converter */
  509. size = (int32_t)sizeof(UConverter);
  510. ((UConverterDataHZ*)localClone->cnv.extraInfo)->gbConverter =
  511. ucnv_safeClone(((UConverterDataHZ*)cnv->extraInfo)->gbConverter, &localClone->subCnv, &size, status);
  512. return &localClone->cnv;
  513. }
  514. static void U_CALLCONV
  515. _HZ_GetUnicodeSet(const UConverter *cnv,
  516. const USetAdder *sa,
  517. UConverterUnicodeSet which,
  518. UErrorCode *pErrorCode) {
  519. /* HZ converts all of ASCII */
  520. sa->addRange(sa->set, 0, 0x7f);
  521. /* add all of the code points that the sub-converter handles */
  522. ucnv_MBCSGetFilteredUnicodeSetForUnicode(
  523. ((UConverterDataHZ*)cnv->extraInfo)->gbConverter->sharedData,
  524. sa, which, UCNV_SET_FILTER_HZ,
  525. pErrorCode);
  526. }
  527. U_CDECL_END
  528. static const UConverterImpl _HZImpl={
  529. UCNV_HZ,
  530. nullptr,
  531. nullptr,
  532. _HZOpen,
  533. _HZClose,
  534. _HZReset,
  535. UConverter_toUnicode_HZ_OFFSETS_LOGIC,
  536. UConverter_toUnicode_HZ_OFFSETS_LOGIC,
  537. UConverter_fromUnicode_HZ_OFFSETS_LOGIC,
  538. UConverter_fromUnicode_HZ_OFFSETS_LOGIC,
  539. nullptr,
  540. nullptr,
  541. nullptr,
  542. _HZ_WriteSub,
  543. _HZ_SafeClone,
  544. _HZ_GetUnicodeSet,
  545. nullptr,
  546. nullptr
  547. };
  548. static const UConverterStaticData _HZStaticData={
  549. sizeof(UConverterStaticData),
  550. "HZ",
  551. 0,
  552. UCNV_IBM,
  553. UCNV_HZ,
  554. 1,
  555. 4,
  556. { 0x1a, 0, 0, 0 },
  557. 1,
  558. false,
  559. false,
  560. 0,
  561. 0,
  562. { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */
  563. };
  564. const UConverterSharedData _HZData=
  565. UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_HZStaticData, &_HZImpl);
  566. #endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION */