12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046 |
- // © 2016 and later: Unicode, Inc. and others.
- // License & terms of use: http://www.unicode.org/copyright.html
- /*
- ******************************************************************************
- *
- * Copyright (C) 2000-2016, International Business Machines
- * Corporation and others. All Rights Reserved.
- *
- ******************************************************************************
- * file name: ucnvscsu.c
- * encoding: UTF-8
- * tab size: 8 (not used)
- * indentation:4
- *
- * created on: 2000nov18
- * created by: Markus W. Scherer
- *
- * This is an implementation of the Standard Compression Scheme for Unicode
- * as defined in https://www.unicode.org/reports/tr6/ .
- * Reserved commands and window settings are treated as illegal sequences and
- * will result in callback calls.
- */
- #include "unicode/utypes.h"
- #if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
- #include "unicode/ucnv.h"
- #include "unicode/ucnv_cb.h"
- #include "unicode/utf16.h"
- #include "ucnv_bld.h"
- #include "ucnv_cnv.h"
- #include "cmemory.h"
- /* SCSU definitions --------------------------------------------------------- */
/*
 * SCSU tag (command) byte values.
 * The S* tags are recognized by the single-byte-mode decoder,
 * the U* tags by the Unicode-mode decoder.
 */
enum {
    /* single-byte mode */
    SQ0 = 0x01,     /* first "quote from window pair" tag (pair 0) */
    SQ7 = 0x08,     /* last "quote from window pair" tag (pair 7) */
    SDX = 0x0b,     /* define a window as extended */
    Srs = 0x0c,     /* reserved */
    SQU = 0x0e,     /* quote a single Unicode character */
    SCU = 0x0f,     /* change to Unicode mode */
    SC0 = 0x10,     /* select window 0 */
    SC7 = 0x17,     /* select window 7 */
    SD0 = 0x18,     /* define and select window 0 */
    SD7 = 0x1f,     /* define and select window 7 */

    /* Unicode mode */
    UC0 = 0xe0,     /* select window 0 */
    UC7 = 0xe7,     /* select window 7 */
    UD0 = 0xe8,     /* define and select window 0 */
    UD7 = 0xef,     /* define and select window 7 */
    UQU = 0xf0,     /* quote a single Unicode character */
    UDX = 0xf1,     /* define a window as extended */
    Urs = 0xf2      /* reserved */
};
/* Boundaries that classify the one-byte window offset value following an SDn/UDn tag. */
enum {
    /*
     * Unicode code points from 3400 to E000 are not addressable by a
     * dynamic window, since no short-run alphabets are found in these
     * areas. Therefore gapOffset is added to all window offsets computed
     * from values starting at gapThreshold (0x68<<7 == 0x3400).
     */
    gapThreshold = 0x68,
    gapOffset = 0xac00,

    /* values from reservedStart up to (not including) fixedThreshold are reserved */
    reservedStart = 0xa8,

    /* values from fixedThreshold on index the table of predefined fixed offsets */
    fixedThreshold = 0xf9
};
/* Start offsets of the 8 static windows; these never change and all lie in the BMP. */
static const uint32_t staticOffsets[8]={
    /* 0 */ 0x0000,     /* ASCII for quoted tags */
    /* 1 */ 0x0080,     /* Latin - 1 Supplement (for access to punctuation) */
    /* 2 */ 0x0100,     /* Latin Extended-A */
    /* 3 */ 0x0300,     /* Combining Diacritical Marks */
    /* 4 */ 0x2000,     /* General Punctuation */
    /* 5 */ 0x2080,     /* Currency Symbols */
    /* 6 */ 0x2100,     /* Letterlike Symbols and Number Forms */
    /* 7 */ 0x3000      /* CJK Symbols and punctuation */
};
/*
 * Start offsets that the 8 dynamic (sliding) windows have after a reset;
 * copied into the per-converter offset arrays by _SCSUReset().
 */
static const uint32_t initialDynamicOffsets[8]={
    /* 0 */ 0x0080,     /* Latin-1 */
    /* 1 */ 0x00c0,     /* Latin Extended A */
    /* 2 */ 0x0400,     /* Cyrillic */
    /* 3 */ 0x0600,     /* Arabic */
    /* 4 */ 0x0900,     /* Devanagari */
    /* 5 */ 0x3040,     /* Hiragana */
    /* 6 */ 0x30a0,     /* Katakana */
    /* 7 */ 0xff00      /* Fullwidth ASCII */
};
/*
 * Predefined window offsets, indexed by (window offset value - fixedThreshold),
 * i.e. entry 0 belongs to value 0xF9 and the last entry to value 0xFF.
 */
static const uint32_t fixedOffsets[]={
    0x00c0,     /* 0xF9: Latin-1 Letters + half of Latin Extended A */
    0x0250,     /* 0xFA: IPA extensions */
    0x0370,     /* 0xFB: Greek */
    0x0530,     /* 0xFC: Armenian */
    0x3040,     /* 0xFD: Hiragana */
    0x30a0,     /* 0xFE: Katakana */
    0xff60      /* 0xFF: Halfwidth Katakana */
};
/*
 * States of the toUnicode state machine (stored in SCSUData.toUState).
 * readCommand means "not inside a multi-byte sequence"; every other state
 * names the byte that is expected next.
 */
enum {
    readCommand = 0,
    quotePairOne = 1,       /* first byte of a quoted 16-bit unit (after SQU/UQU) */
    quotePairTwo = 2,       /* second byte of a 16-bit unit */
    quoteOne = 3,           /* the byte quoted by SQn */
    definePairOne = 4,      /* first argument byte of SDX/UDX */
    definePairTwo = 5,      /* second argument byte of SDX/UDX */
    defineOne = 6           /* window offset byte of SDn/UDn */
};
- typedef struct SCSUData {
- /* dynamic window offsets, initialize to default values from initialDynamicOffsets */
- uint32_t toUDynamicOffsets[8];
- uint32_t fromUDynamicOffsets[8];
- /* state machine state - toUnicode */
- UBool toUIsSingleByteMode;
- uint8_t toUState;
- int8_t toUQuoteWindow, toUDynamicWindow;
- uint8_t toUByteOne;
- uint8_t toUPadding[3];
- /* state machine state - fromUnicode */
- UBool fromUIsSingleByteMode;
- int8_t fromUDynamicWindow;
- /*
- * windowUse[] keeps track of the use of the dynamic windows:
- * At nextWindowUseIndex there is the least recently used window,
- * and the following windows (in a wrapping manner) are more and more
- * recently used.
- * At nextWindowUseIndex-1 there is the most recently used window.
- */
- uint8_t locale;
- int8_t nextWindowUseIndex;
- int8_t windowUse[8];
- } SCSUData;
/*
 * Initial contents of SCSUData.windowUse[] (dynamic window numbers ordered
 * from least to most recently used, since nextWindowUseIndex starts at 0):
 * one order for the generic case and one for the "ja" locale.
 */
static const int8_t initialWindowUse[8]={
    7, 0, 3, 2, 4, 5, 6, 1
};
static const int8_t initialWindowUse_ja[8]={
    3, 2, 4, 1, 0, 7, 5, 6
};
/* Values for SCSUData.locale. */
enum {
    lGeneric = 0,   /* any locale other than "ja" */
    l_ja = 1        /* locale is "ja" or starts with "ja_" */
};
- /* SCSU setup functions ----------------------------------------------------- */
- U_CDECL_BEGIN
- static void U_CALLCONV
- _SCSUReset(UConverter *cnv, UConverterResetChoice choice) {
- SCSUData *scsu=(SCSUData *)cnv->extraInfo;
- if(choice<=UCNV_RESET_TO_UNICODE) {
- /* reset toUnicode */
- uprv_memcpy(scsu->toUDynamicOffsets, initialDynamicOffsets, 32);
- scsu->toUIsSingleByteMode=true;
- scsu->toUState=readCommand;
- scsu->toUQuoteWindow=scsu->toUDynamicWindow=0;
- scsu->toUByteOne=0;
- cnv->toULength=0;
- }
- if(choice!=UCNV_RESET_TO_UNICODE) {
- /* reset fromUnicode */
- uprv_memcpy(scsu->fromUDynamicOffsets, initialDynamicOffsets, 32);
- scsu->fromUIsSingleByteMode=true;
- scsu->fromUDynamicWindow=0;
- scsu->nextWindowUseIndex=0;
- switch(scsu->locale) {
- case l_ja:
- uprv_memcpy(scsu->windowUse, initialWindowUse_ja, 8);
- break;
- default:
- uprv_memcpy(scsu->windowUse, initialWindowUse, 8);
- break;
- }
- cnv->fromUChar32=0;
- }
- }
- static void U_CALLCONV
- _SCSUOpen(UConverter *cnv,
- UConverterLoadArgs *pArgs,
- UErrorCode *pErrorCode) {
- const char *locale=pArgs->locale;
- if(pArgs->onlyTestIsLoadable) {
- return;
- }
- cnv->extraInfo=uprv_malloc(sizeof(SCSUData));
- if(cnv->extraInfo!=nullptr) {
- if(locale!=nullptr && locale[0]=='j' && locale[1]=='a' && (locale[2]==0 || locale[2]=='_')) {
- ((SCSUData *)cnv->extraInfo)->locale=l_ja;
- } else {
- ((SCSUData *)cnv->extraInfo)->locale=lGeneric;
- }
- _SCSUReset(cnv, UCNV_RESET_BOTH);
- } else {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- }
- /* Set the substitution character U+fffd as a Unicode string. */
- cnv->subUChars[0]=0xfffd;
- cnv->subCharLen=-1;
- }
- static void U_CALLCONV
- _SCSUClose(UConverter *cnv) {
- if(cnv->extraInfo!=nullptr) {
- if(!cnv->isExtraLocal) {
- uprv_free(cnv->extraInfo);
- }
- cnv->extraInfo=nullptr;
- }
- }
- /* SCSU-to-Unicode conversion functions ------------------------------------- */
- static void U_CALLCONV
- _SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- SCSUData *scsu;
- const uint8_t *source, *sourceLimit;
- char16_t *target;
- const char16_t *targetLimit;
- int32_t *offsets;
- UBool isSingleByteMode;
- uint8_t state, byteOne;
- int8_t quoteWindow, dynamicWindow;
- int32_t sourceIndex, nextSourceIndex;
- uint8_t b;
- /* set up the local pointers */
- cnv=pArgs->converter;
- scsu=(SCSUData *)cnv->extraInfo;
- source=(const uint8_t *)pArgs->source;
- sourceLimit=(const uint8_t *)pArgs->sourceLimit;
- target=pArgs->target;
- targetLimit=pArgs->targetLimit;
- offsets=pArgs->offsets;
- /* get the state machine state */
- isSingleByteMode=scsu->toUIsSingleByteMode;
- state=scsu->toUState;
- quoteWindow=scsu->toUQuoteWindow;
- dynamicWindow=scsu->toUDynamicWindow;
- byteOne=scsu->toUByteOne;
- /* sourceIndex=-1 if the current character began in the previous buffer */
- sourceIndex=state==readCommand ? 0 : -1;
- nextSourceIndex=0;
- /*
- * conversion "loop"
- *
- * For performance, this is not a normal C loop.
- * Instead, there are two code blocks for the two SCSU modes.
- * The function branches to either one, and a change of the mode is done with a goto to
- * the other branch.
- *
- * Each branch has two conventional loops:
- * - a fast-path loop for the most common codes in the mode
- * - a loop for all other codes in the mode
- * When the fast-path runs into a code that it cannot handle, its loop ends and it
- * runs into the following loop to handle the other codes.
- * The end of the input or output buffer is also handled by the slower loop.
- * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
- *
- * The callback handling is done by returning with an error code.
- * The conversion framework actually calls the callback function.
- */
- if(isSingleByteMode) {
- /* fast path for single-byte mode */
- if(state==readCommand) {
- fastSingle:
- while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) {
- ++source;
- ++nextSourceIndex;
- if(b<=0x7f) {
- /* write US-ASCII graphic character or DEL */
- *target++=(char16_t)b;
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- } else {
- /* write from dynamic window */
- uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
- if(c<=0xffff) {
- *target++=(char16_t)c;
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- } else {
- /* output surrogate pair */
- *target++=(char16_t)(0xd7c0+(c>>10));
- if(target<targetLimit) {
- *target++=(char16_t)(0xdc00|(c&0x3ff));
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- *offsets++=sourceIndex;
- }
- } else {
- /* target overflow */
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- cnv->UCharErrorBuffer[0]=(char16_t)(0xdc00|(c&0x3ff));
- cnv->UCharErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- goto endloop;
- }
- }
- }
- sourceIndex=nextSourceIndex;
- }
- }
- /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
- singleByteMode:
- while(source<sourceLimit) {
- if(target>=targetLimit) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- b=*source++;
- ++nextSourceIndex;
- switch(state) {
- case readCommand:
- /* redundant conditions are commented out */
- /* here: b<0x20 because otherwise we would be in fastSingle */
- if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
- /* CR/LF/TAB/NUL */
- *target++=(char16_t)b;
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- sourceIndex=nextSourceIndex;
- goto fastSingle;
- } else if(SC0<=b) {
- if(b<=SC7) {
- dynamicWindow=(int8_t)(b-SC0);
- sourceIndex=nextSourceIndex;
- goto fastSingle;
- } else /* if(SD0<=b && b<=SD7) */ {
- dynamicWindow=(int8_t)(b-SD0);
- state=defineOne;
- }
- } else if(/* SQ0<=b && */ b<=SQ7) {
- quoteWindow=(int8_t)(b-SQ0);
- state=quoteOne;
- } else if(b==SDX) {
- state=definePairOne;
- } else if(b==SQU) {
- state=quotePairOne;
- } else if(b==SCU) {
- sourceIndex=nextSourceIndex;
- isSingleByteMode=false;
- goto fastUnicode;
- } else /* Srs */ {
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- goto endloop;
- }
- /* store the first byte of a multibyte sequence in toUBytes[] */
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- break;
- case quotePairOne:
- byteOne=b;
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- state=quotePairTwo;
- break;
- case quotePairTwo:
- *target++=(char16_t)((byteOne<<8)|b);
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- sourceIndex=nextSourceIndex;
- state=readCommand;
- goto fastSingle;
- case quoteOne:
- if(b<0x80) {
- /* all static offsets are in the BMP */
- *target++=(char16_t)(staticOffsets[quoteWindow]+b);
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- } else {
- /* write from dynamic window */
- uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
- if(c<=0xffff) {
- *target++=(char16_t)c;
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- } else {
- /* output surrogate pair */
- *target++=(char16_t)(0xd7c0+(c>>10));
- if(target<targetLimit) {
- *target++=(char16_t)(0xdc00|(c&0x3ff));
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- *offsets++=sourceIndex;
- }
- } else {
- /* target overflow */
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- cnv->UCharErrorBuffer[0]=(char16_t)(0xdc00|(c&0x3ff));
- cnv->UCharErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- goto endloop;
- }
- }
- }
- sourceIndex=nextSourceIndex;
- state=readCommand;
- goto fastSingle;
- case definePairOne:
- dynamicWindow=(int8_t)((b>>5)&7);
- byteOne=(uint8_t)(b&0x1f);
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- state=definePairTwo;
- break;
- case definePairTwo:
- scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL);
- sourceIndex=nextSourceIndex;
- state=readCommand;
- goto fastSingle;
- case defineOne:
- if(b==0) {
- /* callback(illegal): Reserved window offset value 0 */
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- goto endloop;
- } else if(b<gapThreshold) {
- scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
- } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
- scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
- } else if(b>=fixedThreshold) {
- scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
- } else {
- /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- goto endloop;
- }
- sourceIndex=nextSourceIndex;
- state=readCommand;
- goto fastSingle;
- }
- }
- } else {
- /* fast path for Unicode mode */
- if(state==readCommand) {
- fastUnicode:
- while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) {
- *target++=(char16_t)((b<<8)|source[1]);
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- sourceIndex=nextSourceIndex;
- nextSourceIndex+=2;
- source+=2;
- }
- }
- /* normal state machine for Unicode mode */
- /* unicodeByteMode: */
- while(source<sourceLimit) {
- if(target>=targetLimit) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- b=*source++;
- ++nextSourceIndex;
- switch(state) {
- case readCommand:
- if((uint8_t)(b-UC0)>(Urs-UC0)) {
- byteOne=b;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- state=quotePairTwo;
- } else if(/* UC0<=b && */ b<=UC7) {
- dynamicWindow=(int8_t)(b-UC0);
- sourceIndex=nextSourceIndex;
- isSingleByteMode=true;
- goto fastSingle;
- } else if(/* UD0<=b && */ b<=UD7) {
- dynamicWindow=(int8_t)(b-UD0);
- isSingleByteMode=true;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- state=defineOne;
- goto singleByteMode;
- } else if(b==UDX) {
- isSingleByteMode=true;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- state=definePairOne;
- goto singleByteMode;
- } else if(b==UQU) {
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- state=quotePairOne;
- } else /* Urs */ {
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- goto endloop;
- }
- break;
- case quotePairOne:
- byteOne=b;
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- state=quotePairTwo;
- break;
- case quotePairTwo:
- *target++=(char16_t)((byteOne<<8)|b);
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- sourceIndex=nextSourceIndex;
- state=readCommand;
- goto fastUnicode;
- }
- }
- }
- endloop:
- /* set the converter state back into UConverter */
- if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
- /* reset to deal with the next character */
- state=readCommand;
- } else if(state==readCommand) {
- /* not in a multi-byte sequence, reset toULength */
- cnv->toULength=0;
- }
- scsu->toUIsSingleByteMode=isSingleByteMode;
- scsu->toUState=state;
- scsu->toUQuoteWindow=quoteWindow;
- scsu->toUDynamicWindow=dynamicWindow;
- scsu->toUByteOne=byteOne;
- /* write back the updated pointers */
- pArgs->source=(const char *)source;
- pArgs->target=target;
- pArgs->offsets=offsets;
- return;
- }
- /*
- * Identical to _SCSUToUnicodeWithOffsets but without offset handling.
- * If a change is made in the original function, then either
- * change this function the same way or
- * re-copy the original function and remove the variables
- * offsets, sourceIndex, and nextSourceIndex.
- */
- static void U_CALLCONV
- _SCSUToUnicode(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- SCSUData *scsu;
- const uint8_t *source, *sourceLimit;
- char16_t *target;
- const char16_t *targetLimit;
- UBool isSingleByteMode;
- uint8_t state, byteOne;
- int8_t quoteWindow, dynamicWindow;
- uint8_t b;
- /* set up the local pointers */
- cnv=pArgs->converter;
- scsu=(SCSUData *)cnv->extraInfo;
- source=(const uint8_t *)pArgs->source;
- sourceLimit=(const uint8_t *)pArgs->sourceLimit;
- target=pArgs->target;
- targetLimit=pArgs->targetLimit;
- /* get the state machine state */
- isSingleByteMode=scsu->toUIsSingleByteMode;
- state=scsu->toUState;
- quoteWindow=scsu->toUQuoteWindow;
- dynamicWindow=scsu->toUDynamicWindow;
- byteOne=scsu->toUByteOne;
- /*
- * conversion "loop"
- *
- * For performance, this is not a normal C loop.
- * Instead, there are two code blocks for the two SCSU modes.
- * The function branches to either one, and a change of the mode is done with a goto to
- * the other branch.
- *
- * Each branch has two conventional loops:
- * - a fast-path loop for the most common codes in the mode
- * - a loop for all other codes in the mode
- * When the fast-path runs into a code that it cannot handle, its loop ends and it
- * runs into the following loop to handle the other codes.
- * The end of the input or output buffer is also handled by the slower loop.
- * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
- *
- * The callback handling is done by returning with an error code.
- * The conversion framework actually calls the callback function.
- */
- if(isSingleByteMode) {
- /* fast path for single-byte mode */
- if(state==readCommand) {
- fastSingle:
- while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) {
- ++source;
- if(b<=0x7f) {
- /* write US-ASCII graphic character or DEL */
- *target++=(char16_t)b;
- } else {
- /* write from dynamic window */
- uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
- if(c<=0xffff) {
- *target++=(char16_t)c;
- } else {
- /* output surrogate pair */
- *target++=(char16_t)(0xd7c0+(c>>10));
- if(target<targetLimit) {
- *target++=(char16_t)(0xdc00|(c&0x3ff));
- } else {
- /* target overflow */
- cnv->UCharErrorBuffer[0]=(char16_t)(0xdc00|(c&0x3ff));
- cnv->UCharErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- goto endloop;
- }
- }
- }
- }
- }
- /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
- singleByteMode:
- while(source<sourceLimit) {
- if(target>=targetLimit) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- b=*source++;
- switch(state) {
- case readCommand:
- /* redundant conditions are commented out */
- /* here: b<0x20 because otherwise we would be in fastSingle */
- if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
- /* CR/LF/TAB/NUL */
- *target++=(char16_t)b;
- goto fastSingle;
- } else if(SC0<=b) {
- if(b<=SC7) {
- dynamicWindow=(int8_t)(b-SC0);
- goto fastSingle;
- } else /* if(SD0<=b && b<=SD7) */ {
- dynamicWindow=(int8_t)(b-SD0);
- state=defineOne;
- }
- } else if(/* SQ0<=b && */ b<=SQ7) {
- quoteWindow=(int8_t)(b-SQ0);
- state=quoteOne;
- } else if(b==SDX) {
- state=definePairOne;
- } else if(b==SQU) {
- state=quotePairOne;
- } else if(b==SCU) {
- isSingleByteMode=false;
- goto fastUnicode;
- } else /* Srs */ {
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- goto endloop;
- }
- /* store the first byte of a multibyte sequence in toUBytes[] */
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- break;
- case quotePairOne:
- byteOne=b;
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- state=quotePairTwo;
- break;
- case quotePairTwo:
- *target++=(char16_t)((byteOne<<8)|b);
- state=readCommand;
- goto fastSingle;
- case quoteOne:
- if(b<0x80) {
- /* all static offsets are in the BMP */
- *target++=(char16_t)(staticOffsets[quoteWindow]+b);
- } else {
- /* write from dynamic window */
- uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
- if(c<=0xffff) {
- *target++=(char16_t)c;
- } else {
- /* output surrogate pair */
- *target++=(char16_t)(0xd7c0+(c>>10));
- if(target<targetLimit) {
- *target++=(char16_t)(0xdc00|(c&0x3ff));
- } else {
- /* target overflow */
- cnv->UCharErrorBuffer[0]=(char16_t)(0xdc00|(c&0x3ff));
- cnv->UCharErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- goto endloop;
- }
- }
- }
- state=readCommand;
- goto fastSingle;
- case definePairOne:
- dynamicWindow=(int8_t)((b>>5)&7);
- byteOne=(uint8_t)(b&0x1f);
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- state=definePairTwo;
- break;
- case definePairTwo:
- scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL);
- state=readCommand;
- goto fastSingle;
- case defineOne:
- if(b==0) {
- /* callback(illegal): Reserved window offset value 0 */
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- goto endloop;
- } else if(b<gapThreshold) {
- scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
- } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
- scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
- } else if(b>=fixedThreshold) {
- scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
- } else {
- /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- goto endloop;
- }
- state=readCommand;
- goto fastSingle;
- }
- }
- } else {
- /* fast path for Unicode mode */
- if(state==readCommand) {
- fastUnicode:
- while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) {
- *target++=(char16_t)((b<<8)|source[1]);
- source+=2;
- }
- }
- /* normal state machine for Unicode mode */
- /* unicodeByteMode: */
- while(source<sourceLimit) {
- if(target>=targetLimit) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- b=*source++;
- switch(state) {
- case readCommand:
- if((uint8_t)(b-UC0)>(Urs-UC0)) {
- byteOne=b;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- state=quotePairTwo;
- } else if(/* UC0<=b && */ b<=UC7) {
- dynamicWindow=(int8_t)(b-UC0);
- isSingleByteMode=true;
- goto fastSingle;
- } else if(/* UD0<=b && */ b<=UD7) {
- dynamicWindow=(int8_t)(b-UD0);
- isSingleByteMode=true;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- state=defineOne;
- goto singleByteMode;
- } else if(b==UDX) {
- isSingleByteMode=true;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- state=definePairOne;
- goto singleByteMode;
- } else if(b==UQU) {
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- state=quotePairOne;
- } else /* Urs */ {
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- goto endloop;
- }
- break;
- case quotePairOne:
- byteOne=b;
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- state=quotePairTwo;
- break;
- case quotePairTwo:
- *target++=(char16_t)((byteOne<<8)|b);
- state=readCommand;
- goto fastUnicode;
- }
- }
- }
- endloop:
- /* set the converter state back into UConverter */
- if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
- /* reset to deal with the next character */
- state=readCommand;
- } else if(state==readCommand) {
- /* not in a multi-byte sequence, reset toULength */
- cnv->toULength=0;
- }
- scsu->toUIsSingleByteMode=isSingleByteMode;
- scsu->toUState=state;
- scsu->toUQuoteWindow=quoteWindow;
- scsu->toUDynamicWindow=dynamicWindow;
- scsu->toUByteOne=byteOne;
- /* write back the updated pointers */
- pArgs->source=(const char *)source;
- pArgs->target=target;
- return;
- }
- U_CDECL_END
- /* SCSU-from-Unicode conversion functions ----------------------------------- */
- /*
- * This SCSU Encoder is fairly simple but uses all SCSU commands to achieve
- * reasonable results. The lookahead is minimal.
- * Many cases are simple:
- * A character fits directly into the current mode, a dynamic or static window,
- * or is not compressible. These cases are tested first.
- * Real compression heuristics are applied to the rest, in code branches for
- * single/Unicode mode and BMP/supplementary code points.
- * The heuristics used here are extremely simple.
- */
/*
 * Find the first of 8 windows that contains a character.
 *
 * offsets  start offsets of the 8 windows
 * c        code point to look up
 * returns  the index (0..7) of the first window with offsets[i]<=c<=offsets[i]+0x7f,
 *          or -1 if no window contains c
 */
static int8_t
getWindow(const uint32_t offsets[8], uint32_t c) {
    int8_t window=0;
    while(window<8) {
        /* unsigned subtraction wraps to a large value when c is below the window start */
        uint32_t delta=c-offsets[window];
        if(delta<0x80) {
            return window;
        }
        ++window;
    }
    return -1;
}
- /* is the character in the dynamic window starting at the offset, or in the direct-encoded range? */
- static UBool
- isInOffsetWindowOrDirect(uint32_t offset, uint32_t c) {
- return (UBool)(c<=offset+0x7f &&
- (c>=offset || (c<=0x7f &&
- (c>=0x20 || (1UL<<c)&0x2601))));
- /* binary 0010 0110 0000 0001,
- check for b==0xd || b==0xa || b==9 || b==0 */
- }
- /*
- * getNextDynamicWindow returns the next dynamic window to be redefined
- */
- static int8_t
- getNextDynamicWindow(SCSUData *scsu) {
- int8_t window=scsu->windowUse[scsu->nextWindowUseIndex];
- if(++scsu->nextWindowUseIndex==8) {
- scsu->nextWindowUseIndex=0;
- }
- return window;
- }
- /*
- * useDynamicWindow() adjusts
- * windowUse[] and nextWindowUseIndex for the algorithm to choose
- * the next dynamic window to be defined;
- * a subclass may override it and provide its own algorithm.
- */
- static void
- useDynamicWindow(SCSUData *scsu, int8_t window) {
- /*
- * move the existing window, which just became the most recently used one,
- * up in windowUse[] to nextWindowUseIndex-1
- */
- /* first, find the index of the window - backwards to favor the more recently used windows */
- int i, j;
- i=scsu->nextWindowUseIndex;
- do {
- if(--i<0) {
- i=7;
- }
- } while(scsu->windowUse[i]!=window);
- /* now copy each windowUse[i+1] to [i] */
- j=i+1;
- if(j==8) {
- j=0;
- }
- while(j!=scsu->nextWindowUseIndex) {
- scsu->windowUse[i]=scsu->windowUse[j];
- i=j;
- if(++j==8) { j=0; }
- }
- /* finally, set the window into the most recently used index */
- scsu->windowUse[i]=window;
- }
- /*
- * calculate the offset and the code for a dynamic window that contains the character
- * takes fixed offsets into account
- * the offset of the window is stored in the offset variable,
- * the code is returned
- *
- * return offset code: -1 none <=0xff code for SDn/UDn else code for SDX/UDX, subtract 0x200 to get the true code
- */
- static int
- getDynamicOffset(uint32_t c, uint32_t *pOffset) {
- int i;
- for(i=0; i<7; ++i) {
- if((uint32_t)(c-fixedOffsets[i])<=0x7f) {
- *pOffset=fixedOffsets[i];
- return 0xf9+i;
- }
- }
- if(c<0x80) {
- /* No dynamic window for US-ASCII. */
- return -1;
- } else if(c<0x3400 ||
- (uint32_t)(c-0x10000)<(0x14000-0x10000) ||
- (uint32_t)(c-0x1d000)<=(0x1ffff-0x1d000)
- ) {
- /* This character is in a code range for a "small", i.e., reasonably windowable, script. */
- *pOffset=c&0x7fffff80;
- return (int)(c>>7);
- } else if(0xe000<=c && c!=0xfeff && c<0xfff0) {
- /* For these characters we need to take the gapOffset into account. */
- *pOffset=c&0x7fffff80;
- return (int)((c-gapOffset)>>7);
- } else {
- return -1;
- }
- }
- U_CDECL_BEGIN
- /*
- * Idea for compression:
- * - save SCSUData and other state before really starting work
- * - at endloop, see if compression could be better with just unicode mode
- * - don't do this if a callback has been called
- * - if unicode mode would be smaller, then override the results with it - may need SCU at the beginning
- * - different buffer handling!
- *
- * Drawback or need for corrective handling:
- * it is desirable to encode U+feff as SQU fe ff for the SCSU signature, and
- * it is desirable to start a document in US-ASCII/Latin-1 for as long as possible
- * not only for compression but also for HTML/XML documents with following charset/encoding announcers.
- *
- * How to achieve both?
- * - Only replace the result after an SDX or SCU?
- */
- static void U_CALLCONV
- _SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- SCSUData *scsu;
- const char16_t *source, *sourceLimit;
- uint8_t *target;
- int32_t targetCapacity;
- int32_t *offsets;
- UBool isSingleByteMode;
- uint8_t dynamicWindow;
- uint32_t currentOffset;
- uint32_t c, delta;
- int32_t sourceIndex, nextSourceIndex;
- int32_t length;
- /* variables for compression heuristics */
- uint32_t offset;
- char16_t lead, trail;
- int code;
- int8_t window;
- /* set up the local pointers */
- cnv=pArgs->converter;
- scsu=(SCSUData *)cnv->extraInfo;
- /* set up the local pointers */
- source=pArgs->source;
- sourceLimit=pArgs->sourceLimit;
- target=(uint8_t *)pArgs->target;
- targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
- offsets=pArgs->offsets;
- /* get the state machine state */
- isSingleByteMode=scsu->fromUIsSingleByteMode;
- dynamicWindow=scsu->fromUDynamicWindow;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
- c=cnv->fromUChar32;
- /* sourceIndex=-1 if the current character began in the previous buffer */
- sourceIndex= c==0 ? 0 : -1;
- nextSourceIndex=0;
- /* similar conversion "loop" as in toUnicode */
- loop:
- if(isSingleByteMode) {
- if(c!=0 && targetCapacity>0) {
- goto getTrailSingle;
- }
- /* state machine for single-byte mode */
- /* singleByteMode: */
- while(source<sourceLimit) {
- if(targetCapacity<=0) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- c=*source++;
- ++nextSourceIndex;
- if((c-0x20)<=0x5f) {
- /* pass US-ASCII graphic character through */
- *target++=(uint8_t)c;
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- --targetCapacity;
- } else if(c<0x20) {
- if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
- /* CR/LF/TAB/NUL */
- *target++=(uint8_t)c;
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- --targetCapacity;
- } else {
- /* quote C0 control character */
- c|=SQ0<<8;
- length=2;
- goto outputBytes;
- }
- } else if((delta=c-currentOffset)<=0x7f) {
- /* use the current dynamic window */
- *target++=(uint8_t)(delta|0x80);
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- --targetCapacity;
- } else if(U16_IS_SURROGATE(c)) {
- if(U16_IS_SURROGATE_LEAD(c)) {
- getTrailSingle:
- lead=(char16_t)c;
- if(source<sourceLimit) {
- /* test the following code unit */
- trail=*source;
- if(U16_IS_TRAIL(trail)) {
- ++source;
- ++nextSourceIndex;
- c=U16_GET_SUPPLEMENTARY(c, trail);
- /* convert this surrogate code point */
- /* exit this condition tree */
- } else {
- /* this is an unmatched lead code unit (1st surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
- } else {
- /* no more input */
- break;
- }
- } else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
- /* compress supplementary character U+10000..U+10ffff */
- if((delta=c-currentOffset)<=0x7f) {
- /* use the current dynamic window */
- *target++=(uint8_t)(delta|0x80);
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- --targetCapacity;
- } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
- /* there is a dynamic window that contains this character, change to it */
- dynamicWindow=window;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
- length=2;
- goto outputBytes;
- } else if((code=getDynamicOffset(c, &offset))>=0) {
- /* might check if there are more characters in this window to come */
- /* define an extended window with this character */
- code-=0x200;
- dynamicWindow=getNextDynamicWindow(scsu);
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
- length=4;
- goto outputBytes;
- } else {
- /* change to Unicode mode and output this (lead, trail) pair */
- isSingleByteMode=false;
- *target++=(uint8_t)SCU;
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- --targetCapacity;
- c=((uint32_t)lead<<16)|trail;
- length=4;
- goto outputBytes;
- }
- } else if(c<0xa0) {
- /* quote C1 control character */
- c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
- length=2;
- goto outputBytes;
- } else if(c==0xfeff || c>=0xfff0) {
- /* quote signature character=byte order mark and specials */
- c|=SQU<<16;
- length=3;
- goto outputBytes;
- } else {
- /* compress all other BMP characters */
- if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
- /* there is a window defined that contains this character - switch to it or quote from it? */
- if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
- /* change to dynamic window */
- dynamicWindow=window;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
- length=2;
- goto outputBytes;
- } else {
- /* quote from dynamic window */
- c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
- length=2;
- goto outputBytes;
- }
- } else if((window=getWindow(staticOffsets, c))>=0) {
- /* quote from static window */
- c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
- length=2;
- goto outputBytes;
- } else if((code=getDynamicOffset(c, &offset))>=0) {
- /* define a dynamic window with this character */
- dynamicWindow=getNextDynamicWindow(scsu);
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
- length=3;
- goto outputBytes;
- } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) &&
- (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400))
- ) {
- /*
- * this character is not compressible (a BMP ideograph or similar);
- * switch to Unicode mode if this is the last character in the block
- * or there is at least one more ideograph following immediately
- */
- isSingleByteMode=false;
- c|=SCU<<16;
- length=3;
- goto outputBytes;
- } else {
- /* quote Unicode */
- c|=SQU<<16;
- length=3;
- goto outputBytes;
- }
- }
- /* normal end of conversion: prepare for a new character */
- c=0;
- sourceIndex=nextSourceIndex;
- }
- } else {
- if(c!=0 && targetCapacity>0) {
- goto getTrailUnicode;
- }
- /* state machine for Unicode mode */
- /* unicodeByteMode: */
- while(source<sourceLimit) {
- if(targetCapacity<=0) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- c=*source++;
- ++nextSourceIndex;
- if((uint32_t)(c-0x3400)<(0xd800-0x3400)) {
- /* not compressible, write character directly */
- if(targetCapacity>=2) {
- *target++=(uint8_t)(c>>8);
- *target++=(uint8_t)c;
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- *offsets++=sourceIndex;
- }
- targetCapacity-=2;
- } else {
- length=2;
- goto outputBytes;
- }
- } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) {
- /* compress BMP character if the following one is not an uncompressible ideograph */
- if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
- if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) {
- /* ASCII digit or letter */
- isSingleByteMode=true;
- c|=((uint32_t)(UC0+dynamicWindow)<<8)|c;
- length=2;
- goto outputBytes;
- } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
- /* there is a dynamic window that contains this character, change to it */
- isSingleByteMode=true;
- dynamicWindow=window;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
- length=2;
- goto outputBytes;
- } else if((code=getDynamicOffset(c, &offset))>=0) {
- /* define a dynamic window with this character */
- isSingleByteMode=true;
- dynamicWindow=getNextDynamicWindow(scsu);
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
- length=3;
- goto outputBytes;
- }
- }
- /* don't know how to compress this character, just write it directly */
- length=2;
- goto outputBytes;
- } else if(c<0xe000) {
- /* c is a surrogate */
- if(U16_IS_SURROGATE_LEAD(c)) {
- getTrailUnicode:
- lead=(char16_t)c;
- if(source<sourceLimit) {
- /* test the following code unit */
- trail=*source;
- if(U16_IS_TRAIL(trail)) {
- ++source;
- ++nextSourceIndex;
- c=U16_GET_SUPPLEMENTARY(c, trail);
- /* convert this surrogate code point */
- /* exit this condition tree */
- } else {
- /* this is an unmatched lead code unit (1st surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
- } else {
- /* no more input */
- break;
- }
- } else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
- /* compress supplementary character */
- if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
- !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
- ) {
- /*
- * there is a dynamic window that contains this character and
- * the following character is not uncompressible,
- * change to the window
- */
- isSingleByteMode=true;
- dynamicWindow=window;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
- length=2;
- goto outputBytes;
- } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
- (code=getDynamicOffset(c, &offset))>=0
- ) {
- /* two supplementary characters in (probably) the same window - define an extended one */
- isSingleByteMode=true;
- code-=0x200;
- dynamicWindow=getNextDynamicWindow(scsu);
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
- length=4;
- goto outputBytes;
- } else {
- /* don't know how to compress this character, just write it directly */
- c=((uint32_t)lead<<16)|trail;
- length=4;
- goto outputBytes;
- }
- } else /* 0xe000<=c<0xf300 */ {
- /* quote to avoid SCSU tags */
- c|=UQU<<16;
- length=3;
- goto outputBytes;
- }
- /* normal end of conversion: prepare for a new character */
- c=0;
- sourceIndex=nextSourceIndex;
- }
- }
- endloop:
- /* set the converter state back into UConverter */
- scsu->fromUIsSingleByteMode=isSingleByteMode;
- scsu->fromUDynamicWindow=dynamicWindow;
- cnv->fromUChar32=c;
- /* write back the updated pointers */
- pArgs->source=source;
- pArgs->target=(char *)target;
- pArgs->offsets=offsets;
- return;
- outputBytes:
- /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */
- /* from the first if in the loop we know that targetCapacity>0 */
- if(length<=targetCapacity) {
- if(offsets==nullptr) {
- switch(length) {
- /* each branch falls through to the next one */
- case 4:
- *target++=(uint8_t)(c>>24);
- U_FALLTHROUGH;
- case 3:
- *target++=(uint8_t)(c>>16);
- U_FALLTHROUGH;
- case 2:
- *target++=(uint8_t)(c>>8);
- U_FALLTHROUGH;
- case 1:
- *target++=(uint8_t)c;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
- } else {
- switch(length) {
- /* each branch falls through to the next one */
- case 4:
- *target++=(uint8_t)(c>>24);
- *offsets++=sourceIndex;
- U_FALLTHROUGH;
- case 3:
- *target++=(uint8_t)(c>>16);
- *offsets++=sourceIndex;
- U_FALLTHROUGH;
- case 2:
- *target++=(uint8_t)(c>>8);
- *offsets++=sourceIndex;
- U_FALLTHROUGH;
- case 1:
- *target++=(uint8_t)c;
- *offsets++=sourceIndex;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
- }
- targetCapacity-=length;
- /* normal end of conversion: prepare for a new character */
- c=0;
- sourceIndex=nextSourceIndex;
- goto loop;
- } else {
- uint8_t *p;
- /*
- * We actually do this backwards here:
- * In order to save an intermediate variable, we output
- * first to the overflow buffer what does not fit into the
- * regular target.
- */
- /* we know that 0<=targetCapacity<length<=4 */
- /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
- length-=targetCapacity;
- p=(uint8_t *)cnv->charErrorBuffer;
- switch(length) {
- /* each branch falls through to the next one */
- case 4:
- *p++=(uint8_t)(c>>24);
- U_FALLTHROUGH;
- case 3:
- *p++=(uint8_t)(c>>16);
- U_FALLTHROUGH;
- case 2:
- *p++=(uint8_t)(c>>8);
- U_FALLTHROUGH;
- case 1:
- *p=(uint8_t)c;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
- cnv->charErrorBufferLength=(int8_t)length;
- /* now output what fits into the regular target */
- c>>=8*length; /* length was reduced by targetCapacity */
- switch(targetCapacity) {
- /* each branch falls through to the next one */
- case 3:
- *target++=(uint8_t)(c>>16);
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- U_FALLTHROUGH;
- case 2:
- *target++=(uint8_t)(c>>8);
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- U_FALLTHROUGH;
- case 1:
- *target++=(uint8_t)c;
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- U_FALLTHROUGH;
- default:
- break;
- }
- /* target overflow */
- targetCapacity=0;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- c=0;
- goto endloop;
- }
- }
- /*
- * Convert UTF-16 text to SCSU bytes without recording source offsets.
- *
- * Identical to _SCSUFromUnicodeWithOffsets but without offset handling.
- * If a change is made in the original function, then either
- * change this function the same way or
- * re-copy the original function and remove the variables
- * offsets, sourceIndex, and nextSourceIndex.
- *
- * pArgs: supplies the converter, source/sourceLimit and target/targetLimit;
- * source and target are advanced on return.
- * pErrorCode: set to U_BUFFER_OVERFLOW_ERROR when the target is full, or to
- * U_ILLEGAL_CHAR_FOUND for an unpaired surrogate.
- *
- * The encoder mode and the active dynamic window are read from and written
- * back to the SCSUData in cnv->extraInfo; a lead surrogate at the end of the
- * input is kept in cnv->fromUChar32; output bytes that do not fit into the
- * target are stored in cnv->charErrorBuffer.
- */
- static void U_CALLCONV
- _SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- SCSUData *scsu;
- const char16_t *source, *sourceLimit;
- uint8_t *target;
- int32_t targetCapacity;
- UBool isSingleByteMode;
- uint8_t dynamicWindow;
- uint32_t currentOffset;
- uint32_t c, delta;
- int32_t length;
- /* variables for compression heuristics */
- uint32_t offset;
- char16_t lead, trail;
- int code;
- int8_t window;
- /* get the converter and its SCSU state */
- cnv=pArgs->converter;
- scsu=(SCSUData *)cnv->extraInfo;
- /* set up the local source/target pointers */
- source=pArgs->source;
- sourceLimit=pArgs->sourceLimit;
- target=(uint8_t *)pArgs->target;
- targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
- /* get the state machine state */
- isSingleByteMode=scsu->fromUIsSingleByteMode;
- dynamicWindow=scsu->fromUDynamicWindow;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
- c=cnv->fromUChar32; /* nonzero if a lead surrogate is pending from the previous buffer */
- /* similar conversion "loop" as in toUnicode */
- loop:
- if(isSingleByteMode) {
- if(c!=0 && targetCapacity>0) {
- goto getTrailSingle; /* continue with the pending lead surrogate */
- }
- /* state machine for single-byte mode */
- /* singleByteMode: */
- while(source<sourceLimit) {
- if(targetCapacity<=0) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- c=*source++;
- if((c-0x20)<=0x5f) { /* c is unsigned: values below 0x20 wrap around and fail this test */
- /* pass US-ASCII graphic character through */
- *target++=(uint8_t)c;
- --targetCapacity;
- } else if(c<0x20) {
- if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
- /* CR/LF/TAB/NUL */
- *target++=(uint8_t)c;
- --targetCapacity;
- } else {
- /* quote C0 control character */
- c|=SQ0<<8;
- length=2;
- goto outputBytes;
- }
- } else if((delta=c-currentOffset)<=0x7f) {
- /* use the current dynamic window */
- *target++=(uint8_t)(delta|0x80);
- --targetCapacity;
- } else if(U16_IS_SURROGATE(c)) {
- if(U16_IS_SURROGATE_LEAD(c)) {
- getTrailSingle:
- lead=(char16_t)c;
- if(source<sourceLimit) {
- /* test the following code unit */
- trail=*source;
- if(U16_IS_TRAIL(trail)) {
- ++source;
- c=U16_GET_SUPPLEMENTARY(c, trail);
- /* convert this surrogate code point */
- /* exit this condition tree */
- } else {
- /* this is an unmatched lead code unit (1st surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
- } else {
- /* no more input; c (the lead surrogate) is saved at endloop */
- break;
- }
- } else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
- /* compress supplementary character U+10000..U+10ffff */
- if((delta=c-currentOffset)<=0x7f) {
- /* use the current dynamic window */
- *target++=(uint8_t)(delta|0x80);
- --targetCapacity;
- } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
- /* there is a dynamic window that contains this character, change to it */
- dynamicWindow=window;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
- length=2;
- goto outputBytes;
- } else if((code=getDynamicOffset(c, &offset))>=0) {
- /* might check if there are more characters in this window to come */
- /* define an extended window with this character */
- code-=0x200;
- dynamicWindow=getNextDynamicWindow(scsu);
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
- length=4;
- goto outputBytes;
- } else {
- /* change to Unicode mode and output this (lead, trail) pair */
- isSingleByteMode=false;
- *target++=(uint8_t)SCU;
- --targetCapacity; /* may reach 0 here; outputBytes then sends all 4 bytes to the overflow buffer */
- c=((uint32_t)lead<<16)|trail;
- length=4;
- goto outputBytes;
- }
- } else if(c<0xa0) {
- /* quote C1 control character */
- c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
- length=2;
- goto outputBytes;
- } else if(c==0xfeff || c>=0xfff0) {
- /* quote signature character=byte order mark and specials */
- c|=SQU<<16;
- length=3;
- goto outputBytes;
- } else {
- /* compress all other BMP characters */
- if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
- /* there is a window defined that contains this character - switch to it or quote from it? */
- if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
- /* change to dynamic window */
- dynamicWindow=window;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
- length=2;
- goto outputBytes;
- } else {
- /* quote from dynamic window */
- c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
- length=2;
- goto outputBytes;
- }
- } else if((window=getWindow(staticOffsets, c))>=0) {
- /* quote from static window */
- c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
- length=2;
- goto outputBytes;
- } else if((code=getDynamicOffset(c, &offset))>=0) {
- /* define a dynamic window with this character */
- dynamicWindow=getNextDynamicWindow(scsu);
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
- length=3;
- goto outputBytes;
- } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) &&
- (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400))
- ) {
- /*
- * this character is not compressible (a BMP ideograph or similar);
- * switch to Unicode mode if this is the last character in the block
- * or there is at least one more ideograph following immediately
- */
- isSingleByteMode=false;
- c|=SCU<<16;
- length=3;
- goto outputBytes;
- } else {
- /* quote Unicode */
- c|=SQU<<16;
- length=3;
- goto outputBytes;
- }
- }
- /* normal end of conversion: prepare for a new character */
- c=0;
- }
- } else {
- if(c!=0 && targetCapacity>0) {
- goto getTrailUnicode; /* continue with the pending lead surrogate */
- }
- /* state machine for Unicode mode */
- /* unicodeByteMode: */
- while(source<sourceLimit) {
- if(targetCapacity<=0) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- c=*source++;
- if((uint32_t)(c-0x3400)<(0xd800-0x3400)) {
- /* not compressible, write character directly */
- if(targetCapacity>=2) {
- *target++=(uint8_t)(c>>8);
- *target++=(uint8_t)c;
- targetCapacity-=2;
- } else {
- length=2;
- goto outputBytes;
- }
- } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) {
- /* compress BMP character if the following one is not an uncompressible ideograph */
- if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
- if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) {
- /* ASCII digit or letter */
- isSingleByteMode=true;
- c|=((uint32_t)(UC0+dynamicWindow)<<8)|c;
- length=2;
- goto outputBytes;
- } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
- /* there is a dynamic window that contains this character, change to it */
- isSingleByteMode=true;
- dynamicWindow=window;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
- length=2;
- goto outputBytes;
- } else if((code=getDynamicOffset(c, &offset))>=0) {
- /* define a dynamic window with this character */
- isSingleByteMode=true;
- dynamicWindow=getNextDynamicWindow(scsu);
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
- length=3;
- goto outputBytes;
- }
- }
- /* don't know how to compress this character, just write it directly */
- length=2;
- goto outputBytes;
- } else if(c<0xe000) {
- /* c is a surrogate */
- if(U16_IS_SURROGATE_LEAD(c)) {
- getTrailUnicode:
- lead=(char16_t)c;
- if(source<sourceLimit) {
- /* test the following code unit */
- trail=*source;
- if(U16_IS_TRAIL(trail)) {
- ++source;
- c=U16_GET_SUPPLEMENTARY(c, trail);
- /* convert this surrogate code point */
- /* exit this condition tree */
- } else {
- /* this is an unmatched lead code unit (1st surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
- } else {
- /* no more input; c (the lead surrogate) is saved at endloop */
- break;
- }
- } else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
- /* compress supplementary character */
- if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
- !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
- ) {
- /*
- * there is a dynamic window that contains this character and
- * the following character is not uncompressible,
- * change to the window
- */
- isSingleByteMode=true;
- dynamicWindow=window;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
- length=2;
- goto outputBytes;
- } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
- (code=getDynamicOffset(c, &offset))>=0
- ) {
- /* two supplementary characters in (probably) the same window - define an extended one */
- isSingleByteMode=true;
- code-=0x200;
- dynamicWindow=getNextDynamicWindow(scsu);
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
- length=4;
- goto outputBytes;
- } else {
- /* don't know how to compress this character, just write it directly */
- c=((uint32_t)lead<<16)|trail;
- length=4;
- goto outputBytes;
- }
- } else /* 0xe000<=c<0xf300 */ {
- /* quote to avoid SCSU tags */
- c|=UQU<<16;
- length=3;
- goto outputBytes;
- }
- /* normal end of conversion: prepare for a new character */
- c=0;
- }
- }
- endloop:
- /* set the converter state back into UConverter */
- scsu->fromUIsSingleByteMode=isSingleByteMode;
- scsu->fromUDynamicWindow=dynamicWindow;
- cnv->fromUChar32=c; /* 0, or the lead surrogate that still needs its trail */
- /* write back the updated pointers */
- pArgs->source=source;
- pArgs->target=(char *)target;
- return;
- outputBytes:
- /* write the output character bytes from c and length [code copied from ucnvmbcs.c];
- * c holds the bytes right-aligned, the first byte to output in the highest used position */
- /* from the first if in the loop we know that targetCapacity>0 */
- if(length<=targetCapacity) {
- switch(length) {
- /* each branch falls through to the next one */
- case 4:
- *target++=(uint8_t)(c>>24);
- U_FALLTHROUGH;
- case 3:
- *target++=(uint8_t)(c>>16);
- U_FALLTHROUGH;
- case 2:
- *target++=(uint8_t)(c>>8);
- U_FALLTHROUGH;
- case 1:
- *target++=(uint8_t)c;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
- targetCapacity-=length;
- /* normal end of conversion: prepare for a new character */
- c=0;
- goto loop;
- } else {
- uint8_t *p;
- /*
- * We actually do this backwards here:
- * In order to save an intermediate variable, we output
- * first to the overflow buffer what does not fit into the
- * regular target.
- */
- /* we know that 0<=targetCapacity<length<=4 */
- /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
- length-=targetCapacity; /* length is now the number of bytes for the overflow buffer */
- p=(uint8_t *)cnv->charErrorBuffer;
- switch(length) {
- /* each branch falls through to the next one */
- case 4:
- *p++=(uint8_t)(c>>24);
- U_FALLTHROUGH;
- case 3:
- *p++=(uint8_t)(c>>16);
- U_FALLTHROUGH;
- case 2:
- *p++=(uint8_t)(c>>8);
- U_FALLTHROUGH;
- case 1:
- *p=(uint8_t)c;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
- cnv->charErrorBufferLength=(int8_t)length;
- /* now output what fits into the regular target;
- * length is 4 only when targetCapacity==0: nothing is written to the target then,
- * and the conditional avoids shifting the 32-bit c by 32 bits, which would be undefined */
- c = (length == 4) ? 0 : c >> 8*length; /* length was reduced by targetCapacity */
- switch(targetCapacity) {
- /* each branch falls through to the next one */
- case 3:
- *target++=(uint8_t)(c>>16);
- U_FALLTHROUGH;
- case 2:
- *target++=(uint8_t)(c>>8);
- U_FALLTHROUGH;
- case 1:
- *target++=(uint8_t)c;
- U_FALLTHROUGH;
- default:
- break;
- }
- /* target overflow */
- targetCapacity=0;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- c=0;
- goto endloop;
- }
- }
- /* miscellaneous ------------------------------------------------------------ */
- static const char * U_CALLCONV
- _SCSUGetName(const UConverter *cnv) {
- SCSUData *scsu=(SCSUData *)cnv->extraInfo;
- switch(scsu->locale) {
- case l_ja:
- return "SCSU,locale=ja";
- default:
- return "SCSU";
- }
- }
- /* structure for SafeClone calculations:
- * the layout that _SCSUSafeClone expects in the caller-provided buffer,
- * a converter object directly followed by its private SCSU state;
- * sizeof(struct cloneSCSUStruct) is the buffer size reported when preflighting */
- struct cloneSCSUStruct
- {
- UConverter cnv; /* the clone itself; _SCSUSafeClone returns its address */
- SCSUData mydata; /* copy of the original's extraInfo; becomes the clone's extraInfo */
- };
- static UConverter * U_CALLCONV
- _SCSUSafeClone(const UConverter *cnv,
- void *stackBuffer,
- int32_t *pBufferSize,
- UErrorCode *status)
- {
- struct cloneSCSUStruct * localClone;
- int32_t bufferSizeNeeded = sizeof(struct cloneSCSUStruct);
- if (U_FAILURE(*status)){
- return 0;
- }
- if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */
- *pBufferSize = bufferSizeNeeded;
- return 0;
- }
- localClone = (struct cloneSCSUStruct *)stackBuffer;
- /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
- uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(SCSUData));
- localClone->cnv.extraInfo = &localClone->mydata;
- localClone->cnv.isExtraLocal = true;
- return &localClone->cnv;
- }
- U_CDECL_END
- static const UConverterImpl _SCSUImpl={ /* function table for the SCSU converter; nullptr entries are not provided here (slot meanings are defined by UConverterImpl, not visible in this part of the file) */
- UCNV_SCSU,
- nullptr,
- nullptr,
- _SCSUOpen,
- _SCSUClose,
- _SCSUReset,
- _SCSUToUnicode,
- _SCSUToUnicodeWithOffsets,
- _SCSUFromUnicode, /* same conversion as the next entry, without offset handling */
- _SCSUFromUnicodeWithOffsets,
- nullptr,
- nullptr,
- _SCSUGetName, /* returns "SCSU" or "SCSU,locale=ja" */
- nullptr,
- _SCSUSafeClone, /* completes a clone inside a struct cloneSCSUStruct buffer */
- ucnv_getCompleteUnicodeSet,
- nullptr,
- nullptr
- };
- static const UConverterStaticData _SCSUStaticData={ /* constant description of the SCSU converter */
- sizeof(UConverterStaticData),
- "SCSU",
- 1212, /* CCSID for SCSU */
- UCNV_IBM, UCNV_SCSU,
- 1, 3, /* one char16_t generates at least 1 byte and at most 3 bytes */
- /*
- * The subchar here is ignored because _SCSUOpen() sets U+fffd as a Unicode
- * substitution string.
- */
- { 0x0e, 0xff, 0xfd, 0 }, 3, /* subchar bytes and their length; NOTE(review): looks like SQU followed by U+fffd, assuming SQU==0x0e - confirm */
- false, false,
- 0,
- 0,
- { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
- };
- const UConverterSharedData _SCSUData= /* shared data object for SCSU, built from the static data and the function table above */
- UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_SCSUStaticData, &_SCSUImpl);
- #endif
|