12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416 |
- // © 2016 and later: Unicode, Inc. and others.
- // License & terms of use: http://www.unicode.org/copyright.html
- /*
- *******************************************************************************
- *
- * Copyright (C) 1997-2016, International Business Machines
- * Corporation and others. All Rights Reserved.
- *
- *******************************************************************************
- * file name: loclikely.cpp
- * encoding: UTF-8
- * tab size: 8 (not used)
- * indentation:4
- *
- * created on: 2010feb25
- * created by: Markus W. Scherer
- *
- * Code for likely and minimized locale subtags, separated out from other .cpp files
- * that then do not depend on resource bundle code and likely-subtags data.
- */
- #include "unicode/bytestream.h"
- #include "unicode/utypes.h"
- #include "unicode/locid.h"
- #include "unicode/putil.h"
- #include "unicode/uchar.h"
- #include "unicode/uloc.h"
- #include "unicode/ures.h"
- #include "unicode/uscript.h"
- #include "bytesinkutil.h"
- #include "charstr.h"
- #include "cmemory.h"
- #include "cstring.h"
- #include "ulocimp.h"
- #include "ustr_imp.h"
/**
 * Canonical spellings of the subtags that stand for an unknown language,
 * an unknown script and an unknown region.
 */
static constexpr const char* unknownLanguage = "und";
static constexpr const char* unknownScript = "Zzzz";
static constexpr const char* unknownRegion = "ZZ";
- /**
- * This function looks for the localeID in the likelySubtags resource.
- *
- * @param localeID The tag to find.
- * @param buffer A buffer to hold the matching entry
- * @param bufferLength The length of the output buffer
- * @return A pointer to "buffer" if found, or a null pointer if not.
- */
- static const char* U_CALLCONV
- findLikelySubtags(const char* localeID,
- char* buffer,
- int32_t bufferLength,
- UErrorCode* err) {
- const char* result = nullptr;
- if (!U_FAILURE(*err)) {
- int32_t resLen = 0;
- const char16_t* s = nullptr;
- UErrorCode tmpErr = U_ZERO_ERROR;
- icu::LocalUResourceBundlePointer subtags(ures_openDirect(nullptr, "likelySubtags", &tmpErr));
- if (U_SUCCESS(tmpErr)) {
- icu::CharString und;
- if (localeID != nullptr) {
- if (*localeID == '\0') {
- localeID = unknownLanguage;
- } else if (*localeID == '_') {
- und.append(unknownLanguage, *err);
- und.append(localeID, *err);
- if (U_FAILURE(*err)) {
- return nullptr;
- }
- localeID = und.data();
- }
- }
- s = ures_getStringByKey(subtags.getAlias(), localeID, &resLen, &tmpErr);
- if (U_FAILURE(tmpErr)) {
- /*
- * If a resource is missing, it's not really an error, it's
- * just that we don't have any data for that particular locale ID.
- */
- if (tmpErr != U_MISSING_RESOURCE_ERROR) {
- *err = tmpErr;
- }
- }
- else if (resLen >= bufferLength) {
- /* The buffer should never overflow. */
- *err = U_INTERNAL_PROGRAM_ERROR;
- }
- else {
- u_UCharsToChars(s, buffer, resLen + 1);
- if (resLen >= 3 &&
- uprv_strnicmp(buffer, unknownLanguage, 3) == 0 &&
- (resLen == 3 || buffer[3] == '_')) {
- uprv_memmove(buffer, buffer + 3, resLen - 3 + 1);
- }
- result = buffer;
- }
- } else {
- *err = tmpErr;
- }
- }
- return result;
- }
- /**
- * Append a tag to a buffer, adding the separator if necessary. The buffer
- * must be large enough to contain the resulting tag plus any separator
- * necessary. The tag must not be a zero-length string.
- *
- * @param tag The tag to add.
- * @param tagLength The length of the tag.
- * @param buffer The output buffer.
- * @param bufferLength The length of the output buffer. This is an input/output parameter.
- **/
- static void U_CALLCONV
- appendTag(
- const char* tag,
- int32_t tagLength,
- char* buffer,
- int32_t* bufferLength,
- UBool withSeparator) {
- if (withSeparator) {
- buffer[*bufferLength] = '_';
- ++(*bufferLength);
- }
- uprv_memmove(
- &buffer[*bufferLength],
- tag,
- tagLength);
- *bufferLength += tagLength;
- }
- /**
- * Create a tag string from the supplied parameters. The lang, script and region
- * parameters may be nullptr pointers. If they are, their corresponding length parameters
- * must be less than or equal to 0.
- *
- * If any of the language, script or region parameters are empty, and the alternateTags
- * parameter is not nullptr, it will be parsed for potential language, script and region tags
- * to be used when constructing the new tag. If the alternateTags parameter is nullptr, or
- * it contains no language tag, the default tag for the unknown language is used.
- *
- * If the length of the new string exceeds the capacity of the output buffer,
- * the function copies as many bytes to the output buffer as it can, and returns
- * the error U_BUFFER_OVERFLOW_ERROR.
- *
- * If an illegal argument is provided, the function returns the error
- * U_ILLEGAL_ARGUMENT_ERROR.
- *
- * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
- * the tag string fits in the output buffer, but the null terminator doesn't.
- *
- * @param lang The language tag to use.
- * @param langLength The length of the language tag.
- * @param script The script tag to use.
- * @param scriptLength The length of the script tag.
- * @param region The region tag to use.
- * @param regionLength The length of the region tag.
- * @param trailing Any trailing data to append to the new tag.
- * @param trailingLength The length of the trailing data.
- * @param alternateTags A string containing any alternate tags.
- * @param sink The output sink receiving the tag string.
- * @param err A pointer to a UErrorCode for error reporting.
- **/
- static void U_CALLCONV
- createTagStringWithAlternates(
- const char* lang,
- int32_t langLength,
- const char* script,
- int32_t scriptLength,
- const char* region,
- int32_t regionLength,
- const char* trailing,
- int32_t trailingLength,
- const char* alternateTags,
- icu::ByteSink& sink,
- UErrorCode* err) {
- if (U_FAILURE(*err)) {
- goto error;
- }
- else if (langLength >= ULOC_LANG_CAPACITY ||
- scriptLength >= ULOC_SCRIPT_CAPACITY ||
- regionLength >= ULOC_COUNTRY_CAPACITY) {
- goto error;
- }
- else {
- /**
- * ULOC_FULLNAME_CAPACITY will provide enough capacity
- * that we can build a string that contains the language,
- * script and region code without worrying about overrunning
- * the user-supplied buffer.
- **/
- char tagBuffer[ULOC_FULLNAME_CAPACITY];
- int32_t tagLength = 0;
- UBool regionAppended = false;
- if (langLength > 0) {
- appendTag(
- lang,
- langLength,
- tagBuffer,
- &tagLength,
- /*withSeparator=*/false);
- }
- else if (alternateTags == nullptr) {
- /*
- * Use the empty string for an unknown language, if
- * we found no language.
- */
- }
- else {
- /*
- * Parse the alternateTags string for the language.
- */
- char alternateLang[ULOC_LANG_CAPACITY];
- int32_t alternateLangLength = sizeof(alternateLang);
- alternateLangLength =
- uloc_getLanguage(
- alternateTags,
- alternateLang,
- alternateLangLength,
- err);
- if(U_FAILURE(*err) ||
- alternateLangLength >= ULOC_LANG_CAPACITY) {
- goto error;
- }
- else if (alternateLangLength == 0) {
- /*
- * Use the empty string for an unknown language, if
- * we found no language.
- */
- }
- else {
- appendTag(
- alternateLang,
- alternateLangLength,
- tagBuffer,
- &tagLength,
- /*withSeparator=*/false);
- }
- }
- if (scriptLength > 0) {
- appendTag(
- script,
- scriptLength,
- tagBuffer,
- &tagLength,
- /*withSeparator=*/true);
- }
- else if (alternateTags != nullptr) {
- /*
- * Parse the alternateTags string for the script.
- */
- char alternateScript[ULOC_SCRIPT_CAPACITY];
- const int32_t alternateScriptLength =
- uloc_getScript(
- alternateTags,
- alternateScript,
- sizeof(alternateScript),
- err);
- if (U_FAILURE(*err) ||
- alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
- goto error;
- }
- else if (alternateScriptLength > 0) {
- appendTag(
- alternateScript,
- alternateScriptLength,
- tagBuffer,
- &tagLength,
- /*withSeparator=*/true);
- }
- }
- if (regionLength > 0) {
- appendTag(
- region,
- regionLength,
- tagBuffer,
- &tagLength,
- /*withSeparator=*/true);
- regionAppended = true;
- }
- else if (alternateTags != nullptr) {
- /*
- * Parse the alternateTags string for the region.
- */
- char alternateRegion[ULOC_COUNTRY_CAPACITY];
- const int32_t alternateRegionLength =
- uloc_getCountry(
- alternateTags,
- alternateRegion,
- sizeof(alternateRegion),
- err);
- if (U_FAILURE(*err) ||
- alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
- goto error;
- }
- else if (alternateRegionLength > 0) {
- appendTag(
- alternateRegion,
- alternateRegionLength,
- tagBuffer,
- &tagLength,
- /*withSeparator=*/true);
- regionAppended = true;
- }
- }
- /**
- * Copy the partial tag from our internal buffer to the supplied
- * target.
- **/
- sink.Append(tagBuffer, tagLength);
- if (trailingLength > 0) {
- if (*trailing != '@') {
- sink.Append("_", 1);
- if (!regionAppended) {
- /* extra separator is required */
- sink.Append("_", 1);
- }
- }
- /*
- * Copy the trailing data into the supplied buffer.
- */
- sink.Append(trailing, trailingLength);
- }
- return;
- }
- error:
- /**
- * An overflow indicates the locale ID passed in
- * is ill-formed. If we got here, and there was
- * no previous error, it's an implicit overflow.
- **/
- if (*err == U_BUFFER_OVERFLOW_ERROR ||
- U_SUCCESS(*err)) {
- *err = U_ILLEGAL_ARGUMENT_ERROR;
- }
- }
- /**
- * Create a tag string from the supplied parameters. The lang, script and region
- * parameters may be nullptr pointers. If they are, their corresponding length parameters
- * must be less than or equal to 0. If the lang parameter is an empty string, the
- * default value for an unknown language is written to the output buffer.
- *
- * If the length of the new string exceeds the capacity of the output buffer,
- * the function copies as many bytes to the output buffer as it can, and returns
- * the error U_BUFFER_OVERFLOW_ERROR.
- *
- * If an illegal argument is provided, the function returns the error
- * U_ILLEGAL_ARGUMENT_ERROR.
- *
- * @param lang The language tag to use.
- * @param langLength The length of the language tag.
- * @param script The script tag to use.
- * @param scriptLength The length of the script tag.
- * @param region The region tag to use.
- * @param regionLength The length of the region tag.
- * @param trailing Any trailing data to append to the new tag.
- * @param trailingLength The length of the trailing data.
- * @param sink The output sink receiving the tag string.
- * @param err A pointer to a UErrorCode for error reporting.
- **/
- static void U_CALLCONV
- createTagString(
- const char* lang,
- int32_t langLength,
- const char* script,
- int32_t scriptLength,
- const char* region,
- int32_t regionLength,
- const char* trailing,
- int32_t trailingLength,
- icu::ByteSink& sink,
- UErrorCode* err)
- {
- createTagStringWithAlternates(
- lang,
- langLength,
- script,
- scriptLength,
- region,
- regionLength,
- trailing,
- trailingLength,
- nullptr,
- sink,
- err);
- }
- /**
- * Parse the language, script, and region subtags from a tag string, and copy the
- * results into the corresponding output parameters. The buffers are null-terminated,
- * unless overflow occurs.
- *
- * The langLength, scriptLength, and regionLength parameters are input/output
- * parameters, and must contain the capacity of their corresponding buffers on
- * input. On output, they will contain the actual length of the buffers, not
- * including the null terminator.
- *
- * If the length of any of the output subtags exceeds the capacity of the corresponding
- * buffer, the function copies as many bytes to the output buffer as it can, and returns
- * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow
- * occurs.
- *
- * If an illegal argument is provided, the function returns the error
- * U_ILLEGAL_ARGUMENT_ERROR.
- *
- * @param localeID The locale ID to parse.
- * @param lang The language tag buffer.
- * @param langLength The length of the language tag.
- * @param script The script tag buffer.
- * @param scriptLength The length of the script tag.
- * @param region The region tag buffer.
- * @param regionLength The length of the region tag.
- * @param err A pointer to a UErrorCode for error reporting.
- * @return The number of chars of the localeID parameter consumed.
- **/
- static int32_t U_CALLCONV
- parseTagString(
- const char* localeID,
- char* lang,
- int32_t* langLength,
- char* script,
- int32_t* scriptLength,
- char* region,
- int32_t* regionLength,
- UErrorCode* err)
- {
- const char* position = localeID;
- int32_t subtagLength = 0;
- if(U_FAILURE(*err) ||
- localeID == nullptr ||
- lang == nullptr ||
- langLength == nullptr ||
- script == nullptr ||
- scriptLength == nullptr ||
- region == nullptr ||
- regionLength == nullptr) {
- goto error;
- }
- subtagLength = ulocimp_getLanguage(position, &position, *err).extract(lang, *langLength, *err);
- /*
- * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
- * to be an error, because it indicates the user-supplied tag is
- * not well-formed.
- */
- if(U_FAILURE(*err)) {
- goto error;
- }
- *langLength = subtagLength;
- /*
- * If no language was present, use the empty string instead.
- * Otherwise, move past any separator.
- */
- if (_isIDSeparator(*position)) {
- ++position;
- }
- subtagLength = ulocimp_getScript(position, &position, *err).extract(script, *scriptLength, *err);
- if(U_FAILURE(*err)) {
- goto error;
- }
- *scriptLength = subtagLength;
- if (*scriptLength > 0) {
- if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
- /**
- * If the script part is the "unknown" script, then don't return it.
- **/
- *scriptLength = 0;
- }
- /*
- * Move past any separator.
- */
- if (_isIDSeparator(*position)) {
- ++position;
- }
- }
- subtagLength = ulocimp_getCountry(position, &position, *err).extract(region, *regionLength, *err);
- if(U_FAILURE(*err)) {
- goto error;
- }
- *regionLength = subtagLength;
- if (*regionLength > 0) {
- if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
- /**
- * If the region part is the "unknown" region, then don't return it.
- **/
- *regionLength = 0;
- }
- } else if (*position != 0 && *position != '@') {
- /* back up over consumed trailing separator */
- --position;
- }
- exit:
- return (int32_t)(position - localeID);
- error:
- /**
- * If we get here, we have no explicit error, it's the result of an
- * illegal argument.
- **/
- if (!U_FAILURE(*err)) {
- *err = U_ILLEGAL_ARGUMENT_ERROR;
- }
- goto exit;
- }
- static UBool U_CALLCONV
- createLikelySubtagsString(
- const char* lang,
- int32_t langLength,
- const char* script,
- int32_t scriptLength,
- const char* region,
- int32_t regionLength,
- const char* variants,
- int32_t variantsLength,
- icu::ByteSink& sink,
- UErrorCode* err) {
- /**
- * ULOC_FULLNAME_CAPACITY will provide enough capacity
- * that we can build a string that contains the language,
- * script and region code without worrying about overrunning
- * the user-supplied buffer.
- **/
- char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
- if(U_FAILURE(*err)) {
- goto error;
- }
- /**
- * Try the language with the script and region first.
- **/
- if (scriptLength > 0 && regionLength > 0) {
- const char* likelySubtags = nullptr;
- icu::CharString tagBuffer;
- {
- icu::CharStringByteSink sink(&tagBuffer);
- createTagString(
- lang,
- langLength,
- script,
- scriptLength,
- region,
- regionLength,
- nullptr,
- 0,
- sink,
- err);
- }
- if(U_FAILURE(*err)) {
- goto error;
- }
- likelySubtags =
- findLikelySubtags(
- tagBuffer.data(),
- likelySubtagsBuffer,
- sizeof(likelySubtagsBuffer),
- err);
- if(U_FAILURE(*err)) {
- goto error;
- }
- if (likelySubtags != nullptr) {
- /* Always use the language tag from the
- maximal string, since it may be more
- specific than the one provided. */
- createTagStringWithAlternates(
- nullptr,
- 0,
- nullptr,
- 0,
- nullptr,
- 0,
- variants,
- variantsLength,
- likelySubtags,
- sink,
- err);
- return true;
- }
- }
- /**
- * Try the language with just the script.
- **/
- if (scriptLength > 0) {
- const char* likelySubtags = nullptr;
- icu::CharString tagBuffer;
- {
- icu::CharStringByteSink sink(&tagBuffer);
- createTagString(
- lang,
- langLength,
- script,
- scriptLength,
- nullptr,
- 0,
- nullptr,
- 0,
- sink,
- err);
- }
- if(U_FAILURE(*err)) {
- goto error;
- }
- likelySubtags =
- findLikelySubtags(
- tagBuffer.data(),
- likelySubtagsBuffer,
- sizeof(likelySubtagsBuffer),
- err);
- if(U_FAILURE(*err)) {
- goto error;
- }
- if (likelySubtags != nullptr) {
- /* Always use the language tag from the
- maximal string, since it may be more
- specific than the one provided. */
- createTagStringWithAlternates(
- nullptr,
- 0,
- nullptr,
- 0,
- region,
- regionLength,
- variants,
- variantsLength,
- likelySubtags,
- sink,
- err);
- return true;
- }
- }
- /**
- * Try the language with just the region.
- **/
- if (regionLength > 0) {
- const char* likelySubtags = nullptr;
- icu::CharString tagBuffer;
- {
- icu::CharStringByteSink sink(&tagBuffer);
- createTagString(
- lang,
- langLength,
- nullptr,
- 0,
- region,
- regionLength,
- nullptr,
- 0,
- sink,
- err);
- }
- if(U_FAILURE(*err)) {
- goto error;
- }
- likelySubtags =
- findLikelySubtags(
- tagBuffer.data(),
- likelySubtagsBuffer,
- sizeof(likelySubtagsBuffer),
- err);
- if(U_FAILURE(*err)) {
- goto error;
- }
- if (likelySubtags != nullptr) {
- /* Always use the language tag from the
- maximal string, since it may be more
- specific than the one provided. */
- createTagStringWithAlternates(
- nullptr,
- 0,
- script,
- scriptLength,
- nullptr,
- 0,
- variants,
- variantsLength,
- likelySubtags,
- sink,
- err);
- return true;
- }
- }
- /**
- * Finally, try just the language.
- **/
- {
- const char* likelySubtags = nullptr;
- icu::CharString tagBuffer;
- {
- icu::CharStringByteSink sink(&tagBuffer);
- createTagString(
- lang,
- langLength,
- nullptr,
- 0,
- nullptr,
- 0,
- nullptr,
- 0,
- sink,
- err);
- }
- if(U_FAILURE(*err)) {
- goto error;
- }
- likelySubtags =
- findLikelySubtags(
- tagBuffer.data(),
- likelySubtagsBuffer,
- sizeof(likelySubtagsBuffer),
- err);
- if(U_FAILURE(*err)) {
- goto error;
- }
- if (likelySubtags != nullptr) {
- /* Always use the language tag from the
- maximal string, since it may be more
- specific than the one provided. */
- createTagStringWithAlternates(
- nullptr,
- 0,
- script,
- scriptLength,
- region,
- regionLength,
- variants,
- variantsLength,
- likelySubtags,
- sink,
- err);
- return true;
- }
- }
- return false;
- error:
- if (!U_FAILURE(*err)) {
- *err = U_ILLEGAL_ARGUMENT_ERROR;
- }
- return false;
- }
- #define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) UPRV_BLOCK_MACRO_BEGIN { \
- int32_t count = 0; \
- int32_t i; \
- for (i = 0; i < trailingLength; i++) { \
- if (trailing[i] == '-' || trailing[i] == '_') { \
- count = 0; \
- if (count > 8) { \
- goto error; \
- } \
- } else if (trailing[i] == '@') { \
- break; \
- } else if (count > 8) { \
- goto error; \
- } else { \
- count++; \
- } \
- } \
- } UPRV_BLOCK_MACRO_END
- static UBool
- _uloc_addLikelySubtags(const char* localeID,
- icu::ByteSink& sink,
- UErrorCode* err) {
- char lang[ULOC_LANG_CAPACITY];
- int32_t langLength = sizeof(lang);
- char script[ULOC_SCRIPT_CAPACITY];
- int32_t scriptLength = sizeof(script);
- char region[ULOC_COUNTRY_CAPACITY];
- int32_t regionLength = sizeof(region);
- const char* trailing = "";
- int32_t trailingLength = 0;
- int32_t trailingIndex = 0;
- UBool success = false;
- if(U_FAILURE(*err)) {
- goto error;
- }
- if (localeID == nullptr) {
- goto error;
- }
- trailingIndex = parseTagString(
- localeID,
- lang,
- &langLength,
- script,
- &scriptLength,
- region,
- ®ionLength,
- err);
- if(U_FAILURE(*err)) {
- /* Overflow indicates an illegal argument error */
- if (*err == U_BUFFER_OVERFLOW_ERROR) {
- *err = U_ILLEGAL_ARGUMENT_ERROR;
- }
- goto error;
- }
- /* Find the length of the trailing portion. */
- while (_isIDSeparator(localeID[trailingIndex])) {
- trailingIndex++;
- }
- trailing = &localeID[trailingIndex];
- trailingLength = (int32_t)uprv_strlen(trailing);
- CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
- success =
- createLikelySubtagsString(
- lang,
- langLength,
- script,
- scriptLength,
- region,
- regionLength,
- trailing,
- trailingLength,
- sink,
- err);
- if (!success) {
- const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
- /*
- * If we get here, we need to return localeID.
- */
- sink.Append(localeID, localIDLength);
- }
- return success;
- error:
- if (!U_FAILURE(*err)) {
- *err = U_ILLEGAL_ARGUMENT_ERROR;
- }
- return false;
- }
- // Adds likely subtags to the sink.
- // Returns true if the value in the sink was produced by a match during the lookup;
- // returns false if the value in the sink is the same as the input because there was
- // no match during the lookup.
- static UBool _ulocimp_addLikelySubtags(const char*, icu::ByteSink&, UErrorCode*);
- static void
- _uloc_minimizeSubtags(const char* localeID,
- icu::ByteSink& sink,
- UErrorCode* err) {
- icu::CharString maximizedTagBuffer;
- char lang[ULOC_LANG_CAPACITY];
- int32_t langLength = sizeof(lang);
- char script[ULOC_SCRIPT_CAPACITY];
- int32_t scriptLength = sizeof(script);
- char region[ULOC_COUNTRY_CAPACITY];
- int32_t regionLength = sizeof(region);
- const char* trailing = "";
- int32_t trailingLength = 0;
- int32_t trailingIndex = 0;
- UBool successGetMax = false;
- if(U_FAILURE(*err)) {
- goto error;
- }
- else if (localeID == nullptr) {
- goto error;
- }
- trailingIndex =
- parseTagString(
- localeID,
- lang,
- &langLength,
- script,
- &scriptLength,
- region,
- ®ionLength,
- err);
- if(U_FAILURE(*err)) {
- /* Overflow indicates an illegal argument error */
- if (*err == U_BUFFER_OVERFLOW_ERROR) {
- *err = U_ILLEGAL_ARGUMENT_ERROR;
- }
- goto error;
- }
- /* Find the spot where the variants or the keywords begin, if any. */
- while (_isIDSeparator(localeID[trailingIndex])) {
- trailingIndex++;
- }
- trailing = &localeID[trailingIndex];
- trailingLength = (int32_t)uprv_strlen(trailing);
- CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
- {
- icu::CharString base;
- {
- icu::CharStringByteSink baseSink(&base);
- createTagString(
- lang,
- langLength,
- script,
- scriptLength,
- region,
- regionLength,
- nullptr,
- 0,
- baseSink,
- err);
- }
- /**
- * First, we need to first get the maximization
- * from AddLikelySubtags.
- **/
- {
- icu::CharStringByteSink maxSink(&maximizedTagBuffer);
- successGetMax = _ulocimp_addLikelySubtags(base.data(), maxSink, err);
- }
- }
- if(U_FAILURE(*err)) {
- goto error;
- }
- if (!successGetMax) {
- /**
- * If we got here, return the locale ID parameter unchanged.
- **/
- const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
- sink.Append(localeID, localeIDLength);
- return;
- }
- // In the following, the lang, script, region are referring to those in
- // the maximizedTagBuffer, not the one in the localeID.
- langLength = sizeof(lang);
- scriptLength = sizeof(script);
- regionLength = sizeof(region);
- parseTagString(
- maximizedTagBuffer.data(),
- lang,
- &langLength,
- script,
- &scriptLength,
- region,
- ®ionLength,
- err);
- if(U_FAILURE(*err)) {
- goto error;
- }
- /**
- * Start first with just the language.
- **/
- {
- icu::CharString tagBuffer;
- {
- icu::CharStringByteSink tagSink(&tagBuffer);
- createLikelySubtagsString(
- lang,
- langLength,
- nullptr,
- 0,
- nullptr,
- 0,
- nullptr,
- 0,
- tagSink,
- err);
- }
- if(U_FAILURE(*err)) {
- goto error;
- }
- else if (!tagBuffer.isEmpty() &&
- uprv_strnicmp(
- maximizedTagBuffer.data(),
- tagBuffer.data(),
- tagBuffer.length()) == 0) {
- createTagString(
- lang,
- langLength,
- nullptr,
- 0,
- nullptr,
- 0,
- trailing,
- trailingLength,
- sink,
- err);
- return;
- }
- }
- /**
- * Next, try the language and region.
- **/
- if (regionLength > 0) {
- icu::CharString tagBuffer;
- {
- icu::CharStringByteSink tagSink(&tagBuffer);
- createLikelySubtagsString(
- lang,
- langLength,
- nullptr,
- 0,
- region,
- regionLength,
- nullptr,
- 0,
- tagSink,
- err);
- }
- if(U_FAILURE(*err)) {
- goto error;
- }
- else if (!tagBuffer.isEmpty() &&
- uprv_strnicmp(
- maximizedTagBuffer.data(),
- tagBuffer.data(),
- tagBuffer.length()) == 0) {
- createTagString(
- lang,
- langLength,
- nullptr,
- 0,
- region,
- regionLength,
- trailing,
- trailingLength,
- sink,
- err);
- return;
- }
- }
- /**
- * Finally, try the language and script. This is our last chance,
- * since trying with all three subtags would only yield the
- * maximal version that we already have.
- **/
- if (scriptLength > 0) {
- icu::CharString tagBuffer;
- {
- icu::CharStringByteSink tagSink(&tagBuffer);
- createLikelySubtagsString(
- lang,
- langLength,
- script,
- scriptLength,
- nullptr,
- 0,
- nullptr,
- 0,
- tagSink,
- err);
- }
- if(U_FAILURE(*err)) {
- goto error;
- }
- else if (!tagBuffer.isEmpty() &&
- uprv_strnicmp(
- maximizedTagBuffer.data(),
- tagBuffer.data(),
- tagBuffer.length()) == 0) {
- createTagString(
- lang,
- langLength,
- script,
- scriptLength,
- nullptr,
- 0,
- trailing,
- trailingLength,
- sink,
- err);
- return;
- }
- }
- {
- /**
- * If we got here, return the max + trail.
- **/
- createTagString(
- lang,
- langLength,
- script,
- scriptLength,
- region,
- regionLength,
- trailing,
- trailingLength,
- sink,
- err);
- return;
- }
- error:
- if (!U_FAILURE(*err)) {
- *err = U_ILLEGAL_ARGUMENT_ERROR;
- }
- }
- static int32_t
- do_canonicalize(const char* localeID,
- char* buffer,
- int32_t bufferCapacity,
- UErrorCode* err)
- {
- int32_t canonicalizedSize = uloc_canonicalize(
- localeID,
- buffer,
- bufferCapacity,
- err);
- if (*err == U_STRING_NOT_TERMINATED_WARNING ||
- *err == U_BUFFER_OVERFLOW_ERROR) {
- return canonicalizedSize;
- }
- else if (U_FAILURE(*err)) {
- return -1;
- }
- else {
- return canonicalizedSize;
- }
- }
- U_CAPI int32_t U_EXPORT2
- uloc_addLikelySubtags(const char* localeID,
- char* maximizedLocaleID,
- int32_t maximizedLocaleIDCapacity,
- UErrorCode* status) {
- if (U_FAILURE(*status)) {
- return 0;
- }
- icu::CheckedArrayByteSink sink(
- maximizedLocaleID, maximizedLocaleIDCapacity);
- ulocimp_addLikelySubtags(localeID, sink, status);
- int32_t reslen = sink.NumberOfBytesAppended();
- if (U_FAILURE(*status)) {
- return sink.Overflowed() ? reslen : -1;
- }
- if (sink.Overflowed()) {
- *status = U_BUFFER_OVERFLOW_ERROR;
- } else {
- u_terminateChars(
- maximizedLocaleID, maximizedLocaleIDCapacity, reslen, status);
- }
- return reslen;
- }
- static UBool
- _ulocimp_addLikelySubtags(const char* localeID,
- icu::ByteSink& sink,
- UErrorCode* status) {
- PreflightingLocaleIDBuffer localeBuffer;
- do {
- localeBuffer.requestedCapacity = do_canonicalize(localeID, localeBuffer.getBuffer(),
- localeBuffer.getCapacity(), status);
- } while (localeBuffer.needToTryAgain(status));
-
- if (U_SUCCESS(*status)) {
- return _uloc_addLikelySubtags(localeBuffer.getBuffer(), sink, status);
- } else {
- return false;
- }
- }
- U_CAPI void U_EXPORT2
- ulocimp_addLikelySubtags(const char* localeID,
- icu::ByteSink& sink,
- UErrorCode* status) {
- _ulocimp_addLikelySubtags(localeID, sink, status);
- }
- // C API wrapper: minimizes localeID and writes the result into the
- // caller-supplied buffer minimizedLocaleID of size minimizedLocaleIDCapacity.
- //
- // Returns 0 without doing anything if *status already holds a failure.
- // Otherwise returns the number of bytes appended to the output sink, except
- // that a failure which did not overflow the buffer yields -1.
- // Sets *status to U_BUFFER_OVERFLOW_ERROR when the output did not fit.
- U_CAPI int32_t U_EXPORT2
- uloc_minimizeSubtags(const char* localeID,
-                      char* minimizedLocaleID,
-                      int32_t minimizedLocaleIDCapacity,
-                      UErrorCode* status) {
-     if (U_FAILURE(*status)) {
-         return 0;
-     }
-
-     icu::CheckedArrayByteSink sink(
-         minimizedLocaleID, minimizedLocaleIDCapacity);
-     ulocimp_minimizeSubtags(localeID, sink, status);
-
-     const int32_t length = sink.NumberOfBytesAppended();
-     const UBool truncated = sink.Overflowed();
-
-     if (U_FAILURE(*status)) {
-         // Only an overflowing failure reports a length back to the caller.
-         if (truncated) {
-             return length;
-         }
-         return -1;
-     }
-
-     if (!truncated) {
-         // Everything fit: NUL-terminate (or flag a missing terminator).
-         u_terminateChars(
-             minimizedLocaleID, minimizedLocaleIDCapacity, length, status);
-         return length;
-     }
-
-     *status = U_BUFFER_OVERFLOW_ERROR;
-     return length;
- }
- // Canonicalizes localeID into a scratch buffer (retrying for as long as the
- // buffer asks for another attempt) and then lets _uloc_minimizeSubtags()
- // write the minimized form to sink.
- // On a canonicalization failure nothing is written and *status keeps the
- // failure code.
- U_CAPI void U_EXPORT2
- ulocimp_minimizeSubtags(const char* localeID,
-                         icu::ByteSink& sink,
-                         UErrorCode* status) {
-     PreflightingLocaleIDBuffer localeBuffer;
-     do {
-         localeBuffer.requestedCapacity = do_canonicalize(localeID, localeBuffer.getBuffer(),
-                 localeBuffer.getCapacity(), status);
-     } while (localeBuffer.needToTryAgain(status));
-
-     // Same guard as _ulocimp_addLikelySubtags(): do not hand a buffer whose
-     // canonicalization failed to the helper, rather than relying on the
-     // helper to check *status before reading it.
-     if (U_FAILURE(*status)) {
-         return;
-     }
-     _uloc_minimizeSubtags(localeBuffer.getBuffer(), sink, status);
- }
- // Fast-path table for common languages: each language subtag is immediately
- // followed by a direction marker, '-' for LTR and '+' for RTL.
- static const char LANG_DIR_STRING[] =
-     "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";
-
- // Returns whether the script of the given locale is right-to-left.
- // The script is taken from the locale itself when present; otherwise the
- // language is looked up in LANG_DIR_STRING, and as a last resort the script
- // is obtained by adding likely subtags. Any error along the way yields false.
- // Implemented here because this calls ulocimp_addLikelySubtags().
- U_CAPI UBool U_EXPORT2
- uloc_isRightToLeft(const char *locale) {
-     UErrorCode errorCode = U_ZERO_ERROR;
-     char script[8];
-     int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode);
-     const bool haveScript = U_SUCCESS(errorCode) &&
-                             errorCode != U_STRING_NOT_TERMINATED_WARNING &&
-                             scriptLength != 0;
-     if (!haveScript) {
-         // Fastpath: We know the likely scripts and their writing direction
-         // for some common languages.
-         char lang[8];
-         errorCode = U_ZERO_ERROR;
-         const int32_t langLength =
-             uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode);
-         if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
-             return false;
-         }
-         if (langLength > 0) {
-             const char* hit = uprv_strstr(LANG_DIR_STRING, lang);
-             if (hit != nullptr) {
-                 const char marker = hit[langLength];
-                 if (marker == '-') {
-                     return false;
-                 }
-                 if (marker == '+') {
-                     return true;
-                 }
-                 // Any other character: partial match of a longer code.
-             }
-         }
-
-         // Otherwise, find the likely script.
-         errorCode = U_ZERO_ERROR;
-         icu::CharString maximized;
-         {
-             icu::CharStringByteSink sink(&maximized);
-             ulocimp_addLikelySubtags(locale, sink, &errorCode);
-         }
-         if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
-             return false;
-         }
-         scriptLength = uloc_getScript(maximized.data(), script, UPRV_LENGTHOF(script), &errorCode);
-         if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
-             return false;
-         }
-         if (scriptLength == 0) {
-             return false;
-         }
-     }
-
-     // Map the script name to its code and ask for that script's direction.
-     const UScriptCode scriptCode =
-         static_cast<UScriptCode>(u_getPropertyValueEnum(UCHAR_SCRIPT, script));
-     return uscript_isRightToLeft(scriptCode);
- }
- U_NAMESPACE_BEGIN
- // Member-function form of uloc_isRightToLeft(), applied to this locale's
- // base name.
- UBool
- Locale::isRightToLeft() const {
-     const char* const baseName = getBaseName();
-     return uloc_isRightToLeft(baseName);
- }
- U_NAMESPACE_END
- // Size of the scratch buffer used below.
- // The following must at least allow for rg key value (6) plus terminator (1).
- #define ULOC_RG_BUFLEN 8
- // Determines the region code to use for localeID and copies it to region.
- //
- // Sources, in order of preference:
- //   1. the "rg" keyword value, accepted only if it is 6 characters long and,
- //      after uppercasing, ends in "ZZZZ"; its first two characters are used;
- //   2. the country reported by uloc_getCountry() for localeID;
- //   3. if that is empty and inferRegion is true, the country of localeID
- //      after adding likely subtags.
- //
- // region/regionCapacity: destination buffer and its size in chars.
- // status: in/out error code. Nothing is done if it already holds a failure.
- //         Set to U_ILLEGAL_ARGUMENT_ERROR if regionCapacity is negative or
- //         region is null while regionCapacity is positive.
- // Returns the value of u_terminateChars() for the region length (0 on the
- // early-exit paths).
- U_CAPI int32_t U_EXPORT2
- ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
-                                      char *region, int32_t regionCapacity, UErrorCode* status) {
-     if (U_FAILURE(*status)) {
-         return 0;
-     }
-     // Reject unusable output arguments up front: a negative capacity would be
-     // converted to a huge unsigned count by the copy at the end, and a null
-     // destination with room requested would be written through.
-     if (regionCapacity < 0 || (region == nullptr && regionCapacity > 0)) {
-         *status = U_ILLEGAL_ARGUMENT_ERROR;
-         return 0;
-     }
-     char rgBuf[ULOC_RG_BUFLEN];
-     UErrorCode rgStatus = U_ZERO_ERROR;
-
-     // First check for rg keyword value
-     int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus);
-     if (U_FAILURE(rgStatus) || rgLen != 6) {
-         rgLen = 0;
-     } else {
-         // rgBuf guaranteed to be zero terminated here, with text len 6
-         char *rgPtr = rgBuf;
-         for (; *rgPtr!= 0; rgPtr++) {
-             *rgPtr = uprv_toupper(*rgPtr);
-         }
-         rgLen = (uprv_strcmp(rgBuf+2, "ZZZZ") == 0)? 2: 0;
-     }
-
-     if (rgLen == 0) {
-         // No valid rg keyword value, try for unicode_region_subtag
-         rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status);
-         if (U_FAILURE(*status)) {
-             rgLen = 0;
-         } else if (rgLen == 0 && inferRegion) {
-             // no unicode_region_subtag but inferRegion true, try likely subtags
-             rgStatus = U_ZERO_ERROR;
-             icu::CharString locBuf;
-             {
-                 icu::CharStringByteSink sink(&locBuf);
-                 ulocimp_addLikelySubtags(localeID, sink, &rgStatus);
-             }
-             if (U_SUCCESS(rgStatus)) {
-                 rgLen = uloc_getCountry(locBuf.data(), rgBuf, ULOC_RG_BUFLEN, status);
-                 if (U_FAILURE(*status)) {
-                     rgLen = 0;
-                 }
-             }
-         }
-     }
-
-     // Keep the terminator write inside rgBuf even if a length that does not
-     // leave room for it was reported; such a value is treated as "no region".
-     if (rgLen < 0 || rgLen >= ULOC_RG_BUFLEN) {
-         rgLen = 0;
-     }
-     rgBuf[rgLen] = 0;
-     if (regionCapacity > 0) {
-         uprv_strncpy(region, rgBuf, regionCapacity);
-     }
-     return u_terminateChars(region, regionCapacity, rgLen, status);
- }
|