ulocimp.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. **********************************************************************
  5. * Copyright (C) 2004-2016, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. **********************************************************************
  8. */
  9. #ifndef ULOCIMP_H
  10. #define ULOCIMP_H
  11. #include "unicode/bytestream.h"
  12. #include "unicode/uloc.h"
  13. #include "charstr.h"
  14. /**
  15. * Creates an enumeration over the specified keyword list.
  16. * @param keywordList double-null terminated list. Will be copied.
  17. * @param keywordListSize size in bytes of keywordList
  18. * @param status pointer to the error code
  19. * @return enumeration (owned by caller) of the keyword list.
  20. * @internal ICU 3.0
  21. */
  22. U_CAPI UEnumeration* U_EXPORT2
  23. uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status);
  24. /**
  25. * Looks up a resource bundle table item with fallback on the table level.
  26. * This is accessible so it can be called by C++ code.
      *
      * NOTE(review): the parameters are not documented in this header. Judging
      * only by their names, path/locale select the bundle, tableKey/subTableKey/
      * itemKey select the item, pLength receives the length of the returned
      * string and pErrorCode is the in/out error code -- confirm against the
      * implementation before relying on this.
  27. */
  28. U_CAPI const UChar * U_EXPORT2
  29. uloc_getTableStringWithFallback(
  30. const char *path,
  31. const char *locale,
  32. const char *tableKey,
  33. const char *subTableKey,
  34. const char *itemKey,
  35. int32_t *pLength,
  36. UErrorCode *pErrorCode);
  /*
   * Evaluates to true if `a` is a locale ID separator ('_' or '-'), false otherwise.
   *
   * The argument is parenthesized so that expressions with low-precedence
   * operators (e.g. a conditional expression) are compared as a whole.
   * Note that `a` is evaluated twice, so it must not have side effects.
   */
  #define _isIDSeparator(a) ((a) == '_' || (a) == '-')
  /*
   * Each function takes an ID string and returns a C string.
   * NOTE(review): presumably these map an obsolete country / language code to
   * its current replacement (and return the input when there is none) --
   * confirm against the implementation; ownership of the returned pointer is
   * not stated here.
   */
  39. U_CFUNC const char*
  40. uloc_getCurrentCountryID(const char* oldID);
  41. U_CFUNC const char*
  42. uloc_getCurrentLanguageID(const char* oldID);
  /*
   * Writes keyword information for localeID to `sink`; errors are reported
   * through `status`.
   * NOTE(review): presumably `prev` is the character preceding the keyword
   * section and `valuesToo` selects whether values are emitted along with the
   * keyword names -- confirm against the implementation.
   */
  43. U_CFUNC void
  44. ulocimp_getKeywords(const char *localeID,
  45. char prev,
  46. icu::ByteSink& sink,
  47. UBool valuesToo,
  48. UErrorCode *status);
  /*
   * Subtag accessors: each takes a locale ID and returns the result by value
   * as an icu::CharString, reporting errors through `status`.
   * NOTE(review): presumably each returns the language / script / country
   * subtag found at the start of localeID, and *pEnd (if pEnd is non-null) is
   * set to the position just past the parsed subtag -- confirm against the
   * implementation.
   */
  49. icu::CharString U_EXPORT2
  50. ulocimp_getLanguage(const char *localeID,
  51. const char **pEnd,
  52. UErrorCode &status);
  53. icu::CharString U_EXPORT2
  54. ulocimp_getScript(const char *localeID,
  55. const char **pEnd,
  56. UErrorCode &status);
  57. icu::CharString U_EXPORT2
  58. ulocimp_getCountry(const char *localeID,
  59. const char **pEnd,
  60. UErrorCode &status);
  /*
   * ByteSink-based functions: each writes its result to `sink` instead of a
   * caller-supplied fixed-size buffer and reports errors through the
   * UErrorCode pointer.
   * NOTE(review): presumably these are the sink counterparts of the public
   * uloc_getName(), uloc_getBaseName(), uloc_canonicalize() and
   * uloc_getKeywordValue() -- confirm against the implementation.
   */
  61. U_CAPI void U_EXPORT2
  62. ulocimp_getName(const char* localeID,
  63. icu::ByteSink& sink,
  64. UErrorCode* err);
  65. U_CAPI void U_EXPORT2
  66. ulocimp_getBaseName(const char* localeID,
  67. icu::ByteSink& sink,
  68. UErrorCode* err);
  69. U_CAPI void U_EXPORT2
  70. ulocimp_canonicalize(const char* localeID,
  71. icu::ByteSink& sink,
  72. UErrorCode* err);
  73. U_CAPI void U_EXPORT2
  74. ulocimp_getKeywordValue(const char* localeID,
  75. const char* keywordName,
  76. icu::ByteSink& sink,
  77. UErrorCode* status);
  78. /**
  79. * Writes a well-formed language tag for this locale ID.
  80. *
  81. * **Note**: When `strict` is false, any locale fields which do not satisfy the
  82. * BCP47 syntax requirement will be omitted from the result. When `strict` is
  83. * true, this function sets `err` to U_ILLEGAL_ARGUMENT_ERROR if any locale
  84. * fields do not satisfy the BCP47 syntax requirement.
  85. *
  86. * @param localeID the input locale ID
  87. * @param sink the output sink receiving the BCP47 language
  88. * tag for this Locale.
  89. * @param strict boolean value indicating if the function returns
  90. * an error for an ill-formed input locale ID.
  91. * @param err error information if producing the language
  92. * tag failed.
  93. * (No value is returned; the language tag is written to `sink`.)
  94. *
  95. * @internal ICU 64
  96. */
  97. U_CAPI void U_EXPORT2
  98. ulocimp_toLanguageTag(const char* localeID,
  99. icu::ByteSink& sink,
  100. UBool strict,
  101. UErrorCode* err);
  102. /**
  103. * Writes a locale ID for the specified BCP47 language tag string to `sink`.
  104. * If the specified language tag contains any ill-formed subtags,
  105. * the first such subtag and all following subtags are ignored.
  106. * <p>
  107. * This implements the 'Language-Tag' production of BCP 47, and so
  108. * supports legacy language tags (marked as “Type: grandfathered” in BCP 47)
  109. * (regular and irregular) as well as private use language tags.
  110. *
  111. * Private use tags are represented as 'x-whatever',
  112. * and legacy tags are converted to their canonical replacements where they exist.
  113. *
  114. * Note that a few legacy tags have no modern replacement;
  115. * these will be converted using the fallback described in
  116. * the first paragraph, so some information might be lost.
  117. *
  118. * @param langtag the input BCP47 language tag.
  119. * @param tagLen the length of langtag, or -1 to call uprv_strlen().
  120. * @param sink the output sink receiving a locale ID for the
  121. * specified BCP47 language tag.
  122. * @param parsedLength if not NULL, successfully parsed length
  123. * for the input language tag is set.
  124. * @param err error information if producing the locale ID
  125. * failed.
  126. * @internal ICU 63
  127. */
  128. U_CAPI void U_EXPORT2
  129. ulocimp_forLanguageTag(const char* langtag,
  130. int32_t tagLen,
  131. icu::ByteSink& sink,
  132. int32_t* parsedLength,
  133. UErrorCode* err);
  134. /**
  135. * Gets the region to use for supplemental data lookup. Uses, in order:
  136. * (1) any region specified by locale tag "rg"; if none then
  137. * (2) any unicode_region_tag in the locale ID; if none then
  138. * (3) if inferRegion is true, the region suggested by
  139. * getLikelySubtags on the localeID.
  140. * If no region is found, the returned length is 0.
  141. *
  142. * @param localeID
  143. * The complete locale ID (with keywords) from which
  144. * to get the region to use for supplemental data.
  145. * @param inferRegion
  146. * If true, will try to infer region from localeID if
  147. * no other region is found.
  148. * @param region
  149. * Buffer in which to put the region ID found; should
  150. * have a capacity of at least ULOC_COUNTRY_CAPACITY.
  151. * @param regionCapacity
  152. * The actual capacity of the region buffer.
  153. * @param status
  154. * Pointer to in/out UErrorCode value for latest status.
  155. * @return
  156. * The length of any region code found, or 0 if none.
  157. * @internal ICU 57
  158. */
  159. U_CAPI int32_t U_EXPORT2
  160. ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
  161. char *region, int32_t regionCapacity, UErrorCode* status);
  162. /**
  163. * Add the likely subtags for a provided locale ID, per the algorithm described
  164. * in the following CLDR technical report:
  165. *
  166. * http://www.unicode.org/reports/tr35/#Likely_Subtags
  167. *
  168. * If localeID is already in the maximal form, or there is no data available
  169. * for maximization, it will be copied to the output sink. For example,
  170. * "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
  171. *
  172. * Examples:
  173. *
  174. * "en" maximizes to "en_Latn_US"
  175. *
  176. * "de" maximizes to "de_Latn_DE"
  177. *
  178. * "sr" maximizes to "sr_Cyrl_RS"
  179. *
  180. * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
  181. *
  182. * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
  183. *
  184. * @param localeID The locale to maximize
  185. * @param sink The output sink receiving the maximized locale
  186. * @param err Error information if maximizing the locale failed. If the length
  187. * of the localeID and the null-terminator is greater than the maximum allowed size,
  188. * or the localeID is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
  189. * @internal ICU 64
  190. */
  191. U_CAPI void U_EXPORT2
  192. ulocimp_addLikelySubtags(const char* localeID,
  193. icu::ByteSink& sink,
  194. UErrorCode* err);
  195. /**
  196. * Minimize the subtags for a provided locale ID, per the algorithm described
  197. * in the following CLDR technical report:
  198. *
  199. * http://www.unicode.org/reports/tr35/#Likely_Subtags
  200. *
  201. * If localeID is already in the minimal form, or there is no data available
  202. * for minimization, it will be copied to the output sink. Since the
  203. * minimization algorithm relies on proper maximization, see the comments
  204. * for ulocimp_addLikelySubtags for reasons why there might not be any data.
  205. *
  206. * Examples:
  207. *
  208. * "en_Latn_US" minimizes to "en"
  209. *
  210. * "de_Latn_DE" minimizes to "de"
  211. *
  212. * "sr_Cyrl_RS" minimizes to "sr"
  213. *
  214. * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
  215. * script, and minimizing to "zh" would imply "zh_Hans_CN".)
  216. *
  217. * @param localeID The locale to minimize
  218. * @param sink The output sink receiving the minimized locale
  219. * @param err Error information if minimizing the locale failed. If the length
  220. * of the localeID and the null-terminator is greater than the maximum allowed size,
  221. * or the localeID is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
  222. * @internal ICU 64
  223. */
  224. U_CAPI void U_EXPORT2
  225. ulocimp_minimizeSubtags(const char* localeID,
  226. icu::ByteSink& sink,
  227. UErrorCode* err);
  /*
   * Takes a locale ID and returns a pointer to a C string.
   * NOTE(review): presumably the result points at the start of the keyword
   * section inside localeID, or is NULL when the ID has no keywords --
   * confirm against the implementation.
   */
  228. U_CAPI const char * U_EXPORT2
  229. locale_getKeywordsStart(const char *localeID);
  /*
   * Language-tag predicates. Every function takes a character pointer `s`
   * together with a length `len` and returns a UBool.
   * NOTE(review): presumably each checks whether the given text is
   * syntactically valid for the BCP 47 / Unicode locale element named in the
   * function, and a negative `len` may mean "NUL-terminated" -- confirm both
   * against the implementation.
   */
  230. U_CFUNC UBool
  231. ultag_isExtensionSubtags(const char* s, int32_t len);
  232. U_CFUNC UBool
  233. ultag_isLanguageSubtag(const char* s, int32_t len);
  234. U_CFUNC UBool
  235. ultag_isPrivateuseValueSubtags(const char* s, int32_t len);
  236. U_CFUNC UBool
  237. ultag_isRegionSubtag(const char* s, int32_t len);
  238. U_CFUNC UBool
  239. ultag_isScriptSubtag(const char* s, int32_t len);
  240. U_CFUNC UBool
  241. ultag_isTransformedExtensionSubtags(const char* s, int32_t len);
  242. U_CFUNC UBool
  243. ultag_isUnicodeExtensionSubtags(const char* s, int32_t len);
  244. U_CFUNC UBool
  245. ultag_isUnicodeLocaleAttribute(const char* s, int32_t len);
  246. U_CFUNC UBool
  247. ultag_isUnicodeLocaleAttributes(const char* s, int32_t len);
  248. U_CFUNC UBool
  249. ultag_isUnicodeLocaleKey(const char* s, int32_t len);
  250. U_CFUNC UBool
  251. ultag_isUnicodeLocaleType(const char* s, int32_t len);
  252. U_CFUNC UBool
  253. ultag_isVariantSubtags(const char* s, int32_t len);
  /*
   * Takes a locale ID and returns a pointer to a C string.
   * NOTE(review): presumably the result points at the first tkey of a
   * transformed ("t") extension inside localeID, or is NULL when there is
   * none -- confirm against the implementation.
   */
  254. U_CAPI const char * U_EXPORT2
  255. ultag_getTKeyStart(const char *localeID);
  /*
   * Keyword key/type conversion helpers; each returns a C string.
   * The *Type variants additionally take two UBool out-pointers, isKnownKey
   * and isSpecialType.
   * NOTE(review): presumably these convert between legacy ICU keyword
   * keys/types and their BCP 47 equivalents and return NULL when no mapping
   * exists; whether the out-pointers may be NULL is not stated here --
   * confirm against the implementation.
   */
  256. U_CFUNC const char*
  257. ulocimp_toBcpKey(const char* key);
  258. U_CFUNC const char*
  259. ulocimp_toLegacyKey(const char* key);
  260. U_CFUNC const char*
  261. ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
  262. U_CFUNC const char*
  263. ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
  264. /* Test-only hook: returns an array of C strings and stores a count in *length.
        NOTE(review): presumably the array lists the locale names known to be already
        canonicalized, and *length is the number of entries -- confirm. */
  265. U_CAPI const char* const* ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* length);
  266. // Test-only hook: returns true if localeName is already canonicalized.
  267. U_CAPI bool ulocimp_isCanonicalizedLocaleForTest(const char* localeName);
  268. /**
  269. * A utility class for handling locale IDs that may be longer than ULOC_FULLNAME_CAPACITY.
  270. * This encompasses all of the logic to allocate a temporary locale ID buffer on the stack,
  271. * and then, if it's not big enough, reallocate it on the heap and try again.
  272. *
  273. * You use it like this:
  274. * UErrorCode err = U_ZERO_ERROR;
  275. *
  276. * PreflightingLocaleIDBuffer tempBuffer;
  277. * do {
  278. * tempBuffer.requestedCapacity = uloc_doSomething(localeID, tempBuffer.getBuffer(), tempBuffer.getCapacity(), &err);
  279. * } while (tempBuffer.needToTryAgain(&err));
  280. * if (U_SUCCESS(err)) {
  281. * uloc_doSomethingWithTheResult(tempBuffer.getBuffer());
  282. * }
  283. */
  284. class PreflightingLocaleIDBuffer {
  285. private:
  286. char stackBuffer[ULOC_FULLNAME_CAPACITY];
  287. char* heapBuffer = nullptr;
  288. int32_t capacity = ULOC_FULLNAME_CAPACITY;
  289. public:
  290. int32_t requestedCapacity = ULOC_FULLNAME_CAPACITY;
  291. // No heap allocation. Use only on the stack.
  292. static void* U_EXPORT2 operator new(size_t) noexcept = delete;
  293. static void* U_EXPORT2 operator new[](size_t) noexcept = delete;
  294. #if U_HAVE_PLACEMENT_NEW
  295. static void* U_EXPORT2 operator new(size_t, void*) noexcept = delete;
  296. #endif
  297. PreflightingLocaleIDBuffer() {}
  298. ~PreflightingLocaleIDBuffer() { uprv_free(heapBuffer); }
  299. char* getBuffer() {
  300. if (heapBuffer == nullptr) {
  301. return stackBuffer;
  302. } else {
  303. return heapBuffer;
  304. }
  305. }
  306. int32_t getCapacity() {
  307. return capacity;
  308. }
  309. bool needToTryAgain(UErrorCode* err) {
  310. if (heapBuffer != nullptr) {
  311. return false;
  312. }
  313. if (*err == U_BUFFER_OVERFLOW_ERROR || *err == U_STRING_NOT_TERMINATED_WARNING) {
  314. int32_t newCapacity = requestedCapacity + 2; // one for the terminating null, one just for paranoia
  315. heapBuffer = static_cast<char*>(uprv_malloc(newCapacity));
  316. if (heapBuffer == nullptr) {
  317. *err = U_MEMORY_ALLOCATION_ERROR;
  318. } else {
  319. *err = U_ZERO_ERROR;
  320. capacity = newCapacity;
  321. }
  322. return U_SUCCESS(*err);
  323. }
  324. return false;
  325. }
  326. };
  327. #endif