locmap.cpp 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. **********************************************************************
  5. * Copyright (C) 1996-2016, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. **********************************************************************
  8. *
  9. * Provides functionality for mapping between
  10. * LCID and Posix IDs or ICU locale to codepage
  11. *
  12. * Note: All classes and code in this file are
  13. * intended for internal use only.
  14. *
  15. * Methods of interest:
  16. * unsigned long convertToLCID(const char*);
  17. * const char* convertToPosix(unsigned long);
  18. *
  19. * Kathleen Wilson, 4/30/96
  20. *
  21. * Date Name Description
  22. * 3/11/97 aliu Fixed off-by-one bug in assignment operator. Added
  23. * setId() method and safety check against
  24. * MAX_ID_LENGTH.
  25. * 04/23/99 stephen Added C wrapper for convertToPosix.
  26. * 09/18/00 george Removed the memory leaks.
  27. * 08/23/01 george Convert to C
  28. */
  29. #include "locmap.h"
  30. #include "bytesinkutil.h"
  31. #include "charstr.h"
  32. #include "cstring.h"
  33. #include "cmemory.h"
  34. #include "ulocimp.h"
  35. #include "unicode/uloc.h"
  36. #if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
  37. #include <windows.h>
  38. #include <winnls.h> // LCIDToLocaleName and LocaleNameToLCID
  39. #endif
  40. /*
  41. * Note:
  42. * The mapping from Win32 locale ID numbers to POSIX locale strings should
  43. * be the faster one.
  44. *
  45. * Windows LCIDs are defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
  46. * [MS-LCID] Windows Language Code Identifier (LCID) Reference
  47. */
  48. /*
  49. ////////////////////////////////////////////////
  50. //
  51. // Internal Classes for LCID <--> POSIX Mapping
  52. //
  53. /////////////////////////////////////////////////
  54. */
  /**
   * One row of an LCID <--> POSIX mapping table: a host locale ID paired
   * with the POSIX locale ID string it corresponds to.
   */
  typedef struct ILcidPosixElement
  {
      const uint32_t hostID;       /* LCID in host format, e.g. 0x044d */
      const char * const posixID;  /* POSIX ID, e.g. "de" or "de_CH" */
  } ILcidPosixElement;
  /**
   * A group of mapping rows: regionMaps points at an array of
   * numRegions ILcidPosixElement entries.
   */
  typedef struct ILcidPosixMap
  {
      const uint32_t numRegions;                         /* number of rows in regionMaps */
      const struct ILcidPosixElement* const regionMaps;  /* the rows themselves */
  } ILcidPosixMap;
  65. /*
  66. /////////////////////////////////////////////////
  67. //
  68. // Easy macros to make the LCID <--> POSIX Mapping
  69. //
  70. /////////////////////////////////////////////////
  71. */
  /**
   * The standard one language/one country mapping for LCID.
   * Defines the two-row table locmap_<languageID>: row 0 is the parent
   * (language-only) entry, whose host ID is LANGUAGE_LCID(hostID), and
   * row 1 is the language with the country.
   * The expansion already ends with ';', so invocations take no trailing
   * semicolon.
   * @param hostID LCID in host format such as 0x044d
   * @param languageID posix ID of just the language such as 'de'
   * @param posixID posix ID of the language_TERRITORY such as 'de_CH'
   */
  #define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
  static const ILcidPosixElement locmap_ ## languageID [] = { \
      {LANGUAGE_LCID(hostID), #languageID}, /* parent locale */ \
      {hostID, #posixID}, \
  };
  /**
   * Define a subtable by ID.
   * Expands to the declaration head of the array locmap_<id>; the caller
   * supplies the brace-enclosed rows and the terminating ';'.
   * @param id the POSIX ID, either a language or language_TERRITORY
   */
  #define ILCID_POSIX_SUBTABLE(id) \
  static const ILcidPosixElement locmap_ ## id [] =
  /**
   * Create the map for the posixID. This macro supposes that the language string
   * name is the same as the global variable name, and that the first element
   * in the ILcidPosixElement is just the language.
   * Expands to an ILcidPosixMap initializer: the row count of locmap_<_posixID>
   * (via UPRV_LENGTHOF) followed by the array itself.
   * @param _posixID the full POSIX ID for this entry.
   */
  #define ILCID_POSIX_MAP(_posixID) \
  {UPRV_LENGTHOF(locmap_ ## _posixID), locmap_ ## _posixID}
  99. /*
  100. ////////////////////////////////////////////
  101. //
  102. // Create the table of LCID to POSIX Mapping
  103. // None of it should be dynamically created.
  104. //
  105. // Keep static locale variables inside the function so that
  106. // it can be created properly during static init.
  107. //
  108. // Note: This table should be updated periodically. Check the [MS-LCID] Windows Language Code Identifier
  109. // (LCID) Reference defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
  110. //
  111. // Microsoft is moving away from LCID in favor of locale name as of Vista. This table needs to be
  112. // maintained for support of older Windows versions.
  113. // Update: Windows 7 (091130)
  114. //
  115. // Note: Microsoft assigns a different LCID if a locale has a sorting variant. POSIX IDs below may contain
  116. // @collation=XXX, but no other keywords are allowed (at least for now). When uprv_convertToLCID() is
  117. // called from uloc_getLCID(), keywords other than collation are already removed. If we really need
  118. // to support other keywords in this mapping data, we must update the implementation.
  119. ////////////////////////////////////////////
  120. */
  121. // TODO: For Windows ideally this table would be a list of exceptions rather than a complete list as
  122. // LocaleNameToLCID and LCIDToLocaleName provide 90% of these.
  123. ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA)
  124. ILCID_POSIX_SUBTABLE(ar) {
  125. {0x01, "ar"},
  126. {0x3801, "ar_AE"},
  127. {0x3c01, "ar_BH"},
  128. {0x1401, "ar_DZ"},
  129. {0x0c01, "ar_EG"},
  130. {0x0801, "ar_IQ"},
  131. {0x2c01, "ar_JO"},
  132. {0x3401, "ar_KW"},
  133. {0x3001, "ar_LB"},
  134. {0x1001, "ar_LY"},
  135. {0x1801, "ar_MA"},
  136. {0x1801, "ar_MO"},
  137. {0x2001, "ar_OM"},
  138. {0x4001, "ar_QA"},
  139. {0x0401, "ar_SA"},
  140. {0x2801, "ar_SY"},
  141. {0x1c01, "ar_TN"},
  142. {0x2401, "ar_YE"}
  143. };
  144. ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN)
  145. ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET)
  146. ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL)
  147. ILCID_POSIX_SUBTABLE(az) {
  148. {0x2c, "az"},
  149. {0x082c, "az_Cyrl_AZ"}, /* Cyrillic based */
  150. {0x742c, "az_Cyrl"}, /* Cyrillic based */
  151. {0x042c, "az_Latn_AZ"}, /* Latin based */
  152. {0x782c, "az_Latn"}, /* Latin based */
  153. {0x042c, "az_AZ"} /* Latin based */
  154. };
  155. ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU)
  156. ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)
  157. /*ILCID_POSIX_SUBTABLE(ber) {
  158. {0x5f, "ber"},
  159. {0x045f, "ber_Arab_DZ"},
  160. {0x045f, "ber_Arab"},
  161. {0x085f, "ber_Latn_DZ"},
  162. {0x085f, "ber_Latn"}
  163. };*/
  164. ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)
  165. ILCID_POSIX_SUBTABLE(bin) {
  166. {0x66, "bin"},
  167. {0x0466, "bin_NG"}
  168. };
  169. ILCID_POSIX_SUBTABLE(bn) {
  170. {0x45, "bn"},
  171. {0x0845, "bn_BD"},
  172. {0x0445, "bn_IN"}
  173. };
  174. ILCID_POSIX_SUBTABLE(bo) {
  175. {0x51, "bo"},
  176. {0x0851, "bo_BT"},
  177. {0x0451, "bo_CN"},
  178. {0x0c51, "dz_BT"}
  179. };
  180. ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR)
  181. ILCID_POSIX_SUBTABLE(ca) {
  182. {0x03, "ca"},
  183. {0x0403, "ca_ES"},
  184. {0x0803, "ca_ES_VALENCIA"}
  185. };
  186. ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR)
  187. ILCID_POSIX_SUBTABLE(chr) {
  188. {0x05c, "chr"},
  189. {0x7c5c, "chr_Cher"},
  190. {0x045c, "chr_Cher_US"},
  191. {0x045c, "chr_US"}
  192. };
  193. // ICU has chosen different names for these.
  194. ILCID_POSIX_SUBTABLE(ckb) {
  195. {0x92, "ckb"},
  196. {0x7c92, "ckb_Arab"},
  197. {0x0492, "ckb_Arab_IQ"}
  198. };
  199. /* Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function */
  200. ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs, cs_CZ)
  201. ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB)
  202. ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK)
  203. // Windows doesn't know POSIX or BCP47 Unicode phonebook sort names
  204. ILCID_POSIX_SUBTABLE(de) {
  205. {0x07, "de"},
  206. {0x0c07, "de_AT"},
  207. {0x0807, "de_CH"},
  208. {0x0407, "de_DE"},
  209. {0x1407, "de_LI"},
  210. {0x1007, "de_LU"},
  211. {0x10407,"de_DE@collation=phonebook"}, /*This is really de_DE_PHONEBOOK on Windows*/
  212. {0x10407,"de@collation=phonebook"} /*This is really de_DE_PHONEBOOK on Windows*/
  213. };
  214. ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV)
  215. ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR)
  216. // Windows uses an empty string for 'invariant'
  217. ILCID_POSIX_SUBTABLE(en) {
  218. {0x09, "en"},
  219. {0x0c09, "en_AU"},
  220. {0x2809, "en_BZ"},
  221. {0x1009, "en_CA"},
  222. {0x0809, "en_GB"},
  223. {0x3c09, "en_HK"},
  224. {0x3809, "en_ID"},
  225. {0x1809, "en_IE"},
  226. {0x4009, "en_IN"},
  227. {0x2009, "en_JM"},
  228. {0x4409, "en_MY"},
  229. {0x1409, "en_NZ"},
  230. {0x3409, "en_PH"},
  231. {0x4809, "en_SG"},
  232. {0x2C09, "en_TT"},
  233. {0x0409, "en_US"},
  234. {0x007f, "en_US_POSIX"}, /* duplicate for round-tripping */
  235. {0x2409, "en_029"},
  236. {0x1c09, "en_ZA"},
  237. {0x3009, "en_ZW"},
  238. {0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). On Windows8+ This is 0x1000 or dynamically assigned */
  239. {0x0409, "en_AS"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
  240. {0x0409, "en_GU"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
  241. {0x0409, "en_MH"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
  242. {0x0409, "en_MP"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
  243. {0x0409, "en_UM"} /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
  244. };
  245. ILCID_POSIX_SUBTABLE(en_US_POSIX) {
  246. {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */
  247. };
  248. // Windows doesn't know POSIX or BCP47 Unicode traditional sort names
  249. ILCID_POSIX_SUBTABLE(es) {
  250. {0x0a, "es"},
  251. {0x2c0a, "es_AR"},
  252. {0x400a, "es_BO"},
  253. {0x340a, "es_CL"},
  254. {0x240a, "es_CO"},
  255. {0x140a, "es_CR"},
  256. {0x5c0a, "es_CU"},
  257. {0x1c0a, "es_DO"},
  258. {0x300a, "es_EC"},
  259. {0x0c0a, "es_ES"}, /*Modern sort.*/
  260. {0x100a, "es_GT"},
  261. {0x480a, "es_HN"},
  262. {0x080a, "es_MX"},
  263. {0x4c0a, "es_NI"},
  264. {0x180a, "es_PA"},
  265. {0x280a, "es_PE"},
  266. {0x500a, "es_PR"},
  267. {0x3c0a, "es_PY"},
  268. {0x440a, "es_SV"},
  269. {0x540a, "es_US"},
  270. {0x380a, "es_UY"},
  271. {0x200a, "es_VE"},
  272. {0x580a, "es_419"},
  273. {0x040a, "es_ES@collation=traditional"},
  274. {0x040a, "es@collation=traditional"} // Windows will treat this as es-ES@collation=traditional
  275. };
  276. ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE)
  277. ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES)
  278. /* ISO-639 doesn't distinguish between Persian and Dari.*/
  279. ILCID_POSIX_SUBTABLE(fa) {
  280. {0x29, "fa"},
  281. {0x0429, "fa_IR"}, /* Persian/Farsi (Iran) */
  282. {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
  283. };
  284. /* duplicate for roundtripping */
  285. ILCID_POSIX_SUBTABLE(fa_AF) {
  286. {0x8c, "fa_AF"}, /* Persian/Dari (Afghanistan) */
  287. {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
  288. };
  289. ILCID_POSIX_SUBTABLE(ff) {
  290. {0x67, "ff"},
  291. {0x7c67, "ff_Latn"},
  292. {0x0867, "ff_Latn_SN"},
  293. {0x0467, "ff_NG"}
  294. };
  295. ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI)
  296. ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH)
  297. ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO)
  298. ILCID_POSIX_SUBTABLE(fr) {
  299. {0x0c, "fr"},
  300. {0x080c, "fr_BE"},
  301. {0x0c0c, "fr_CA"},
  302. {0x240c, "fr_CD"},
  303. {0x240c, "fr_CG"},
  304. {0x100c, "fr_CH"},
  305. {0x300c, "fr_CI"},
  306. {0x2c0c, "fr_CM"},
  307. {0x040c, "fr_FR"},
  308. {0x3c0c, "fr_HT"},
  309. {0x140c, "fr_LU"},
  310. {0x380c, "fr_MA"},
  311. {0x180c, "fr_MC"},
  312. {0x340c, "fr_ML"},
  313. {0x200c, "fr_RE"},
  314. {0x280c, "fr_SN"},
  315. {0xe40c, "fr_015"},
  316. {0x1c0c, "fr_029"}
  317. };
  318. ILCID_POSIX_ELEMENT_ARRAY(0x0467, fuv, fuv_NG)
  319. ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL)
  320. ILCID_POSIX_SUBTABLE(ga) { /* Gaelic (Ireland) */
  321. {0x3c, "ga"},
  322. {0x083c, "ga_IE"},
  323. {0x043c, "gd_GB"}
  324. };
  325. ILCID_POSIX_SUBTABLE(gd) { /* Gaelic (Scotland) */
  326. {0x91, "gd"},
  327. {0x0491, "gd_GB"}
  328. };
  329. ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES)
  330. ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN)
  331. ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY)
  332. ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR)
  333. ILCID_POSIX_SUBTABLE(ha) {
  334. {0x68, "ha"},
  335. {0x7c68, "ha_Latn"},
  336. {0x0468, "ha_Latn_NG"},
  337. };
  338. ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US)
  339. ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL)
  340. ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN)
  341. /* This LCID is really four different locales.*/
  342. ILCID_POSIX_SUBTABLE(hr) {
  343. {0x1a, "hr"},
  344. {0x141a, "bs_Latn_BA"}, /* Bosnian, Bosnia and Herzegovina */
  345. {0x681a, "bs_Latn"}, /* Bosnian, Bosnia and Herzegovina */
  346. {0x141a, "bs_BA"}, /* Bosnian, Bosnia and Herzegovina */
  347. {0x781a, "bs"}, /* Bosnian */
  348. {0x201a, "bs_Cyrl_BA"}, /* Bosnian, Bosnia and Herzegovina */
  349. {0x641a, "bs_Cyrl"}, /* Bosnian, Bosnia and Herzegovina */
  350. {0x101a, "hr_BA"}, /* Croatian in Bosnia */
  351. {0x041a, "hr_HR"}, /* Croatian*/
  352. {0x2c1a, "sr_Latn_ME"},
  353. {0x241a, "sr_Latn_RS"},
  354. {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */
  355. {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/
  356. {0x701a, "sr_Latn"}, /* It's 0x1a or 0x081a, pick one to make the test program happy. */
  357. {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */
  358. {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/
  359. {0x301a, "sr_Cyrl_ME"},
  360. {0x281a, "sr_Cyrl_RS"},
  361. {0x6c1a, "sr_Cyrl"}, /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
  362. {0x7c1a, "sr"} /* In CLDR sr is sr_Cyrl. */
  363. };
  364. ILCID_POSIX_SUBTABLE(hsb) {
  365. {0x2E, "hsb"},
  366. {0x042E, "hsb_DE"},
  367. {0x082E, "dsb_DE"},
  368. {0x7C2E, "dsb"},
  369. };
  370. ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
  371. ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)
  372. ILCID_POSIX_SUBTABLE(ibb) {
  373. {0x69, "ibb"},
  374. {0x0469, "ibb_NG"}
  375. };
  376. ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
  377. ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG)
  378. ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN)
  379. ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS)
  380. ILCID_POSIX_SUBTABLE(it) {
  381. {0x10, "it"},
  382. {0x0810, "it_CH"},
  383. {0x0410, "it_IT"}
  384. };
  385. ILCID_POSIX_SUBTABLE(iu) {
  386. {0x5d, "iu"},
  387. {0x045d, "iu_Cans_CA"},
  388. {0x785d, "iu_Cans"},
  389. {0x085d, "iu_Latn_CA"},
  390. {0x7c5d, "iu_Latn"}
  391. };
  392. ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL) /*Left in for compatibility*/
  393. ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP)
  394. ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE)
  395. ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ)
  396. ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL)
  397. ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH)
  398. ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN)
  399. ILCID_POSIX_SUBTABLE(ko) {
  400. {0x12, "ko"},
  401. {0x0812, "ko_KP"},
  402. {0x0412, "ko_KR"}
  403. };
  404. ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN)
  405. ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr, kr_NG)
  406. ILCID_POSIX_SUBTABLE(ks) { /* We could add PK and CN too */
  407. {0x60, "ks"},
  408. {0x0460, "ks_Arab_IN"},
  409. {0x0860, "ks_Deva_IN"}
  410. };
  411. ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG) /* Kyrgyz is spoken in Kyrgyzstan */
  412. ILCID_POSIX_SUBTABLE(la) {
  413. {0x76, "la"},
  414. {0x0476, "la_001"},
  415. {0x0476, "la_IT"} /*Left in for compatibility*/
  416. };
  417. ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU)
  418. ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)
  419. ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
  420. ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV)
  421. ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ)
  422. ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK)
  423. ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN)
  424. ILCID_POSIX_SUBTABLE(mn) {
  425. {0x50, "mn"},
  426. {0x0450, "mn_MN"},
  427. {0x7c50, "mn_Mong"},
  428. {0x0850, "mn_Mong_CN"},
  429. {0x0850, "mn_CN"},
  430. {0x7850, "mn_Cyrl"},
  431. {0x0c50, "mn_Mong_MN"}
  432. };
  433. ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN)
  434. ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA)
  435. ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN)
  436. ILCID_POSIX_SUBTABLE(ms) {
  437. {0x3e, "ms"},
  438. {0x083e, "ms_BN"}, /* Brunei Darussalam*/
  439. {0x043e, "ms_MY"} /* Malaysia*/
  440. };
  441. ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT)
  442. ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM)
  443. ILCID_POSIX_SUBTABLE(ne) {
  444. {0x61, "ne"},
  445. {0x0861, "ne_IN"}, /* India*/
  446. {0x0461, "ne_NP"} /* Nepal*/
  447. };
  448. ILCID_POSIX_SUBTABLE(nl) {
  449. {0x13, "nl"},
  450. {0x0813, "nl_BE"},
  451. {0x0413, "nl_NL"}
  452. };
  453. /* The "no" locale split into nb and nn. By default in ICU, "no" is nb.*/
  454. // TODO: Not all of these are needed on Windows, but I don't know how ICU treats preferred ones here.
  455. ILCID_POSIX_SUBTABLE(no) {
  456. {0x14, "no"}, /* really nb_NO - actually Windows differentiates between neutral (no region) and specific (with region) */
  457. {0x7c14, "nb"}, /* really nb */
  458. {0x0414, "nb_NO"}, /* really nb_NO. Keep first in the 414 list. */
  459. {0x0414, "no_NO"}, /* really nb_NO */
  460. {0x0814, "nn_NO"}, /* really nn_NO. Keep first in the 814 list. */
  461. {0x7814, "nn"}, /* It's 0x14 or 0x814, pick one to make the test program happy. */
  462. {0x0814, "no_NO_NY"}/* really nn_NO */
  463. };
  464. ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA) /* TODO: Verify the ISO-639 code */
  465. ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR)
  466. ILCID_POSIX_SUBTABLE(om) { /* TODO: Verify the country */
  467. {0x72, "om"},
  468. {0x0472, "om_ET"},
  469. {0x0472, "gaz_ET"}
  470. };
  471. /* Declared as or_IN to get around compiler errors*/
  472. ILCID_POSIX_SUBTABLE(or_IN) {
  473. {0x48, "or"},
  474. {0x0448, "or_IN"},
  475. };
  476. ILCID_POSIX_SUBTABLE(pa) {
  477. {0x46, "pa"},
  478. {0x0446, "pa_IN"},
  479. {0x0846, "pa_Arab_PK"},
  480. {0x0846, "pa_PK"}
  481. };
  482. ILCID_POSIX_SUBTABLE(pap) {
  483. {0x79, "pap"},
  484. {0x0479, "pap_029"},
  485. {0x0479, "pap_AN"} /*Left in for compatibility*/
  486. };
  487. ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
  488. ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF)
  489. ILCID_POSIX_SUBTABLE(pt) {
  490. {0x16, "pt"},
  491. {0x0416, "pt_BR"},
  492. {0x0816, "pt_PT"}
  493. };
  494. ILCID_POSIX_SUBTABLE(qu) {
  495. {0x6b, "qu"},
  496. {0x046b, "qu_BO"},
  497. {0x086b, "qu_EC"},
  498. {0x0C6b, "qu_PE"},
  499. {0x046b, "quz_BO"},
  500. {0x086b, "quz_EC"},
  501. {0x0C6b, "quz_PE"}
  502. };
  503. ILCID_POSIX_SUBTABLE(quc) {
  504. {0x93, "quc"},
  505. {0x0493, "quc_CO"},
  506. /*
  507. "quc_Latn_GT" is an exceptional case. Language ID of "quc"
  508. is 0x93, but LCID of "quc_Latn_GT" is 0x486, which should be
  509. under the group of "qut". "qut" is a retired ISO 639-3 language
  510. code for West Central Quiche, and merged to "quc".
  511. It looks Windows previously reserved "qut" for K'iche', but,
  512. decided to use "quc" when adding a locale for K'iche' (Guatemala).
  513. This data structure used here assumes language ID bits in
  514. LCID is unique for alphabetic language code. But this is not true
  515. for "quc_Latn_GT". If we don't have the data below, LCID look up
  516. by alphabetic locale ID (POSIX) will fail. The same entry is found
  517. under "qut" below, which is required for reverse look up.
  518. */
  519. {0x0486, "quc_Latn_GT"}
  520. };
  521. ILCID_POSIX_SUBTABLE(qut) {
  522. {0x86, "qut"},
  523. {0x0486, "qut_GT"},
  524. /*
  525. See the note in "quc" above.
  526. */
  527. {0x0486, "quc_Latn_GT"}
  528. };
  529. ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH)
  530. ILCID_POSIX_SUBTABLE(ro) {
  531. {0x18, "ro"},
  532. {0x0418, "ro_RO"},
  533. {0x0818, "ro_MD"}
  534. };
  535. // TODO: This is almost certainly 'wrong'. 0 in Windows is a synonym for LOCALE_USER_DEFAULT.
  536. // More likely this is a similar concept to the Windows 0x7f Invariant locale ""
  537. // (Except that it's not invariant in ICU)
  538. ILCID_POSIX_SUBTABLE(root) {
  539. {0x00, "root"}
  540. };
  541. ILCID_POSIX_SUBTABLE(ru) {
  542. {0x19, "ru"},
  543. {0x0419, "ru_RU"},
  544. {0x0819, "ru_MD"}
  545. };
  546. ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW)
  547. ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN)
  548. ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU)
  549. ILCID_POSIX_SUBTABLE(sd) {
  550. {0x59, "sd"},
  551. {0x0459, "sd_Deva_IN"},
  552. {0x0459, "sd_IN"},
  553. {0x0859, "sd_Arab_PK"},
  554. {0x0859, "sd_PK"},
  555. {0x7c59, "sd_Arab"}
  556. };
  557. ILCID_POSIX_SUBTABLE(se) {
  558. {0x3b, "se"},
  559. {0x0c3b, "se_FI"},
  560. {0x043b, "se_NO"},
  561. {0x083b, "se_SE"},
  562. {0x783b, "sma"},
  563. {0x183b, "sma_NO"},
  564. {0x1c3b, "sma_SE"},
  565. {0x7c3b, "smj"},
  566. {0x703b, "smn"},
  567. {0x743b, "sms"},
  568. {0x103b, "smj_NO"},
  569. {0x143b, "smj_SE"},
  570. {0x243b, "smn_FI"},
  571. {0x203b, "sms_FI"},
  572. };
  573. ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK)
  574. ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
  575. ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
  576. ILCID_POSIX_SUBTABLE(so) {
  577. {0x77, "so"},
  578. {0x0477, "so_SO"}
  579. };
  580. ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL)
  581. ILCID_POSIX_ELEMENT_ARRAY(0x0430, st, st_ZA)
  582. ILCID_POSIX_SUBTABLE(sv) {
  583. {0x1d, "sv"},
  584. {0x081d, "sv_FI"},
  585. {0x041d, "sv_SE"}
  586. };
  587. ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE)
  588. ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY)
  589. ILCID_POSIX_SUBTABLE(ta) {
  590. {0x49, "ta"},
  591. {0x0449, "ta_IN"},
  592. {0x0849, "ta_LK"}
  593. };
  594. ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN)
  595. /* Cyrillic based by default */
  596. ILCID_POSIX_SUBTABLE(tg) {
  597. {0x28, "tg"},
  598. {0x7c28, "tg_Cyrl"},
  599. {0x0428, "tg_Cyrl_TJ"}
  600. };
  601. ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH)
  602. ILCID_POSIX_SUBTABLE(ti) {
  603. {0x73, "ti"},
  604. {0x0873, "ti_ER"},
  605. {0x0473, "ti_ET"}
  606. };
  607. ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM)
  608. ILCID_POSIX_SUBTABLE(tn) {
  609. {0x32, "tn"},
  610. {0x0832, "tn_BW"},
  611. {0x0432, "tn_ZA"}
  612. };
  613. ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR)
  614. ILCID_POSIX_ELEMENT_ARRAY(0x0431, ts, ts_ZA)
  615. ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU)
  616. ILCID_POSIX_SUBTABLE(tzm) {
  617. {0x5f, "tzm"},
  618. {0x7c5f, "tzm_Latn"},
  619. {0x085f, "tzm_Latn_DZ"},
  620. {0x105f, "tzm_Tfng_MA"},
  621. {0x045f, "tzm_Arab_MA"},
  622. {0x045f, "tmz"}
  623. };
  624. ILCID_POSIX_SUBTABLE(ug) {
  625. {0x80, "ug"},
  626. {0x0480, "ug_CN"},
  627. {0x0480, "ug_Arab_CN"}
  628. };
  629. ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA)
  630. ILCID_POSIX_SUBTABLE(ur) {
  631. {0x20, "ur"},
  632. {0x0820, "ur_IN"},
  633. {0x0420, "ur_PK"}
  634. };
  635. ILCID_POSIX_SUBTABLE(uz) {
  636. {0x43, "uz"},
  637. {0x0843, "uz_Cyrl_UZ"}, /* Cyrillic based */
  638. {0x7843, "uz_Cyrl"}, /* Cyrillic based */
  639. {0x0843, "uz_UZ"}, /* Cyrillic based */
  640. {0x0443, "uz_Latn_UZ"}, /* Latin based */
  641. {0x7c43, "uz_Latn"} /* Latin based */
  642. };
  643. ILCID_POSIX_SUBTABLE(ve) { /* TODO: Verify the country */
  644. {0x33, "ve"},
  645. {0x0433, "ve_ZA"},
  646. {0x0433, "ven_ZA"}
  647. };
  648. ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
  649. ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN)
  650. ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA)
  651. ILCID_POSIX_SUBTABLE(yi) {
  652. {0x003d, "yi"},
  653. {0x043d, "yi_001"}
  654. };
  655. ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG)
  656. // Windows & ICU tend to different names for some of these
  657. // TODO: Windows probably does not need all of these entries, but I don't know how the precedence works.
  658. ILCID_POSIX_SUBTABLE(zh) {
  659. {0x0004, "zh_Hans"},
  660. {0x7804, "zh"},
  661. {0x0804, "zh_CN"},
  662. {0x0804, "zh_Hans_CN"},
  663. {0x0c04, "zh_Hant_HK"},
  664. {0x0c04, "zh_HK"},
  665. {0x1404, "zh_Hant_MO"},
  666. {0x1404, "zh_MO"},
  667. {0x1004, "zh_Hans_SG"},
  668. {0x1004, "zh_SG"},
  669. {0x0404, "zh_Hant_TW"},
  670. {0x7c04, "zh_Hant"},
  671. {0x0404, "zh_TW"},
  672. {0x30404,"zh_Hant_TW"}, /* Bopomofo order */
  673. {0x30404,"zh_TW"}, /* Bopomofo order */
  674. {0x20004,"zh@collation=stroke"},
  675. {0x20404,"zh_Hant@collation=stroke"},
  676. {0x20404,"zh_Hant_TW@collation=stroke"},
  677. {0x20404,"zh_TW@collation=stroke"},
  678. {0x20804,"zh_Hans@collation=stroke"},
  679. {0x20804,"zh_Hans_CN@collation=stroke"},
  680. {0x20804,"zh_CN@collation=stroke"}
  681. // TODO: Alternate collations for other LCIDs are missing, eg: 0x50804
  682. };
  683. ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA)
  684. /* This must be static and grouped by LCID. */
  685. static const ILcidPosixMap gPosixIDmap[] = {
  686. ILCID_POSIX_MAP(af), /* af Afrikaans 0x36 */
  687. ILCID_POSIX_MAP(am), /* am Amharic 0x5e */
  688. ILCID_POSIX_MAP(ar), /* ar Arabic 0x01 */
  689. ILCID_POSIX_MAP(arn), /* arn Araucanian/Mapudungun 0x7a */
  690. ILCID_POSIX_MAP(as), /* as Assamese 0x4d */
  691. ILCID_POSIX_MAP(az), /* az Azerbaijani 0x2c */
  692. ILCID_POSIX_MAP(ba), /* ba Bashkir 0x6d */
  693. ILCID_POSIX_MAP(be), /* be Belarusian 0x23 */
  694. /* ILCID_POSIX_MAP(ber), ber Berber/Tamazight 0x5f */
  695. ILCID_POSIX_MAP(bg), /* bg Bulgarian 0x02 */
  696. ILCID_POSIX_MAP(bin), /* bin Edo 0x66 */
  697. ILCID_POSIX_MAP(bn), /* bn Bengali; Bangla 0x45 */
  698. ILCID_POSIX_MAP(bo), /* bo Tibetan 0x51 */
  699. ILCID_POSIX_MAP(br), /* br Breton 0x7e */
  700. ILCID_POSIX_MAP(ca), /* ca Catalan 0x03 */
  701. ILCID_POSIX_MAP(chr), /* chr Cherokee 0x5c */
  702. ILCID_POSIX_MAP(ckb), /* ckb Sorani (Central Kurdish) 0x92 */
  703. ILCID_POSIX_MAP(co), /* co Corsican 0x83 */
  704. ILCID_POSIX_MAP(cs), /* cs Czech 0x05 */
  705. ILCID_POSIX_MAP(cy), /* cy Welsh 0x52 */
  706. ILCID_POSIX_MAP(da), /* da Danish 0x06 */
  707. ILCID_POSIX_MAP(de), /* de German 0x07 */
  708. ILCID_POSIX_MAP(dv), /* dv Divehi 0x65 */
  709. ILCID_POSIX_MAP(el), /* el Greek 0x08 */
  710. ILCID_POSIX_MAP(en), /* en English 0x09 */
  711. ILCID_POSIX_MAP(en_US_POSIX), /* invariant 0x7f */
  712. ILCID_POSIX_MAP(es), /* es Spanish 0x0a */
  713. ILCID_POSIX_MAP(et), /* et Estonian 0x25 */
  714. ILCID_POSIX_MAP(eu), /* eu Basque 0x2d */
  715. ILCID_POSIX_MAP(fa), /* fa Persian/Farsi 0x29 */
  716. ILCID_POSIX_MAP(fa_AF), /* fa Persian/Dari 0x8c */
  717. ILCID_POSIX_MAP(ff), /* ff Fula 0x67 */
  718. ILCID_POSIX_MAP(fi), /* fi Finnish 0x0b */
  719. ILCID_POSIX_MAP(fil), /* fil Filipino 0x64 */
  720. ILCID_POSIX_MAP(fo), /* fo Faroese 0x38 */
  721. ILCID_POSIX_MAP(fr), /* fr French 0x0c */
  722. ILCID_POSIX_MAP(fuv), /* fuv Fulfulde - Nigeria 0x67 */
  723. ILCID_POSIX_MAP(fy), /* fy Frisian 0x62 */
  724. ILCID_POSIX_MAP(ga), /* * Gaelic (Ireland,Scotland) 0x3c */
  725. ILCID_POSIX_MAP(gd), /* gd Gaelic (United Kingdom) 0x91 */
  726. ILCID_POSIX_MAP(gl), /* gl Galician 0x56 */
  727. ILCID_POSIX_MAP(gn), /* gn Guarani 0x74 */
  728. ILCID_POSIX_MAP(gsw), /* gsw Alemanic/Alsatian/Swiss German 0x84 */
  729. ILCID_POSIX_MAP(gu), /* gu Gujarati 0x47 */
  730. ILCID_POSIX_MAP(ha), /* ha Hausa 0x68 */
  731. ILCID_POSIX_MAP(haw), /* haw Hawaiian 0x75 */
  732. ILCID_POSIX_MAP(he), /* he Hebrew (formerly iw) 0x0d */
  733. ILCID_POSIX_MAP(hi), /* hi Hindi 0x39 */
  734. ILCID_POSIX_MAP(hr), /* * Croatian and others 0x1a */
  735. ILCID_POSIX_MAP(hsb), /* hsb Upper Sorbian 0x2e */
  736. ILCID_POSIX_MAP(hu), /* hu Hungarian 0x0e */
  737. ILCID_POSIX_MAP(hy), /* hy Armenian 0x2b */
  738. ILCID_POSIX_MAP(ibb), /* ibb Ibibio - Nigeria 0x69 */
  739. ILCID_POSIX_MAP(id), /* id Indonesian (formerly in) 0x21 */
  740. ILCID_POSIX_MAP(ig), /* ig Igbo 0x70 */
  741. ILCID_POSIX_MAP(ii), /* ii Sichuan Yi 0x78 */
  742. ILCID_POSIX_MAP(is), /* is Icelandic 0x0f */
  743. ILCID_POSIX_MAP(it), /* it Italian 0x10 */
  744. ILCID_POSIX_MAP(iu), /* iu Inuktitut 0x5d */
  745. ILCID_POSIX_MAP(iw), /* iw Hebrew 0x0d */
  746. ILCID_POSIX_MAP(ja), /* ja Japanese 0x11 */
  747. ILCID_POSIX_MAP(ka), /* ka Georgian 0x37 */
  748. ILCID_POSIX_MAP(kk), /* kk Kazakh 0x3f */
  749. ILCID_POSIX_MAP(kl), /* kl Kalaallisut 0x6f */
  750. ILCID_POSIX_MAP(km), /* km Khmer 0x53 */
  751. ILCID_POSIX_MAP(kn), /* kn Kannada 0x4b */
  752. ILCID_POSIX_MAP(ko), /* ko Korean 0x12 */
  753. ILCID_POSIX_MAP(kok), /* kok Konkani 0x57 */
  754. ILCID_POSIX_MAP(kr), /* kr Kanuri 0x71 */
  755. ILCID_POSIX_MAP(ks), /* ks Kashmiri 0x60 */
  756. ILCID_POSIX_MAP(ky), /* ky Kyrgyz 0x40 */
  757. ILCID_POSIX_MAP(lb), /* lb Luxembourgish 0x6e */
  758. ILCID_POSIX_MAP(la), /* la Latin 0x76 */
  759. ILCID_POSIX_MAP(lo), /* lo Lao 0x54 */
  760. ILCID_POSIX_MAP(lt), /* lt Lithuanian 0x27 */
  761. ILCID_POSIX_MAP(lv), /* lv Latvian, Lettish 0x26 */
  762. ILCID_POSIX_MAP(mi), /* mi Maori 0x81 */
  763. ILCID_POSIX_MAP(mk), /* mk Macedonian 0x2f */
  764. ILCID_POSIX_MAP(ml), /* ml Malayalam 0x4c */
  765. ILCID_POSIX_MAP(mn), /* mn Mongolian 0x50 */
  766. ILCID_POSIX_MAP(mni), /* mni Manipuri 0x58 */
  767. ILCID_POSIX_MAP(moh), /* moh Mohawk 0x7c */
  768. ILCID_POSIX_MAP(mr), /* mr Marathi 0x4e */
  769. ILCID_POSIX_MAP(ms), /* ms Malay 0x3e */
  770. ILCID_POSIX_MAP(mt), /* mt Maltese 0x3a */
  771. ILCID_POSIX_MAP(my), /* my Burmese 0x55 */
  772. /* ILCID_POSIX_MAP(nb), // no Norwegian 0x14 */
  773. ILCID_POSIX_MAP(ne), /* ne Nepali 0x61 */
  774. ILCID_POSIX_MAP(nl), /* nl Dutch 0x13 */
  775. /* ILCID_POSIX_MAP(nn), // no Norwegian 0x14 */
  776. ILCID_POSIX_MAP(no), /* * Norwegian 0x14 */
  777. ILCID_POSIX_MAP(nso), /* nso Sotho, Northern (Sepedi dialect) 0x6c */
  778. ILCID_POSIX_MAP(oc), /* oc Occitan 0x82 */
  779. ILCID_POSIX_MAP(om), /* om Oromo 0x72 */
  780. ILCID_POSIX_MAP(or_IN), /* or Oriya 0x48 */
  781. ILCID_POSIX_MAP(pa), /* pa Punjabi 0x46 */
  782. ILCID_POSIX_MAP(pap), /* pap Papiamentu 0x79 */
  783. ILCID_POSIX_MAP(pl), /* pl Polish 0x15 */
  784. ILCID_POSIX_MAP(ps), /* ps Pashto 0x63 */
  785. ILCID_POSIX_MAP(pt), /* pt Portuguese 0x16 */
  786. ILCID_POSIX_MAP(qu), /* qu Quechua 0x6B */
  787. ILCID_POSIX_MAP(quc), /* quc K'iche 0x93 */
  788. ILCID_POSIX_MAP(qut), /* qut K'iche 0x86 */
  789. ILCID_POSIX_MAP(rm), /* rm Raeto-Romance/Romansh 0x17 */
  790. ILCID_POSIX_MAP(ro), /* ro Romanian 0x18 */
  791. ILCID_POSIX_MAP(root), /* root 0x00 */
  792. ILCID_POSIX_MAP(ru), /* ru Russian 0x19 */
  793. ILCID_POSIX_MAP(rw), /* rw Kinyarwanda 0x87 */
  794. ILCID_POSIX_MAP(sa), /* sa Sanskrit 0x4f */
  795. ILCID_POSIX_MAP(sah), /* sah Yakut 0x85 */
  796. ILCID_POSIX_MAP(sd), /* sd Sindhi 0x59 */
  797. ILCID_POSIX_MAP(se), /* se Sami 0x3b */
  798. /* ILCID_POSIX_MAP(sh), // sh Serbo-Croatian 0x1a */
  799. ILCID_POSIX_MAP(si), /* si Sinhalese 0x5b */
  800. ILCID_POSIX_MAP(sk), /* sk Slovak 0x1b */
  801. ILCID_POSIX_MAP(sl), /* sl Slovenian 0x24 */
  802. ILCID_POSIX_MAP(so), /* so Somali 0x77 */
  803. ILCID_POSIX_MAP(sq), /* sq Albanian 0x1c */
  804. /* ILCID_POSIX_MAP(sr), // sr Serbian 0x1a */
  805. ILCID_POSIX_MAP(st), /* st Sutu 0x30 */
  806. ILCID_POSIX_MAP(sv), /* sv Swedish 0x1d */
  807. ILCID_POSIX_MAP(sw), /* sw Swahili 0x41 */
  808. ILCID_POSIX_MAP(syr), /* syr Syriac 0x5A */
  809. ILCID_POSIX_MAP(ta), /* ta Tamil 0x49 */
  810. ILCID_POSIX_MAP(te), /* te Telugu 0x4a */
  811. ILCID_POSIX_MAP(tg), /* tg Tajik 0x28 */
  812. ILCID_POSIX_MAP(th), /* th Thai 0x1e */
  813. ILCID_POSIX_MAP(ti), /* ti Tigrigna 0x73 */
  814. ILCID_POSIX_MAP(tk), /* tk Turkmen 0x42 */
  815. ILCID_POSIX_MAP(tn), /* tn Tswana 0x32 */
  816. ILCID_POSIX_MAP(tr), /* tr Turkish 0x1f */
  817. ILCID_POSIX_MAP(ts), /* ts Tsonga 0x31 */
  818. ILCID_POSIX_MAP(tt), /* tt Tatar 0x44 */
  819. ILCID_POSIX_MAP(tzm), /* tzm Tamazight 0x5f */
  820. ILCID_POSIX_MAP(ug), /* ug Uighur 0x80 */
  821. ILCID_POSIX_MAP(uk), /* uk Ukrainian 0x22 */
  822. ILCID_POSIX_MAP(ur), /* ur Urdu 0x20 */
  823. ILCID_POSIX_MAP(uz), /* uz Uzbek 0x43 */
  824. ILCID_POSIX_MAP(ve), /* ve Venda 0x33 */
  825. ILCID_POSIX_MAP(vi), /* vi Vietnamese 0x2a */
  826. ILCID_POSIX_MAP(wo), /* wo Wolof 0x88 */
  827. ILCID_POSIX_MAP(xh), /* xh Xhosa 0x34 */
  828. ILCID_POSIX_MAP(yi), /* yi Yiddish 0x3d */
  829. ILCID_POSIX_MAP(yo), /* yo Yoruba 0x6a */
  830. ILCID_POSIX_MAP(zh), /* zh Chinese 0x04 */
  831. ILCID_POSIX_MAP(zu), /* zu Zulu 0x35 */
  832. };
  /* Element count of gPosixIDmap; used as the upper bound for every scan or search over that table. */
  static const uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap);
  /**
   * Measures the common prefix of two NUL-terminated IDs.
   *
   * Internal helper: getHostID() uses it to find out how much of a requested
   * POSIX ID is matched by a table entry.
   *
   * @param id1 first ID to compare
   * @param id2 second ID to compare
   * @return the number of leading characters that are identical in both IDs,
   *         i.e. the index of the first difference, or the string length when
   *         the two IDs are equal.
   */
  static int32_t
  idCmp(const char* id1, const char* id2)
  {
      int32_t common = 0;
      /* Stop at the end of id1 or at the first mismatch, whichever comes first. */
      for (; id1[common] != 0 && id1[common] == id2[common]; ++common) {
      }
      return common;
  }
  850. /**
  851. * Searches for a Windows LCID
  852. *
  853. * @param posixID the Posix style locale id.
  854. * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
  855. * no equivalent Windows LCID.
  856. * @return the LCID
  857. */
  858. static uint32_t
  859. getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
  860. {
  861. int32_t bestIdx = 0;
  862. int32_t bestIdxDiff = 0;
  863. int32_t posixIDlen = (int32_t)uprv_strlen(posixID);
  864. uint32_t idx;
  865. for (idx = 0; idx < this_0->numRegions; idx++ ) {
  866. int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID);
  867. if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) {
  868. if (posixIDlen == sameChars) {
  869. /* Exact match */
  870. return this_0->regionMaps[idx].hostID;
  871. }
  872. bestIdxDiff = sameChars;
  873. bestIdx = idx;
  874. }
  875. }
  876. /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */
  877. /* We also have to make sure that sid and si and similar string subsets don't match. */
  878. if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@')
  879. && this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0)
  880. {
  881. *status = U_USING_FALLBACK_WARNING;
  882. return this_0->regionMaps[bestIdx].hostID;
  883. }
  884. /*no match found */
  885. *status = U_ILLEGAL_ARGUMENT_ERROR;
  886. return this_0->regionMaps->hostID;
  887. }
  888. static const char*
  889. getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
  890. {
  891. uint32_t i;
  892. for (i = 0; i < this_0->numRegions; i++)
  893. {
  894. if (this_0->regionMaps[i].hostID == hostID)
  895. {
  896. return this_0->regionMaps[i].posixID;
  897. }
  898. }
  899. /* If you get here, then no matching region was found,
  900. so return the language id with the wild card region. */
  901. return this_0->regionMaps[0].posixID;
  902. }
  903. /*
  904. //////////////////////////////////////
  905. //
  906. // LCID --> POSIX
  907. //
  908. /////////////////////////////////////
  909. */
  #if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
  /*
   * Some three-letter language subtags need to be rewritten in place:
   *   quz -> qu
   *   prs -> fa
   *
   * buffer must be a writable, NUL-terminated char array. len is the caller's
   * length for it and is only used to skip buffers too short to start with a
   * three-letter subtag.
   *
   * The remainder of the string (including its terminator) is shifted left by
   * one character with uprv_memmove(): source and destination overlap, which
   * a strcat-style append does not permit. The do/while wrapper makes the
   * macro expand to exactly one statement.
   */
  #define FIX_LANGUAGE_ID_TAG(buffer, len) \
      do { \
          if ((len) >= 3) { \
              if ((buffer)[0] == 'q' && (buffer)[1] == 'u' && (buffer)[2] == 'z') { \
                  uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
              } else if ((buffer)[0] == 'p' && (buffer)[1] == 'r' && (buffer)[2] == 's') { \
                  (buffer)[0] = 'f'; \
                  (buffer)[1] = 'a'; \
                  uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
              } \
          } \
      } while (false)
  #endif
  927. U_CAPI int32_t
  928. uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status)
  929. {
  930. uint16_t langID;
  931. uint32_t localeIndex;
  932. UBool bLookup = true;
  933. const char *pPosixID = nullptr;
  934. #if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
  935. static_assert(ULOC_FULLNAME_CAPACITY > LOCALE_NAME_MAX_LENGTH, "Windows locale names have smaller length than ICU locale names.");
  936. char locName[LOCALE_NAME_MAX_LENGTH] = {};
  937. // Note: Windows primary lang ID 0x92 in LCID is used for Central Kurdish and
  938. // GetLocaleInfo() maps such LCID to "ku". However, CLDR uses "ku" for
  939. // Northern Kurdish and "ckb" for Central Kurdish. For this reason, we cannot
  940. // use the Windows API to resolve locale ID for this specific case.
  941. if ((hostid & 0x3FF) != 0x92) {
  942. int32_t tmpLen = 0;
  943. char16_t windowsLocaleName[LOCALE_NAME_MAX_LENGTH] = {};
  944. // Note: LOCALE_ALLOW_NEUTRAL_NAMES was enabled in Windows7+, prior versions did not handle neutral (no-region) locale names.
  945. tmpLen = LCIDToLocaleName(hostid, (PWSTR)windowsLocaleName, UPRV_LENGTHOF(windowsLocaleName), LOCALE_ALLOW_NEUTRAL_NAMES);
  946. if (tmpLen > 1) {
  947. int32_t i = 0;
  948. // Only need to look up in table if have _, eg for de-de_phoneb type alternate sort.
  949. bLookup = false;
  950. for (i = 0; i < UPRV_LENGTHOF(locName); i++)
  951. {
  952. locName[i] = (char)(windowsLocaleName[i]);
  953. // Windows locale name may contain sorting variant, such as "es-ES_tradnl".
  954. // In such cases, we need special mapping data found in the hardcoded table
  955. // in this source file.
  956. if (windowsLocaleName[i] == L'_')
  957. {
  958. // Keep the base locale, without variant
  959. // TODO: Should these be mapped from _phoneb to @collation=phonebook, etc.?
  960. locName[i] = '\0';
  961. tmpLen = i;
  962. bLookup = true;
  963. break;
  964. }
  965. else if (windowsLocaleName[i] == L'-')
  966. {
  967. // Windows names use -, ICU uses _
  968. locName[i] = '_';
  969. }
  970. else if (windowsLocaleName[i] == L'\0')
  971. {
  972. // No point in doing more work than necessary
  973. break;
  974. }
  975. }
  976. // TODO: Need to understand this better, why isn't it an alias?
  977. FIX_LANGUAGE_ID_TAG(locName, tmpLen);
  978. pPosixID = locName;
  979. }
  980. }
  981. #endif
  982. if (bLookup) {
  983. const char *pCandidate = nullptr;
  984. langID = LANGUAGE_LCID(hostid);
  985. for (localeIndex = 0; localeIndex < gLocaleCount; localeIndex++) {
  986. if (langID == gPosixIDmap[localeIndex].regionMaps->hostID) {
  987. pCandidate = getPosixID(&gPosixIDmap[localeIndex], hostid);
  988. break;
  989. }
  990. }
  991. /* On Windows, when locale name has a variant, we still look up the hardcoded table.
  992. If a match in the hardcoded table is longer than the Windows locale name without
  993. variant, we use the one as the result */
  994. if (pCandidate && (pPosixID == nullptr || uprv_strlen(pCandidate) > uprv_strlen(pPosixID))) {
  995. pPosixID = pCandidate;
  996. }
  997. }
  998. if (pPosixID) {
  999. int32_t resLen = static_cast<int32_t>(uprv_strlen(pPosixID));
  1000. int32_t copyLen = resLen <= posixIDCapacity ? resLen : posixIDCapacity;
  1001. uprv_memcpy(posixID, pPosixID, copyLen);
  1002. if (resLen < posixIDCapacity) {
  1003. posixID[resLen] = 0;
  1004. if (*status == U_STRING_NOT_TERMINATED_WARNING) {
  1005. *status = U_ZERO_ERROR;
  1006. }
  1007. } else if (resLen == posixIDCapacity) {
  1008. *status = U_STRING_NOT_TERMINATED_WARNING;
  1009. } else {
  1010. *status = U_BUFFER_OVERFLOW_ERROR;
  1011. }
  1012. return resLen;
  1013. }
  1014. /* no match found */
  1015. *status = U_ILLEGAL_ARGUMENT_ERROR;
  1016. return -1;
  1017. }
  1018. /*
  1019. //////////////////////////////////////
  1020. //
  1021. // POSIX --> LCID
  1022. // This should only be called from uloc_getLCID.
  1023. // The locale ID must be in canonical form.
  1024. //
  1025. /////////////////////////////////////
  1026. */
  1027. U_CAPI uint32_t
  1028. uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status)
  1029. {
  1030. if (U_FAILURE(*status)) {
  1031. return 0;
  1032. }
  1033. // The purpose of this function is to leverage the Windows platform name->lcid
  1034. // conversion functionality when available.
  1035. #if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
  1036. int32_t len;
  1037. char baseName[ULOC_FULLNAME_CAPACITY] = {};
  1038. const char * mylocaleID = localeID;
  1039. // Check any for keywords.
  1040. if (uprv_strchr(localeID, '@'))
  1041. {
  1042. icu::CharString collVal;
  1043. {
  1044. icu::CharStringByteSink sink(&collVal);
  1045. ulocimp_getKeywordValue(localeID, "collation", sink, status);
  1046. }
  1047. if (U_SUCCESS(*status) && !collVal.isEmpty())
  1048. {
  1049. // If it contains the keyword collation, return 0 so that the LCID lookup table will be used.
  1050. return 0;
  1051. }
  1052. else
  1053. {
  1054. // If the locale ID contains keywords other than collation, just use the base name.
  1055. len = uloc_getBaseName(localeID, baseName, UPRV_LENGTHOF(baseName) - 1, status);
  1056. if (U_SUCCESS(*status) && len > 0)
  1057. {
  1058. baseName[len] = 0;
  1059. mylocaleID = baseName;
  1060. }
  1061. }
  1062. }
  1063. char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {};
  1064. // this will change it from de_DE@collation=phonebook to de-DE-u-co-phonebk form
  1065. (void)uloc_toLanguageTag(mylocaleID, asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), false, status);
  1066. if (U_SUCCESS(*status))
  1067. {
  1068. // Need it to be UTF-16, not 8-bit
  1069. wchar_t bcp47Tag[LOCALE_NAME_MAX_LENGTH] = {};
  1070. int32_t i;
  1071. for (i = 0; i < UPRV_LENGTHOF(bcp47Tag); i++)
  1072. {
  1073. if (asciiBCP47Tag[i] == '\0')
  1074. {
  1075. break;
  1076. }
  1077. else
  1078. {
  1079. // Copy the character
  1080. bcp47Tag[i] = static_cast<wchar_t>(asciiBCP47Tag[i]);
  1081. }
  1082. }
  1083. if (i < (UPRV_LENGTHOF(bcp47Tag) - 1))
  1084. {
  1085. // Ensure it's null terminated
  1086. bcp47Tag[i] = L'\0';
  1087. LCID lcid = LocaleNameToLCID(bcp47Tag, LOCALE_ALLOW_NEUTRAL_NAMES);
  1088. if (lcid > 0)
  1089. {
  1090. // Found LCID from windows, return that one, unless its completely ambiguous
  1091. // LOCALE_USER_DEFAULT and transients are OK because they will round trip
  1092. // for this process.
  1093. if (lcid != LOCALE_CUSTOM_UNSPECIFIED)
  1094. {
  1095. return lcid;
  1096. }
  1097. }
  1098. }
  1099. }
  1100. #else
  1101. (void) localeID; // Suppress unused variable warning.
  1102. #endif
  1103. // Nothing found, or not implemented.
  1104. return 0;
  1105. }
  1106. U_CAPI uint32_t
  1107. uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
  1108. {
  1109. // This function does the table lookup when native platform name->lcid conversion isn't available,
  1110. // or for locales that don't follow patterns the platform expects.
  1111. uint32_t low = 0;
  1112. uint32_t high = gLocaleCount;
  1113. uint32_t mid;
  1114. uint32_t oldmid = 0;
  1115. int32_t compVal;
  1116. uint32_t value = 0;
  1117. uint32_t fallbackValue = (uint32_t)-1;
  1118. UErrorCode myStatus;
  1119. uint32_t idx;
  1120. /* Check for incomplete id. */
  1121. if (!langID || !posixID || uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) {
  1122. return 0;
  1123. }
  1124. /*Binary search for the map entry for normal cases */
  1125. while (high > low) /*binary search*/{
  1126. mid = (high+low) >> 1; /*Finds median*/
  1127. if (mid == oldmid)
  1128. break;
  1129. compVal = uprv_strcmp(langID, gPosixIDmap[mid].regionMaps->posixID);
  1130. if (compVal < 0){
  1131. high = mid;
  1132. }
  1133. else if (compVal > 0){
  1134. low = mid;
  1135. }
  1136. else /*we found it*/{
  1137. return getHostID(&gPosixIDmap[mid], posixID, status);
  1138. }
  1139. oldmid = mid;
  1140. }
  1141. /*
  1142. * Sometimes we can't do a binary search on posixID because some LCIDs
  1143. * go to different locales. We hit one of those special cases.
  1144. */
  1145. for (idx = 0; idx < gLocaleCount; idx++ ) {
  1146. myStatus = U_ZERO_ERROR;
  1147. value = getHostID(&gPosixIDmap[idx], posixID, &myStatus);
  1148. if (myStatus == U_ZERO_ERROR) {
  1149. return value;
  1150. }
  1151. else if (myStatus == U_USING_FALLBACK_WARNING) {
  1152. fallbackValue = value;
  1153. }
  1154. }
  1155. if (fallbackValue != (uint32_t)-1) {
  1156. *status = U_USING_FALLBACK_WARNING;
  1157. return fallbackValue;
  1158. }
  1159. /* no match found */
  1160. *status = U_ILLEGAL_ARGUMENT_ERROR;
  1161. return 0; /* return international (root) */
  1162. }