utils.c 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790
  1. /*
  2. * Utility routines
  3. *
  4. * Copyright 1998 Bertho A. Stultiens
  5. *
  6. * This library is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * This library is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with this library; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  19. */
  #include "config.h"
  #include "wine/port.h"

  #include <assert.h>
  #include <errno.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <stdarg.h>
  #include <string.h>
  #include <ctype.h>

  #include "wrc.h"
  #include "utils.h"
  #include "parser.h"
  31. /* #define WANT_NEAR_INDICATION */
  32. #ifdef WANT_NEAR_INDICATION
  /*
   * Replace every non-printable character of str with a space, in place.
   *
   * str must point to a writable, NUL-terminated string.
   */
  void make_print(char *str)
  {
      for ( ; *str; str++)
      {
          /* isprint() is only defined for unsigned char values (and EOF);
           * plain char may be negative for 8-bit bytes, so convert first. */
          if(!isprint((unsigned char)*str))
              *str = ' ';
      }
  }
  42. #endif
  43. static void generic_msg(const char *s, const char *t, const char *n, va_list ap)
  44. {
  45. fprintf(stderr, "%s:%d:%d: %s: ", input_name ? input_name : "stdin", line_number, char_number, t);
  46. vfprintf(stderr, s, ap);
  47. #ifdef WANT_NEAR_INDICATION
  48. {
  49. char *cpy;
  50. if(n)
  51. {
  52. cpy = xstrdup(n);
  53. make_print(cpy);
  54. fprintf(stderr, " near '%s'", cpy);
  55. free(cpy);
  56. }
  57. }
  58. #endif
  59. }
  60. int parser_error(const char *s, ...)
  61. {
  62. va_list ap;
  63. va_start(ap, s);
  64. generic_msg(s, "Error", parser_text, ap);
  65. fputc( '\n', stderr );
  66. va_end(ap);
  67. exit(1);
  68. return 1;
  69. }
  70. int parser_warning(const char *s, ...)
  71. {
  72. va_list ap;
  73. va_start(ap, s);
  74. generic_msg(s, "Warning", parser_text, ap);
  75. va_end(ap);
  76. return 0;
  77. }
  /*
   * Report an internal inconsistency and exit with status 3.
   *
   *   file, line - source position of the failing check (callers pass
   *                __FILE__ and __LINE__)
   *   s, ...     - printf-style description of the problem
   */
  void internal_error(const char *file, int line, const char *s, ...)
  {
      va_list args;

      fprintf(stderr, "Internal error (please report) %s %d: ", file, line);

      va_start(args, s);
      vfprintf(stderr, s, args);
      va_end(args);

      exit(3);
  }
  /*
   * Print "Error: ", the formatted message and the description of the current
   * errno value to stderr, then exit with status 2.
   *
   *   msg, ... - printf-style message describing the operation that failed
   */
  void fatal_perror( const char *msg, ... )
  {
      /* Capture errno before anything else: the stdio calls below are allowed
       * to modify it, which would make perror() describe the wrong failure. */
      const int saved_errno = errno;
      va_list valist;

      va_start( valist, msg );
      fprintf(stderr, "Error: ");
      vfprintf( stderr, msg, valist );
      va_end( valist );

      errno = saved_errno;
      perror( " " );
      exit(2);
  }
  /*
   * Print "Error: " followed by the formatted message to stderr and exit with
   * status 2. No newline is appended to the message.
   */
  void error(const char *s, ...)
  {
      va_list args;

      fputs("Error: ", stderr);

      va_start(args, s);
      vfprintf(stderr, s, args);
      va_end(args);

      exit(2);
  }
  /*
   * Print "Warning: " followed by the formatted message to stderr.
   * No newline is appended to the message; execution continues afterwards.
   */
  void warning(const char *s, ...)
  {
      va_list args;

      fputs("Warning: ", stderr);

      va_start(args, s);
      vfprintf(stderr, s, args);
      va_end(args);
  }
  114. void chat(const char *s, ...)
  115. {
  116. if(debuglevel & DEBUGLEVEL_CHAT)
  117. {
  118. va_list ap;
  119. va_start(ap, s);
  120. fprintf(stderr, "FYI: ");
  121. vfprintf(stderr, s, ap);
  122. va_end(ap);
  123. }
  124. }
  /*
   * Return a newly allocated copy of the last path component of name with the
   * extension ext removed (the extension is matched case-insensitively).
   *
   *   name - path to take the base name from; NULL selects "wrc.tab"
   *   ext  - extension to strip, e.g. ".rc"; must not be NULL
   *
   * Only '/' is treated as a directory separator. The returned buffer has room
   * for four more characters so that the caller can append a new extension;
   * the caller owns it.
   */
  char *dup_basename(const char *name, const char *ext)
  {
      size_t namelen;
      size_t extlen = strlen(ext);
      char *base;
      const char *slash;

      if(!name)
          name = "wrc.tab";

      slash = strrchr(name, '/');
      if (slash)
          name = slash + 1;

      namelen = strlen(name);

      /* +4 for later extension and +1 for '\0' */
      base = xmalloc(namelen + 4 + 1);
      strcpy(base, name);

      /* Only look at the tail when the name is long enough to contain the
       * extension; otherwise name + namelen - extlen would point in front of
       * the string. */
      if(namelen >= extlen && !strcasecmp(name + namelen - extlen, ext))
          base[namelen - extlen] = '\0';

      return base;
  }
  /*
   * malloc() wrapper that never returns NULL: an allocation failure is
   * reported through error(), which terminates the program.
   *
   * size must be greater than zero (asserted). The whole block is filled with
   * the byte 0x55 before it is returned, so callers must not expect zeroed
   * memory.
   */
  void *xmalloc(size_t size)
  {
      void *block;

      assert(size > 0);

      block = malloc(size);
      if(!block)
          error("Virtual memory exhausted.\n");

      return memset(block, 0x55, size);
  }
  /*
   * realloc() wrapper that never returns NULL: a failure is reported through
   * error(), which terminates the program.
   *
   * size must be greater than zero (asserted).
   */
  void *xrealloc(void *p, size_t size)
  {
      void *resized;

      assert(size > 0);

      resized = realloc(p, size);
      if(!resized)
          error("Virtual memory exhausted.\n");

      return resized;
  }
  /*
   * printf-style formatting into a freshly allocated string.
   *
   * Starts with a 100 byte buffer and retries with a larger one until the
   * result fits: the size is doubled when vsnprintf() returns -1, otherwise
   * it is set to exactly the reported length plus the terminator.
   * The caller owns the returned buffer.
   */
  char *strmake( const char* fmt, ... )
  {
      size_t capacity = 100;

      for (;;)
      {
          va_list args;
          char *buffer = xmalloc( capacity );
          int written;

          va_start( args, fmt );
          written = vsnprintf( buffer, capacity, fmt, args );
          va_end( args );

          if (written != -1 && (size_t)written < capacity) return buffer;

          /* did not fit: drop this attempt and pick the next size */
          free( buffer );
          if (written == -1) capacity *= 2;
          else capacity = written + 1;
      }
  }
  /*
   * Duplicate a NUL-terminated string into memory obtained from xmalloc().
   * str must not be NULL (asserted). The caller owns the copy.
   */
  char *xstrdup(const char *str)
  {
      char *copy;

      assert(str != NULL);

      copy = xmalloc(strlen(str)+1);
      strcpy(copy, str);
      return copy;
  }
  /*
   * Return 1 when str ends with the string end (compared case-sensitively),
   * 0 otherwise. An empty end matches every str.
   */
  int strendswith( const char *str, const char *end )
  {
      int str_len = strlen(str);
      int end_len = strlen(end);

      if (str_len < end_len) return 0;
      return strcmp( str + str_len - end_len, end ) == 0;
  }
  /*
   * Compare two NUL-terminated strings, ignoring case for the letters a-z
   * only; every other character is compared by value.
   *
   * Returns 0 when the strings are equal, otherwise the difference between
   * the first pair of (upper-cased) characters that differ.
   */
  int compare_striA( const char *str1, const char *str2 )
  {
      char a, b;

      do
      {
          a = *str1++;
          b = *str2++;
          /* only the A-Z range is case-insensitive */
          if (a >= 'a' && a <= 'z') a += 'A' - 'a';
          if (b >= 'a' && b <= 'z') b += 'A' - 'a';
      } while (a && a == b);

      return a - b;
  }
  211. int compare_striW( const WCHAR *str1, const WCHAR *str2 )
  212. {
  213. for (;;)
  214. {
  215. /* only the A-Z range is case-insensitive */
  216. WCHAR ch1 = (*str1 >= 'a' && *str1 <= 'z') ? *str1 + 'A' - 'a' : *str1;
  217. WCHAR ch2 = (*str2 >= 'a' && *str2 <= 'z') ? *str2 + 'A' - 'a' : *str2;
  218. if (!ch1 || ch1 != ch2) return ch1 - ch2;
  219. str1++;
  220. str2++;
  221. }
  222. }
  223. /*
  224. *****************************************************************************
  225. * Function : compare_name_id
  226. * Syntax : int compare_name_id(const name_id_t *n1, const name_id_t *n2)
  227. * Input :
  228. * Output :
  229. * Description :
  230. * Remarks :
  231. *****************************************************************************
  232. */
  233. int compare_name_id(const name_id_t *n1, const name_id_t *n2)
  234. {
  235. if(n1->type == name_ord && n2->type == name_ord)
  236. {
  237. return n1->name.i_name - n2->name.i_name;
  238. }
  239. else if(n1->type == name_str && n2->type == name_str)
  240. {
  241. if(n1->name.s_name->type == str_char
  242. && n2->name.s_name->type == str_char)
  243. {
  244. return compare_striA(n1->name.s_name->str.cstr, n2->name.s_name->str.cstr);
  245. }
  246. else if(n1->name.s_name->type == str_unicode
  247. && n2->name.s_name->type == str_unicode)
  248. {
  249. return compare_striW(n1->name.s_name->str.wstr, n2->name.s_name->str.wstr);
  250. }
  251. else
  252. {
  253. internal_error(__FILE__, __LINE__, "Can't yet compare strings of mixed type\n");
  254. }
  255. }
  256. else if(n1->type == name_ord && n2->type == name_str)
  257. return 1;
  258. else if(n1->type == name_str && n2->type == name_ord)
  259. return -1;
  260. else
  261. internal_error(__FILE__, __LINE__, "Comparing name-ids with unknown types (%d, %d)\n",
  262. n1->type, n2->type);
  263. return 0; /* Keep the compiler happy */
  264. }
  265. #ifdef _WIN32
  /*
   * Win32 build: whether codepage id can be used is decided entirely by
   * IsValidCodePage(); its result is returned unchanged.
   */
  int is_valid_codepage(int id)
  {
      const int valid = IsValidCodePage( id );

      return valid;
  }
  270. static WCHAR *codepage_to_unicode( int codepage, const char *src, int srclen, int *dstlen )
  271. {
  272. WCHAR *dst = xmalloc( (srclen + 1) * sizeof(WCHAR) );
  273. DWORD ret = MultiByteToWideChar( codepage, MB_ERR_INVALID_CHARS, src, srclen, dst, srclen );
  274. if (!ret) return NULL;
  275. dst[ret] = 0;
  276. *dstlen = ret;
  277. return dst;
  278. }
  279. #else /* _WIN32 */
  /*
   * Conversion tables of one codepage, as set up by init_nls_info() from the
   * contents of a c_XXX.nls file.
   */
  struct nls_info
  {
      unsigned short  codepage;      /* codepage number (header word 1); 0 marks an unused cache slot */
      unsigned short  unidef;        /* header word 4; a conversion yielding this value is checked against trans_unidef */
      unsigned short  trans_unidef;  /* header word 6; source value for which a unidef result is accepted */
      unsigned short *cp2uni;        /* table indexed by source byte, giving the converted character */
      unsigned short *dbcs_offsets;  /* indexed by lead byte, non-zero entries are offsets into this
                                        same table; NULL when the file has no such table */
  };

  /* cache of the codepages loaded so far; entries are filled from index 0 upwards */
  static struct nls_info nlsinfo[128];
  289. static void init_nls_info( struct nls_info *info, unsigned short *ptr )
  290. {
  291. unsigned short hdr_size = ptr[0];
  292. info->codepage = ptr[1];
  293. info->unidef = ptr[4];
  294. info->trans_unidef = ptr[6];
  295. ptr += hdr_size;
  296. info->cp2uni = ++ptr;
  297. ptr += 256;
  298. if (*ptr++) ptr += 256; /* glyph table */
  299. info->dbcs_offsets = *ptr ? ptr + 1 : NULL;
  300. }
  301. static const struct nls_info *get_nls_info( unsigned int codepage )
  302. {
  303. struct stat st;
  304. unsigned short *data;
  305. char *path;
  306. unsigned int i;
  307. int fd;
  308. for (i = 0; i < ARRAY_SIZE(nlsinfo) && nlsinfo[i].codepage; i++)
  309. if (nlsinfo[i].codepage == codepage) return &nlsinfo[i];
  310. assert( i < ARRAY_SIZE(nlsinfo) );
  311. for (i = 0; nlsdirs[i]; i++)
  312. {
  313. path = strmake( "%s/c_%03u.nls", nlsdirs[i], codepage );
  314. if ((fd = open( path, O_RDONLY )) != -1) break;
  315. free( path );
  316. }
  317. if (!nlsdirs[i]) return NULL;
  318. fstat( fd, &st );
  319. data = xmalloc( st.st_size );
  320. if (read( fd, data, st.st_size ) != st.st_size) error( "failed to load %s\n", path );
  321. close( fd );
  322. free( path );
  323. init_nls_info( &nlsinfo[i], data );
  324. return &nlsinfo[i];
  325. }
  326. int is_valid_codepage(int cp)
  327. {
  328. return cp == CP_UTF8 || get_nls_info( cp );
  329. }
  330. static WCHAR *codepage_to_unicode( int codepage, const char *src, int srclen, int *dstlen )
  331. {
  332. const struct nls_info *info = get_nls_info( codepage );
  333. unsigned int i;
  334. WCHAR dbch, *dst = xmalloc( (srclen + 1) * sizeof(WCHAR) );
  335. if (!info) error( "codepage %u not supported\n", codepage );
  336. if (info->dbcs_offsets)
  337. {
  338. for (i = 0; srclen; i++, srclen--, src++)
  339. {
  340. unsigned short off = info->dbcs_offsets[(unsigned char)*src];
  341. if (off)
  342. {
  343. if (srclen == 1) return NULL;
  344. dbch = (src[0] << 8) | (unsigned char)src[1];
  345. src++;
  346. srclen--;
  347. dst[i] = info->dbcs_offsets[off + (unsigned char)*src];
  348. if (dst[i] == info->unidef && dbch != info->trans_unidef) return NULL;
  349. }
  350. else
  351. {
  352. dst[i] = info->cp2uni[(unsigned char)*src];
  353. if (dst[i] == info->unidef && *src != info->trans_unidef) return NULL;
  354. }
  355. }
  356. }
  357. else
  358. {
  359. for (i = 0; i < srclen; i++)
  360. {
  361. dst[i] = info->cp2uni[(unsigned char)src[i]];
  362. if (dst[i] == info->unidef && src[i] != info->trans_unidef) return NULL;
  363. }
  364. }
  365. dst[i] = 0;
  366. *dstlen = i;
  367. return dst;
  368. }
  369. #endif /* _WIN32 */
  370. static WCHAR *utf8_to_unicode( const char *src, int srclen, int *dstlen )
  371. {
  372. static const char utf8_length[128] =
  373. {
  374. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80-0x8f */
  375. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x90-0x9f */
  376. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xa0-0xaf */
  377. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb0-0xbf */
  378. 0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xc0-0xcf */
  379. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xd0-0xdf */
  380. 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xe0-0xef */
  381. 3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0 /* 0xf0-0xff */
  382. };
  383. static const unsigned char utf8_mask[4] = { 0x7f, 0x1f, 0x0f, 0x07 };
  384. const char *srcend = src + srclen;
  385. int len, res;
  386. WCHAR *ret, *dst;
  387. dst = ret = xmalloc( (srclen + 1) * sizeof(WCHAR) );
  388. while (src < srcend)
  389. {
  390. unsigned char ch = *src++;
  391. if (ch < 0x80) /* special fast case for 7-bit ASCII */
  392. {
  393. *dst++ = ch;
  394. continue;
  395. }
  396. len = utf8_length[ch - 0x80];
  397. if (len && src + len <= srcend)
  398. {
  399. res = ch & utf8_mask[len];
  400. switch (len)
  401. {
  402. case 3:
  403. if ((ch = *src ^ 0x80) >= 0x40) break;
  404. res = (res << 6) | ch;
  405. src++;
  406. if (res < 0x10) break;
  407. case 2:
  408. if ((ch = *src ^ 0x80) >= 0x40) break;
  409. res = (res << 6) | ch;
  410. if (res >= 0x110000 >> 6) break;
  411. src++;
  412. if (res < 0x20) break;
  413. if (res >= 0xd800 >> 6 && res <= 0xdfff >> 6) break;
  414. case 1:
  415. if ((ch = *src ^ 0x80) >= 0x40) break;
  416. res = (res << 6) | ch;
  417. src++;
  418. if (res < 0x80) break;
  419. if (res <= 0xffff) *dst++ = res;
  420. else
  421. {
  422. res -= 0x10000;
  423. *dst++ = 0xd800 | (res >> 10);
  424. *dst++ = 0xdc00 | (res & 0x3ff);
  425. }
  426. continue;
  427. }
  428. }
  429. *dst++ = 0xfffd;
  430. }
  431. *dst = 0;
  432. *dstlen = dst - ret;
  433. return ret;
  434. }
  435. static char *unicode_to_utf8( const WCHAR *src, int srclen, int *dstlen )
  436. {
  437. char *ret, *dst;
  438. dst = ret = xmalloc( srclen * 3 + 1 );
  439. for ( ; srclen; srclen--, src++)
  440. {
  441. unsigned int ch = *src;
  442. if (ch < 0x80) /* 0x00-0x7f: 1 byte */
  443. {
  444. *dst++ = ch;
  445. continue;
  446. }
  447. if (ch < 0x800) /* 0x80-0x7ff: 2 bytes */
  448. {
  449. dst[1] = 0x80 | (ch & 0x3f);
  450. ch >>= 6;
  451. dst[0] = 0xc0 | ch;
  452. dst += 2;
  453. continue;
  454. }
  455. if (ch >= 0xd800 && ch <= 0xdbff && srclen > 1 && src[1] >= 0xdc00 && src[1] <= 0xdfff)
  456. {
  457. /* 0x10000-0x10ffff: 4 bytes */
  458. ch = 0x10000 + ((ch & 0x3ff) << 10) + (src[1] & 0x3ff);
  459. dst[3] = 0x80 | (ch & 0x3f);
  460. ch >>= 6;
  461. dst[2] = 0x80 | (ch & 0x3f);
  462. ch >>= 6;
  463. dst[1] = 0x80 | (ch & 0x3f);
  464. ch >>= 6;
  465. dst[0] = 0xf0 | ch;
  466. dst += 4;
  467. src++;
  468. srclen--;
  469. continue;
  470. }
  471. if (ch >= 0xd800 && ch <= 0xdfff) ch = 0xfffd; /* invalid surrogate pair */
  472. /* 0x800-0xffff: 3 bytes */
  473. dst[2] = 0x80 | (ch & 0x3f);
  474. ch >>= 6;
  475. dst[1] = 0x80 | (ch & 0x3f);
  476. ch >>= 6;
  477. dst[0] = 0xe0 | ch;
  478. dst += 3;
  479. }
  480. *dst = 0;
  481. *dstlen = dst - ret;
  482. return ret;
  483. }
  484. string_t *convert_string_unicode( const string_t *str, int codepage )
  485. {
  486. string_t *ret = xmalloc(sizeof(*ret));
  487. ret->type = str_unicode;
  488. ret->loc = str->loc;
  489. if (str->type == str_char)
  490. {
  491. if (!codepage) parser_error( "Current language is Unicode only, cannot convert string" );
  492. if (codepage == CP_UTF8)
  493. ret->str.wstr = utf8_to_unicode( str->str.cstr, str->size, &ret->size );
  494. else
  495. ret->str.wstr = codepage_to_unicode( codepage, str->str.cstr, str->size, &ret->size );
  496. if (!ret->str.wstr) parser_error( "Invalid character in string '%.*s' for codepage %u",
  497. str->size, str->str.cstr, codepage );
  498. }
  499. else
  500. {
  501. ret->size = str->size;
  502. ret->str.wstr = xmalloc(sizeof(WCHAR)*(ret->size+1));
  503. memcpy( ret->str.wstr, str->str.wstr, ret->size * sizeof(WCHAR) );
  504. ret->str.wstr[ret->size] = 0;
  505. }
  506. return ret;
  507. }
  508. char *convert_string_utf8( const string_t *str, int codepage )
  509. {
  510. int len;
  511. string_t *wstr = convert_string_unicode( str, codepage );
  512. char *ret = unicode_to_utf8( wstr->str.wstr, wstr->size, &len );
  513. free_string( wstr );
  514. return ret;
  515. }
  516. void free_string(string_t *str)
  517. {
  518. if (str->type == str_unicode) free( str->str.wstr );
  519. else free( str->str.cstr );
  520. free( str );
  521. }
  522. /* check if the string is valid utf8 despite a different codepage being in use */
  523. int check_valid_utf8( const string_t *str, int codepage )
  524. {
  525. int i, count;
  526. WCHAR *wstr;
  527. if (!check_utf8) return 0;
  528. if (!codepage) return 0;
  529. if (codepage == CP_UTF8) return 0;
  530. if (!is_valid_codepage( codepage )) return 0;
  531. for (i = count = 0; i < str->size; i++)
  532. {
  533. if ((unsigned char)str->str.cstr[i] >= 0xf5) goto done;
  534. if ((unsigned char)str->str.cstr[i] >= 0xc2) { count++; continue; }
  535. if ((unsigned char)str->str.cstr[i] >= 0x80) goto done;
  536. }
  537. if (!count) return 0; /* no 8-bit chars at all */
  538. wstr = utf8_to_unicode( str->str.cstr, str->size, &count );
  539. for (i = 0; i < count; i++) if (wstr[i] == 0xfffd) break;
  540. free( wstr );
  541. return (i == count);
  542. done:
  543. check_utf8 = 0; /* at least one 8-bit non-utf8 string found, stop checking */
  544. return 0;
  545. }
  /* one row of the language to codepage table */
  struct lang2cp
  {
      unsigned short lang;     /* primary language identifier (LANG_*) */
      unsigned short sublang;  /* sublanguage identifier (SUBLANG_*) */
      unsigned int   cp;       /* codepage assigned to that language/sublanguage */
  };
  552. /* language to codepage conversion table */
  553. /* specific sublanguages need only be specified if their codepage */
  554. /* differs from the default (SUBLANG_NEUTRAL) */
  555. static const struct lang2cp lang2cps[] =
  556. {
  557. { LANG_AFRIKAANS, SUBLANG_NEUTRAL, 1252 },
  558. { LANG_ALBANIAN, SUBLANG_NEUTRAL, 1250 },
  559. { LANG_ALSATIAN, SUBLANG_NEUTRAL, 1252 },
  560. { LANG_AMHARIC, SUBLANG_NEUTRAL, 0 },
  561. { LANG_ARABIC, SUBLANG_NEUTRAL, 1256 },
  562. { LANG_ARMENIAN, SUBLANG_NEUTRAL, 0 },
  563. { LANG_ASSAMESE, SUBLANG_NEUTRAL, 0 },
  564. { LANG_ASTURIAN, SUBLANG_NEUTRAL, 1252 },
  565. { LANG_AZERI, SUBLANG_NEUTRAL, 1254 },
  566. { LANG_AZERI, SUBLANG_AZERI_CYRILLIC, 1251 },
  567. { LANG_BASHKIR, SUBLANG_NEUTRAL, 1251 },
  568. { LANG_BASQUE, SUBLANG_NEUTRAL, 1252 },
  569. { LANG_BELARUSIAN, SUBLANG_NEUTRAL, 1251 },
  570. { LANG_BENGALI, SUBLANG_NEUTRAL, 0 },
  571. { LANG_BOSNIAN, SUBLANG_NEUTRAL, 1250 },
  572. { LANG_BOSNIAN, SUBLANG_BOSNIAN_BOSNIA_HERZEGOVINA_CYRILLIC, 1251 },
  573. { LANG_BRETON, SUBLANG_NEUTRAL, 1252 },
  574. { LANG_BULGARIAN, SUBLANG_NEUTRAL, 1251 },
  575. { LANG_CATALAN, SUBLANG_NEUTRAL, 1252 },
  576. { LANG_CHINESE, SUBLANG_NEUTRAL, 950 },
  577. { LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED, 936 },
  578. { LANG_CHINESE, SUBLANG_CHINESE_SINGAPORE, 936 },
  579. #ifdef LANG_CORNISH
  580. { LANG_CORNISH, SUBLANG_NEUTRAL, 1252 },
  581. #endif /* LANG_CORNISH */
  582. { LANG_CORSICAN, SUBLANG_NEUTRAL, 1252 },
  583. { LANG_CROATIAN, SUBLANG_NEUTRAL, 1250 },
  584. { LANG_CZECH, SUBLANG_NEUTRAL, 1250 },
  585. { LANG_DANISH, SUBLANG_NEUTRAL, 1252 },
  586. { LANG_DARI, SUBLANG_NEUTRAL, 1256 },
  587. { LANG_DIVEHI, SUBLANG_NEUTRAL, 0 },
  588. { LANG_DUTCH, SUBLANG_NEUTRAL, 1252 },
  589. { LANG_ENGLISH, SUBLANG_NEUTRAL, 1252 },
  590. #ifdef LANG_ESPERANTO
  591. { LANG_ESPERANTO, SUBLANG_NEUTRAL, 1252 },
  592. #endif /* LANG_ESPERANTO */
  593. { LANG_ESTONIAN, SUBLANG_NEUTRAL, 1257 },
  594. { LANG_FAEROESE, SUBLANG_NEUTRAL, 1252 },
  595. { LANG_FILIPINO, SUBLANG_NEUTRAL, 1252 },
  596. { LANG_FINNISH, SUBLANG_NEUTRAL, 1252 },
  597. { LANG_FRENCH, SUBLANG_NEUTRAL, 1252 },
  598. { LANG_FRISIAN, SUBLANG_NEUTRAL, 1252 },
  599. #ifdef LANG_MANX_GAELIC
  600. { LANG_MANX_GAELIC, SUBLANG_NEUTRAL, 1252 },
  601. #endif /* LANG_MANX_GAELIC */
  602. { LANG_GALICIAN, SUBLANG_NEUTRAL, 1252 },
  603. { LANG_GEORGIAN, SUBLANG_NEUTRAL, 0 },
  604. { LANG_GERMAN, SUBLANG_NEUTRAL, 1252 },
  605. { LANG_GREEK, SUBLANG_NEUTRAL, 1253 },
  606. { LANG_GREENLANDIC, SUBLANG_NEUTRAL, 1252 },
  607. { LANG_GUJARATI, SUBLANG_NEUTRAL, 0 },
  608. { LANG_HAUSA, SUBLANG_NEUTRAL, 1252 },
  609. { LANG_HEBREW, SUBLANG_NEUTRAL, 1255 },
  610. { LANG_HINDI, SUBLANG_NEUTRAL, 0 },
  611. { LANG_HUNGARIAN, SUBLANG_NEUTRAL, 1250 },
  612. { LANG_ICELANDIC, SUBLANG_NEUTRAL, 1252 },
  613. { LANG_IGBO, SUBLANG_NEUTRAL, 1252 },
  614. { LANG_INDONESIAN, SUBLANG_NEUTRAL, 1252 },
  615. { LANG_INUKTITUT, SUBLANG_NEUTRAL, 0 },
  616. { LANG_INUKTITUT, SUBLANG_INUKTITUT_CANADA_LATIN, 0 },
  617. { LANG_INVARIANT, SUBLANG_NEUTRAL, 0 },
  618. { LANG_IRISH, SUBLANG_NEUTRAL, 1252 },
  619. { LANG_ITALIAN, SUBLANG_NEUTRAL, 1252 },
  620. { LANG_JAPANESE, SUBLANG_NEUTRAL, 932 },
  621. { LANG_KANNADA, SUBLANG_NEUTRAL, 0 },
  622. { LANG_KAZAK, SUBLANG_NEUTRAL, 1251 },
  623. { LANG_KHMER, SUBLANG_NEUTRAL, 0 },
  624. { LANG_KICHE, SUBLANG_NEUTRAL, 1252 },
  625. { LANG_KINYARWANDA, SUBLANG_NEUTRAL, 1252 },
  626. { LANG_KONKANI, SUBLANG_NEUTRAL, 0 },
  627. { LANG_KOREAN, SUBLANG_NEUTRAL, 949 },
  628. { LANG_KYRGYZ, SUBLANG_NEUTRAL, 1251 },
  629. { LANG_LAO, SUBLANG_NEUTRAL, 0 },
  630. { LANG_LATVIAN, SUBLANG_NEUTRAL, 1257 },
  631. { LANG_LITHUANIAN, SUBLANG_NEUTRAL, 1257 },
  632. { LANG_LOWER_SORBIAN, SUBLANG_NEUTRAL, 1252 },
  633. { LANG_LUXEMBOURGISH, SUBLANG_NEUTRAL, 1252 },
  634. { LANG_MACEDONIAN, SUBLANG_NEUTRAL, 1251 },
  635. { LANG_MALAY, SUBLANG_NEUTRAL, 1252 },
  636. { LANG_MALAYALAM, SUBLANG_NEUTRAL, 0 },
  637. { LANG_MALTESE, SUBLANG_NEUTRAL, 0 },
  638. { LANG_MAORI, SUBLANG_NEUTRAL, 0 },
  639. { LANG_MAPUDUNGUN, SUBLANG_NEUTRAL, 1252 },
  640. { LANG_MARATHI, SUBLANG_NEUTRAL, 0 },
  641. { LANG_MOHAWK, SUBLANG_NEUTRAL, 1252 },
  642. { LANG_MONGOLIAN, SUBLANG_NEUTRAL, 1251 },
  643. { LANG_NEPALI, SUBLANG_NEUTRAL, 0 },
  644. { LANG_NEUTRAL, SUBLANG_NEUTRAL, 1252 },
  645. { LANG_NORWEGIAN, SUBLANG_NEUTRAL, 1252 },
  646. { LANG_OCCITAN, SUBLANG_NEUTRAL, 1252 },
  647. { LANG_ORIYA, SUBLANG_NEUTRAL, 0 },
  648. { LANG_PASHTO, SUBLANG_NEUTRAL, 0 },
  649. { LANG_PERSIAN, SUBLANG_NEUTRAL, 1256 },
  650. { LANG_POLISH, SUBLANG_NEUTRAL, 1250 },
  651. { LANG_PORTUGUESE, SUBLANG_NEUTRAL, 1252 },
  652. { LANG_PUNJABI, SUBLANG_NEUTRAL, 0 },
  653. { LANG_QUECHUA, SUBLANG_NEUTRAL, 1252 },
  654. { LANG_ROMANIAN, SUBLANG_NEUTRAL, 1250 },
  655. { LANG_ROMANSH, SUBLANG_NEUTRAL, 1252 },
  656. { LANG_RUSSIAN, SUBLANG_NEUTRAL, 1251 },
  657. { LANG_SAMI, SUBLANG_NEUTRAL, 1252 },
  658. { LANG_SANSKRIT, SUBLANG_NEUTRAL, 0 },
  659. { LANG_SCOTTISH_GAELIC,SUBLANG_NEUTRAL, 1252 },
  660. { LANG_SERBIAN, SUBLANG_NEUTRAL, 1250 },
  661. { LANG_SERBIAN, SUBLANG_SERBIAN_CYRILLIC, 1251 },
  662. { LANG_SINHALESE, SUBLANG_NEUTRAL, 0 },
  663. { LANG_SLOVAK, SUBLANG_NEUTRAL, 1250 },
  664. { LANG_SLOVENIAN, SUBLANG_NEUTRAL, 1250 },
  665. { LANG_SOTHO, SUBLANG_NEUTRAL, 1252 },
  666. { LANG_SPANISH, SUBLANG_NEUTRAL, 1252 },
  667. { LANG_SWAHILI, SUBLANG_NEUTRAL, 1252 },
  668. { LANG_SWEDISH, SUBLANG_NEUTRAL, 1252 },
  669. { LANG_SYRIAC, SUBLANG_NEUTRAL, 0 },
  670. { LANG_TAJIK, SUBLANG_NEUTRAL, 1251 },
  671. { LANG_TAMAZIGHT, SUBLANG_NEUTRAL, 1252 },
  672. { LANG_TAMIL, SUBLANG_NEUTRAL, 0 },
  673. { LANG_TATAR, SUBLANG_NEUTRAL, 1251 },
  674. { LANG_TELUGU, SUBLANG_NEUTRAL, 0 },
  675. { LANG_THAI, SUBLANG_NEUTRAL, 874 },
  676. { LANG_TIBETAN, SUBLANG_NEUTRAL, 0 },
  677. { LANG_TSWANA, SUBLANG_NEUTRAL, 1252 },
  678. { LANG_TURKISH, SUBLANG_NEUTRAL, 1254 },
  679. { LANG_TURKMEN, SUBLANG_NEUTRAL, 1250 },
  680. { LANG_UIGHUR, SUBLANG_NEUTRAL, 1256 },
  681. { LANG_UKRAINIAN, SUBLANG_NEUTRAL, 1251 },
  682. { LANG_UPPER_SORBIAN, SUBLANG_NEUTRAL, 1252 },
  683. { LANG_URDU, SUBLANG_NEUTRAL, 1256 },
  684. { LANG_UZBEK, SUBLANG_NEUTRAL, 1254 },
  685. { LANG_UZBEK, SUBLANG_UZBEK_CYRILLIC, 1251 },
  686. { LANG_VIETNAMESE, SUBLANG_NEUTRAL, 1258 },
  687. #ifdef LANG_WALON
  688. { LANG_WALON, SUBLANG_NEUTRAL, 1252 },
  689. #endif /* LANG_WALON */
  690. { LANG_WELSH, SUBLANG_NEUTRAL, 1252 },
  691. { LANG_WOLOF, SUBLANG_NEUTRAL, 1252 },
  692. { LANG_XHOSA, SUBLANG_NEUTRAL, 1252 },
  693. { LANG_YAKUT, SUBLANG_NEUTRAL, 1251 },
  694. { LANG_YI, SUBLANG_NEUTRAL, 0 },
  695. { LANG_YORUBA, SUBLANG_NEUTRAL, 1252 },
  696. { LANG_ZULU, SUBLANG_NEUTRAL, 1252 }
  697. };
  698. int get_language_codepage( unsigned short lang, unsigned short sublang )
  699. {
  700. unsigned int i;
  701. int cp = -1, defcp = -1;
  702. for (i = 0; i < ARRAY_SIZE(lang2cps); i++)
  703. {
  704. if (lang2cps[i].lang != lang) continue;
  705. if (lang2cps[i].sublang == sublang)
  706. {
  707. cp = lang2cps[i].cp;
  708. break;
  709. }
  710. if (lang2cps[i].sublang == SUBLANG_NEUTRAL) defcp = lang2cps[i].cp;
  711. }
  712. if (cp == -1) cp = defcp;
  713. return cp;
  714. }