nls.c 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730
  1. /*
  2. * Dump a NLS file
  3. *
  4. * Copyright 2020 Alexandre Julliard
  5. *
  6. * This library is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * This library is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with this library; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  19. */
  20. #include "config.h"
  21. #include "wine/port.h"
  22. #include <stdlib.h>
  23. #include <string.h>
  24. #include <assert.h>
  25. #include "windef.h"
  26. #include "winedump.h"
  /* Fetch 'size' bytes located at offset *pos through PRD() and step *pos past them.
   * The PRD() result is handed back unchanged; *pos is advanced in every case. */
  static const void *read_data( unsigned int *pos, unsigned int size )
  {
      const unsigned int start = *pos;
      const void *data = PRD( start, size );

      *pos = start + size;
      return data;
  }
  /* Map a character through a three-level offset table.
   *
   * table: the table; the high byte of 'ch' selects a first-level entry, the next
   *        nibble a second-level entry, and the low nibble the final delta entry
   * len:   number of entries in 'table'
   * ch:    character to map
   *
   * Returns ch plus the delta found in the table (truncated to 16 bits), or 0 when
   * any of the three lookups would fall outside the table. */
  static unsigned short mapchar( const unsigned short *table, unsigned int len, unsigned short ch )
  {
      unsigned int off = ch >> 8;

      /* the first-level index must be validated like the other two levels */
      if (off >= len) return 0;
      off = table[off] + ((ch >> 4) & 0x0f);
      if (off >= len) return 0;
      off = table[off] + (ch & 0x0f);
      if (off >= len) return 0;
      return ch + table[off];
  }
  /* Print the mapping of all 0x10000 characters through an offset table, 16 per row.
   * Characters that map to themselves are shown as "....". */
  static void dump_offset_table( const unsigned short *table, unsigned int len )
  {
      int code;

      for (code = 0; code < 0x10000; code++)
      {
          int mapped;

          if ((code % 16) == 0) printf( "\n%04x:", code );
          mapped = mapchar( table, len, code );
          if (mapped != code) printf( " %04x", mapped );
          else printf( " ...." );
      }
  }
  52. struct ctype
  53. {
  54. WORD c1, c2, c3;
  55. };
  56. static const char *get_ctype( const struct ctype *ctype )
  57. {
  58. static char buffer[100];
  59. static const char *c1[] = { "up ", "lo ", "dg ", "sp ", "pt ", "cl ", "bl ", "xd ", "al " };
  60. static const char *c2[] = { " ", "L ", "R ", "EN", "ES", "ET",
  61. "AN", "CS", "B ", "S ", "WS", "ON" };
  62. static const char *c3[] = { "ns ", "di ", "vo ", "sy ", "ka ", "hi ", "hw ", "fw ",
  63. "id ", "ks ", "lx ", "hi ", "lo ", " ", " ", "al " };
  64. int i;
  65. strcpy( buffer, "| " );
  66. for (i = 0; i < ARRAY_SIZE(c1); i++)
  67. strcat( buffer, (ctype->c1 & (1 << i)) ? c1[i] : "__ " );
  68. strcat( buffer, "| " );
  69. strcat( buffer, ctype->c2 < ARRAY_SIZE(c2) ? c2[ctype->c2] : "??" );
  70. strcat( buffer, " | " );
  71. for (i = 0; i < ARRAY_SIZE(c3); i++)
  72. strcat( buffer, (ctype->c3 & (1 << i)) ? c3[i] : "__ " );
  73. strcat( buffer, "|" );
  74. return buffer;
  75. }
  76. static void dump_ctype_table( const USHORT *ptr )
  77. {
  78. const struct ctype *ctypes = (const struct ctype *)(ptr + 2);
  79. const BYTE *types = (const BYTE *)ptr + ptr[1] + 2;
  80. int i, len = (ptr[1] - 2) / sizeof(*ctypes);
  81. printf( " CTYPE1 CTYPE2 CTYPE3\n" );
  82. for (i = 0; i < 0x10000; i++)
  83. {
  84. const BYTE *b = types + ((const WORD *)types)[i >> 8];
  85. b = types + ((const WORD *)b)[(i >> 4) & 0x0f] + (i & 0x0f);
  86. if (*b < len) printf( "%04x %s\n", i, get_ctype( ctypes + *b ));
  87. else printf( "%04x ??? %02x\n", i, *b );
  88. }
  89. printf( "\n" );
  90. }
  91. static void dump_casemap(void)
  92. {
  93. unsigned int pos = 0, upper_len, lower_len;
  94. const unsigned short *header, *upper, *lower;
  95. if (!(header = read_data( &pos, 2 * sizeof(*header) ))) return;
  96. upper_len = header[1];
  97. if (!(upper = read_data( &pos, upper_len * sizeof(*upper) )))
  98. {
  99. printf( "Invalid len %04x\n", header[1] );
  100. return;
  101. }
  102. lower_len = dump_total_len / sizeof(*lower) - 2 - upper_len;
  103. if (!(lower = read_data( &pos, lower_len * sizeof(*lower) ))) return;
  104. printf( "Magic: %04x\n", header[0] );
  105. printf( "Upper-case table:\n" );
  106. dump_offset_table( upper, upper_len );
  107. printf( "\n\nLower-case table:\n" );
  108. dump_offset_table( lower, lower_len );
  109. printf( "\n\n" );
  110. }
  /* Dump a codepage file: header fields, the 256-entry character map, the
   * optional glyph table, the optional per-lead-byte tables, and finally the
   * Unicode table (16-bit entries when the char size is 2, bytes otherwise). */
  static void dump_codepage(void)
  {
      const unsigned short *header, *ptr;
      unsigned int i, j, uni2cp_offset, pos = 0;

      header = read_data( &pos, 13 * sizeof(*header) );
      if (!header) return;

      printf( "Codepage: %03u\n", header[1] );
      printf( "Char size: %u\n", header[2] );
      printf( "Default char A: %04x / %04x\n", header[3], header[5] );
      printf( "Default char W: %04x / %04x\n", header[4], header[6] );
      if (header[2] == 2)
      {
          /* up to 12 lead bytes stored after the first 7 header words, zero-terminated */
          const unsigned char *lead = (const unsigned char *)(header + 7);

          printf( "Lead bytes: " );
          for (i = 0; i < 12 && lead[i]; i++)
              printf( "%c%02x", (i % 2) ? '-' : ' ', lead[i] );
          printf( "\n" );
      }

      printf( "\nCharacter map:\n" );
      pos = header[0] * sizeof(*ptr);  /* header[0] is the header size in words */
      ptr = read_data( &pos, sizeof(*ptr) );
      if (!ptr) return;
      uni2cp_offset = pos / sizeof(*ptr) + *ptr;  /* word offset of the Unicode table */

      ptr = read_data( &pos, 256 * sizeof(*ptr) );
      if (!ptr) return;
      for (i = 0; i < 256; i++)
      {
          if ((i % 16) == 0) printf( "\n%02x:", i );
          printf( " %04x", ptr[i] );
      }
      printf( "\n" );

      ptr = read_data( &pos, sizeof(*ptr) );
      if (!ptr) return;
      if (*ptr == 256)
      {
          ptr = read_data( &pos, 256 * sizeof(*ptr) );
          if (!ptr) return;
          printf( "\nGlyph table:\n" );
          for (i = 0; i < 256; i++)
          {
              if ((i % 16) == 0) printf( "\n%02x:", i );
              printf( " %04x", ptr[i] );
          }
          printf( "\n" );
      }

      ptr = read_data( &pos, sizeof(*ptr) );
      if (!ptr) return;
      if (*ptr)
      {
          /* NOTE(review): pos is a byte offset while uni2cp_offset counts 16-bit
           * words; the size below and the "pos - 256" bound mix the two units.
           * Kept as is - confirm against the file format. */
          ptr = read_data( &pos, (uni2cp_offset - pos) * sizeof(*ptr) );
          if (!ptr) return;
          for (i = 0; i < 256; i++)
          {
              if (ptr[i] && ptr[i] <= pos - 256)
              {
                  for (j = 0; j < 256; j++)
                  {
                      if ((j % 16) == 0) printf( "\n%02x%02x:", i, j );
                      printf( " %04x", ptr[ptr[i] + j] );
                  }
              }
          }
          printf( "\n" );
      }

      printf( "\nUnicode table:\n" );
      pos = uni2cp_offset * sizeof(*ptr);
      if (header[2] != 2)
      {
          const unsigned char *uni2cp = read_data( &pos, 65536 );

          if (!uni2cp) return;
          for (i = 0; i < 65536; i++)
          {
              if ((i % 16) == 0) printf( "\n%04x:", i );
              printf( " %02x", uni2cp[i] );
          }
          printf( "\n" );
      }
      else
      {
          ptr = read_data( &pos, 65536 * sizeof(*ptr) );
          if (!ptr) return;
          for (i = 0; i < 65536; i++)
          {
              if ((i % 16) == 0) printf( "\n%04x:", i );
              printf( " %04x", ptr[i] );
          }
          printf( "\n" );
      }
      printf( "\n" );
  }
  194. struct norm_table
  195. {
  196. WCHAR name[13]; /* 00 file name */
  197. USHORT checksum[3]; /* 1a checksum? */
  198. USHORT version[4]; /* 20 Unicode version */
  199. USHORT form; /* 28 normalization form */
  200. USHORT len_factor; /* 2a factor for length estimates */
  201. USHORT unknown1; /* 2c */
  202. USHORT decomp_size; /* 2e decomposition hash size */
  203. USHORT comp_size; /* 30 composition hash size */
  204. USHORT unknown2; /* 32 */
  205. USHORT classes; /* 34 combining classes table offset */
  206. USHORT props_level1; /* 36 char properties table level 1 offset */
  207. USHORT props_level2; /* 38 char properties table level 2 offset */
  208. USHORT decomp_hash; /* 3a decomposition hash table offset */
  209. USHORT decomp_map; /* 3c decomposition character map table offset */
  210. USHORT decomp_seq; /* 3e decomposition character sequences offset */
  211. USHORT comp_hash; /* 40 composition hash table offset */
  212. USHORT comp_seq; /* 42 composition character sequences offset */
  213. };
  /* Multiplier applied to the offsets stored in struct norm_table.
   * Older versions use byte offsets (scale 1); dump_norm() switches it to 2
   * when it detects word offsets. */
  static int offset_scale = 1;

  /* Turn the offset field 'table' of the norm_table header 'info' into a pointer
   * relative to the start of the header. 'info' is evaluated twice; it is
   * parenthesised so that any pointer expression can be passed safely. */
  #define GET_TABLE(info,table) ((const void *)((const BYTE *)(info) + ((info)->table * offset_scale)))
  216. static unsigned int get_utf16( const WCHAR *str )
  217. {
  218. if (str[0] >= 0xd800 && str[0] <= 0xdbff &&
  219. str[1] >= 0xdc00 && str[1] <= 0xdfff)
  220. return 0x10000 + ((str[0] & 0x3ff) << 10) + (str[1] & 0x3ff);
  221. return str[0];
  222. }
  223. static BYTE rol( BYTE val, BYTE count )
  224. {
  225. return (val << count) | (val >> (8 - count));
  226. }
  227. static unsigned char get_char_props( const struct norm_table *info, unsigned int ch )
  228. {
  229. const BYTE *level1 = GET_TABLE( info, props_level1 );
  230. const BYTE *level2 = GET_TABLE( info, props_level2 );
  231. BYTE off = level1[ch / 128];
  232. if (!off || off >= 0xfb) return rol( off, 5 );
  233. return level2[(off - 1) * 128 + ch % 128];
  234. }
  235. static const WCHAR *get_decomposition( const struct norm_table *info,
  236. unsigned int ch, unsigned int *ret_len )
  237. {
  238. const USHORT *hash_table = GET_TABLE( info, decomp_hash );
  239. const WCHAR *seq = GET_TABLE(info, decomp_seq );
  240. const WCHAR *ret;
  241. unsigned int i, pos, end, len, hash;
  242. *ret_len = 1 + (ch >= 0x10000);
  243. if (!info->decomp_size) return NULL;
  244. hash = ch % info->decomp_size;
  245. pos = hash_table[hash];
  246. if (pos >> 13)
  247. {
  248. if (get_char_props( info, ch ) != 0xbf) return NULL;
  249. ret = seq + (pos & 0x1fff);
  250. len = pos >> 13;
  251. }
  252. else
  253. {
  254. const struct { WCHAR src; USHORT dst; } *pairs = GET_TABLE( info, decomp_map );
  255. /* find the end of the hash bucket */
  256. for (i = hash + 1; i < info->decomp_size; i++) if (!(hash_table[i] >> 13)) break;
  257. if (i < info->decomp_size) end = hash_table[i];
  258. else for (end = pos; pairs[end].src; end++) ;
  259. for ( ; pos < end; pos++)
  260. {
  261. if (pairs[pos].src != (WCHAR)ch) continue;
  262. ret = seq + (pairs[pos].dst & 0x1fff);
  263. len = pairs[pos].dst >> 13;
  264. break;
  265. }
  266. if (pos >= end) return NULL;
  267. }
  268. if (len == 7) while (ret[len]) len++;
  269. *ret_len = len;
  270. return ret;
  271. }
  /* qsort() comparator for composition entries: orders arrays of unsigned int
   * by their first element, then by their second.
   * Returns -1, 0 or 1. Explicit comparisons are used instead of a subtraction,
   * since an unsigned difference converted to int yields the wrong sign once
   * the operands are more than INT_MAX apart. */
  static int cmp_compos( const void *a, const void *b )
  {
      const unsigned int *x = a;
      const unsigned int *y = b;

      if (x[0] != y[0]) return x[0] < y[0] ? -1 : 1;
      if (x[1] != y[1]) return x[1] < y[1] ? -1 : 1;
      return 0;
  }
  278. static void dump_norm(void)
  279. {
  280. const struct norm_table *info;
  281. const BYTE *classes;
  282. unsigned int i;
  283. char name[13];
  284. if (!(info = PRD( 0, sizeof(*info) ))) return;
  285. for (i = 0; i < sizeof(name); i++) name[i] = info->name[i];
  286. printf( "Name: %s\n", name );
  287. switch (info->form)
  288. {
  289. case 1: printf( "Form: NFC\n" ); break;
  290. case 2: printf( "Form: NFD\n" ); break;
  291. case 5: printf( "Form: NFKC\n" ); break;
  292. case 6: printf( "Form: NFKD\n" ); break;
  293. case 13: printf( "Form: IDNA\n" ); break;
  294. default: printf( "Form: %u\n", info->form ); break;
  295. }
  296. printf( "Version: %u.%u.%u\n", info->version[0], info->version[1], info->version[2] );
  297. printf( "Factor: %u\n", info->len_factor );
  298. if (info->classes == sizeof(*info) / 2) offset_scale = 2;
  299. classes = GET_TABLE( info, classes );
  300. printf( "\nCharacter classes:\n" );
  301. for (i = 0; i < 0x110000; i++)
  302. {
  303. BYTE flags = get_char_props( info, i );
  304. if (!(i % 16)) printf( "\n%06x:", i );
  305. if (!flags || (flags & 0x3f) == 0x3f)
  306. {
  307. static const char *flagstr[4] = { ".....", "Undef", "QC=No", "Inval" };
  308. printf( " %s", flagstr[flags >> 6] );
  309. }
  310. else
  311. {
  312. static const char flagschar[4] = ".+*M";
  313. BYTE class = classes[flags & 0x3f];
  314. printf( " %c.%03u", flagschar[flags >> 6], class );
  315. }
  316. }
  317. printf( "\n\nDecompositions:\n\n" );
  318. for (i = 0; i < 0x110000; i++)
  319. {
  320. unsigned int j, len;
  321. const WCHAR *decomp = get_decomposition( info, i, &len );
  322. if (!decomp) continue;
  323. printf( "%04x ->", i );
  324. for (j = 0; j < len; j++)
  325. {
  326. unsigned int ch = get_utf16( decomp + j );
  327. printf( " %04x", ch );
  328. if (ch >= 0x10000) j++;
  329. }
  330. printf( "\n" );
  331. }
  332. if (info->comp_size)
  333. {
  334. unsigned int pos, len = (dump_total_len - info->comp_seq * offset_scale) / sizeof(WCHAR);
  335. const WCHAR *seq = GET_TABLE( info, comp_seq );
  336. unsigned int *map = malloc( len * sizeof(*map) );
  337. printf( "\nCompositions:\n\n" );
  338. /* ignore hash table, simply dump all the sequences */
  339. for (i = pos = 0; i < len; pos += 3)
  340. {
  341. map[pos] = get_utf16( seq + i );
  342. i += 1 + (map[pos] >= 0x10000);
  343. map[pos+1] = get_utf16( seq + i );
  344. i += 1 + (map[pos+1] >= 0x10000);
  345. map[pos+2] = get_utf16( seq + i );
  346. i += 1 + (map[pos+2] >= 0x10000);
  347. }
  348. qsort( map, pos / 3, 3 * sizeof(*map), cmp_compos );
  349. for (i = 0; i < pos; i += 3) printf( "%04x %04x -> %04x\n", map[i], map[i + 1], map[i + 2] );
  350. free( map );
  351. }
  352. printf( "\n" );
  353. }
  354. struct sortguid
  355. {
  356. GUID id; /* sort GUID */
  357. DWORD flags; /* flags */
  358. DWORD compr; /* offset to compression table */
  359. DWORD except; /* exception table offset in sortkey table */
  360. DWORD ling_except; /* exception table offset for linguistic casing */
  361. DWORD casemap; /* linguistic casemap table offset */
  362. };
  /* Flag bits; presumably the values of sortguid.flags - TODO confirm, they are
   * not referenced in the visible code. */
  #define FLAG_HAS_3_BYTE_WEIGHTS 0x01
  #define FLAG_REVERSEDIACRITICS  0x10
  #define FLAG_DOUBLECOMPRESSION  0x20
  #define FLAG_INVERSECASING      0x40
  367. struct language_id
  368. {
  369. DWORD offset;
  370. WCHAR name[32];
  371. };
  372. struct compression
  373. {
  374. DWORD offset;
  375. WCHAR minchar, maxchar;
  376. WORD len[8];
  377. };
  378. struct comprlang
  379. {
  380. struct compression compr;
  381. WCHAR name[32];
  382. };
  383. static const char *get_sortkey( DWORD key )
  384. {
  385. static char buffer[16];
  386. if (!key) return "....";
  387. if ((WORD)key == 0x200)
  388. sprintf( buffer, "expand %04x", key >> 16 );
  389. else
  390. sprintf( buffer, "%u.%u.%u.%u", (BYTE)(key >> 8), (BYTE)key, (BYTE)(key >> 16), (BYTE)(key >> 24) );
  391. return buffer;
  392. }
  393. static const void *dump_expansions( const DWORD *ptr )
  394. {
  395. DWORD i, count = *ptr++;
  396. printf( "\nExpansions: (count=%04x)\n\n", count );
  397. for (i = 0; i < count; i++)
  398. {
  399. const WCHAR *p = (const WCHAR *)(ptr + i);
  400. printf( " %04x: %04x %04x\n", i, p[0], p[1] );
  401. }
  402. return ptr + count;
  403. }
  404. static void dump_exceptions( const DWORD *sortkeys, DWORD offset )
  405. {
  406. int i, j;
  407. const DWORD *table = sortkeys + offset;
  408. for (i = 0; i < 0x100; i++)
  409. {
  410. if (table[i] == i * 0x100) continue;
  411. for (j = 0; j < 0x100; j++)
  412. {
  413. if (sortkeys[table[i] + j] == sortkeys[i * 0x100 + j]) continue;
  414. printf( " %04x: %s\n", i * 0x100 + j, get_sortkey( sortkeys[table[i] + j] ));
  415. }
  416. }
  417. }
  418. static const void *dump_compression( const struct compression *compr, const WCHAR *table )
  419. {
  420. int i, j, k;
  421. const WCHAR *p = table + compr->offset;
  422. printf( " min=%04x max=%04x counts=%u,%u,%u,%u,%u,%u,%u,%u\n",
  423. compr->minchar, compr->maxchar,
  424. compr->len[0], compr->len[1], compr->len[2], compr->len[3],
  425. compr->len[4], compr->len[5], compr->len[6], compr->len[7] );
  426. for (i = 0; i < 8; i++)
  427. {
  428. for (j = 0; j < compr->len[i]; j++)
  429. {
  430. printf( " " );
  431. for (k = 0; k < i + 2; k++) printf( " %04x", *p++ );
  432. p = (const WCHAR *)(((ULONG_PTR)p + 3) & ~3);
  433. printf( " -> %s\n", get_sortkey( *(const DWORD *)p ));
  434. p += 2;
  435. }
  436. }
  437. return p;
  438. }
  439. static const void *dump_multiple_weights( const DWORD *ptr )
  440. {
  441. int i, count = *ptr++;
  442. const WCHAR *p;
  443. printf( "\nMultiple weights: (count=%u)\n\n", count );
  444. p = (const WCHAR *)ptr;
  445. for (i = 0; i < count; i++)
  446. {
  447. BYTE weight = p[i];
  448. BYTE count = p[i] >> 8;
  449. printf( "%u - %u\n", weight, weight + count );
  450. }
  451. return ptr + (count + 1) / 2;
  452. }
  453. static void dump_sort( int old_version )
  454. {
  455. const struct
  456. {
  457. DWORD sortkeys;
  458. DWORD casemaps;
  459. DWORD ctypes;
  460. DWORD sortids;
  461. } *header;
  462. const struct compression *compr;
  463. const struct sortguid *guids;
  464. const struct comprlang *comprlangs;
  465. const struct language_id *language_ids = NULL;
  466. const WORD *casemaps, *map;
  467. const DWORD *sortkeys, *ptr;
  468. const WCHAR *p = NULL;
  469. int i, j, size, len;
  470. int nb_casemaps = 0, casemap_offsets[16];
  471. if (!(header = PRD( 0, sizeof(*header) ))) return;
  472. if (!(sortkeys = PRD( header->sortkeys, header->casemaps - header->sortkeys ))) return;
  473. printf( "\nSort keys:\n" );
  474. for (i = 0; i < 0x10000; i++)
  475. {
  476. if (!(i % 8)) printf( "\n%04x:", i );
  477. printf( " %16s", get_sortkey( sortkeys[i] ));
  478. }
  479. printf( "\n\n" );
  480. size = (header->ctypes - header->casemaps) / sizeof(*casemaps);
  481. if (!(casemaps = PRD( header->casemaps, size * sizeof(*casemaps) ))) return;
  482. len = 0;
  483. if (old_version)
  484. {
  485. ptr = (const DWORD *)casemaps;
  486. len = *ptr++;
  487. language_ids = (const struct language_id *)ptr;
  488. casemaps = (const WORD *)(language_ids + len);
  489. }
  490. map = casemaps;
  491. while (size)
  492. {
  493. const WORD *upper = map + 2;
  494. const WORD *lower = map + 2 + map[1];
  495. const WORD *end = map + map[1] + 1 + map[map[1] + 1];
  496. if (map[0] != 1) break;
  497. printf( "\nCase mapping table %u:\n", nb_casemaps );
  498. casemap_offsets[nb_casemaps++] = map - casemaps;
  499. for (j = 0; j < len; j++)
  500. {
  501. if (language_ids[j].offset != map - casemaps) continue;
  502. printf( "Language: %s\n", get_unicode_str( language_ids[j].name, -1 ));
  503. break;
  504. }
  505. printf( "\nUpper-case table:\n" );
  506. dump_offset_table( upper, lower - upper );
  507. printf( "\n\nLower-case table:\n" );
  508. dump_offset_table( lower, end - lower );
  509. printf( "\n\n" );
  510. size -= (end - map);
  511. map = end;
  512. }
  513. if (!(p = PRD( header->ctypes, header->sortids - header->ctypes ))) return;
  514. printf( "\nCTYPE table:\n\n" );
  515. dump_ctype_table( p );
  516. printf( "\nSort tables:\n\n" );
  517. size = (dump_total_len - header->sortids) / sizeof(*ptr);
  518. if (!(ptr = PRD( header->sortids, size * sizeof(*ptr) ))) return;
  519. if (old_version)
  520. {
  521. len = *ptr++;
  522. for (i = 0; i < len; i++, ptr += 2) printf( "NLS version: %08x %08x\n", ptr[0], ptr[1] );
  523. len = *ptr++;
  524. for (i = 0; i < len; i++, ptr += 2) printf( "Defined version: %08x %08x\n", ptr[0], ptr[1] );
  525. len = *ptr++;
  526. printf( "\nReversed diacritics:\n\n" );
  527. for (i = 0; i < len; i++)
  528. {
  529. const WCHAR *name = (const WCHAR *)ptr;
  530. printf( "%s\n", get_unicode_str( name, -1 ));
  531. ptr += 16;
  532. }
  533. len = *ptr++;
  534. printf( "\nDouble compression:\n\n" );
  535. for (i = 0; i < len; i++)
  536. {
  537. const WCHAR *name = (const WCHAR *)ptr;
  538. printf( "%s\n", get_unicode_str( name, -1 ));
  539. ptr += 16;
  540. }
  541. ptr = dump_expansions( ptr );
  542. printf( "\nCompressions:\n" );
  543. size = *ptr++;
  544. comprlangs = (const struct comprlang *)ptr;
  545. for (i = 0; i < size; i++)
  546. {
  547. printf( "\n %s\n", get_unicode_str( comprlangs[i].name, -1 ));
  548. ptr = dump_compression( &comprlangs[i].compr, (const WCHAR *)(comprlangs + size) );
  549. }
  550. ptr = dump_multiple_weights( ptr );
  551. size = *ptr++;
  552. printf( "\nJamo sort:\n\n" );
  553. for (i = 0; i < size; i++, ptr += 2)
  554. {
  555. const struct jamo { BYTE val[5], off, len; } *jamo = (const struct jamo *)ptr;
  556. printf( "%04x: %02x %02x %02x %02x %02x off=%02x len=%02x\n", 0x1100 + i, jamo->val[0],
  557. jamo->val[1], jamo->val[2], jamo->val[3], jamo->val[4],
  558. jamo->off, jamo->len );
  559. }
  560. size = *ptr++;
  561. printf( "\nJamo second chars:\n\n" );
  562. for (i = 0; i < size; i++, ptr += 2)
  563. {
  564. const struct jamo { WORD ch; BYTE val[5], len; } *jamo = (const struct jamo *)ptr;
  565. printf( "%02x: %04x: %02x %02x %02x %02x %02x len=%02x\n", i, jamo->ch, jamo->val[0],
  566. jamo->val[1], jamo->val[2], jamo->val[3], jamo->val[4], jamo->len );
  567. }
  568. size = *ptr++;
  569. printf( "\nExceptions:\n" );
  570. language_ids = (const struct language_id *)ptr;
  571. for (i = 0; i < size; i++)
  572. {
  573. printf( "\n %08x %s\n", language_ids[i].offset, get_unicode_str( language_ids[i].name, -1 ));
  574. dump_exceptions( sortkeys, language_ids[i].offset );
  575. }
  576. }
  577. else
  578. {
  579. int guid_count = ptr[1];
  580. printf( "NLS version: %08x\n\n", ptr[0] );
  581. printf( "Sort GUIDs:\n\n" );
  582. guids = (const struct sortguid *)(ptr + 2);
  583. for (i = 0; i < guid_count; i++)
  584. {
  585. for (j = 0; j < nb_casemaps; j++) if (casemap_offsets[j] == guids[i].casemap) break;
  586. printf( " %s flags=%08x compr=%08x casemap=%d\n", get_guid_str( &guids[i].id ),
  587. guids[i].flags, guids[i].compr, j < nb_casemaps ? j : -1 );
  588. }
  589. ptr = dump_expansions( (const DWORD *)(guids + guid_count) );
  590. size = *ptr++;
  591. printf( "\nCompressions:\n" );
  592. compr = (const struct compression *)ptr;
  593. for (i = 0; i < size; i++)
  594. {
  595. printf( "\n" );
  596. for (j = 0; j < guid_count; j++)
  597. if (guids[j].compr == i) printf( " %s\n", get_guid_str( &guids[j].id ));
  598. ptr = dump_compression( compr + i, (const WCHAR *)(compr + size) );
  599. }
  600. ptr = dump_multiple_weights( ptr );
  601. size = *ptr++;
  602. printf( "\nJamo sort:\n\n" );
  603. for (i = 0; i < size; i++)
  604. {
  605. static const WCHAR hangul_chars[] =
  606. {
  607. 0xa960, 0xa961, 0xa962, 0xa963, 0xa964, 0xa965, 0xa966, 0xa967,
  608. 0xa968, 0xa969, 0xa96a, 0xa96b, 0xa96c, 0xa96d, 0xa96e, 0xa96f,
  609. 0xa970, 0xa971, 0xa972, 0xa973, 0xa974, 0xa975, 0xa976, 0xa977,
  610. 0xa978, 0xa979, 0xa97a, 0xa97b, 0xa97c,
  611. 0xd7b0, 0xd7b1, 0xd7b2, 0xd7b3, 0xd7b4, 0xd7b5, 0xd7b6, 0xd7b7,
  612. 0xd7b8, 0xd7b9, 0xd7ba, 0xd7bb, 0xd7bc, 0xd7bd, 0xd7be, 0xd7bf,
  613. 0xd7c0, 0xd7c1, 0xd7c2, 0xd7c3, 0xd7c4, 0xd7c5, 0xd7c6,
  614. 0xd7cb, 0xd7cc, 0xd7cd, 0xd7ce, 0xd7cf,
  615. 0xd7d0, 0xd7d1, 0xd7d2, 0xd7d3, 0xd7d4, 0xd7d5, 0xd7d6, 0xd7d7,
  616. 0xd7d8, 0xd7d9, 0xd7da, 0xd7db, 0xd7dc, 0xd7dd, 0xd7de, 0xd7df,
  617. 0xd7e0, 0xd7e1, 0xd7e2, 0xd7e3, 0xd7e4, 0xd7e5, 0xd7e6, 0xd7e7,
  618. 0xd7e8, 0xd7e9, 0xd7ea, 0xd7eb, 0xd7ec, 0xd7ed, 0xd7ee, 0xd7ef,
  619. 0xd7f0, 0xd7f1, 0xd7f2, 0xd7f3, 0xd7f4, 0xd7f5, 0xd7f6, 0xd7f7,
  620. 0xd7f8, 0xd7f9, 0xd7fa, 0xd7fb
  621. };
  622. const BYTE *b = (const BYTE *)(ptr + 2 * i);
  623. WCHAR wc = i < 0x100 ? 0x1100 + i : hangul_chars[i - 0x100];
  624. printf( "%04x: %02x %02x %02x %02x %02x\n", wc, b[0], b[1], b[2], b[3], b[4] );
  625. }
  626. printf( "\nExceptions:\n" );
  627. for (i = 0; i < guid_count; i++)
  628. {
  629. if (!guids[i].except) continue;
  630. printf( "\n %s\n", get_guid_str( &guids[i].id ));
  631. dump_exceptions( sortkeys, guids[i].except );
  632. if (!guids[i].ling_except) continue;
  633. printf( "\n %s LINGUISTIC_CASING\n", get_guid_str( &guids[i].id ));
  634. dump_exceptions( sortkeys, guids[i].ling_except );
  635. }
  636. }
  637. printf( "\n" );
  638. }
  639. void nls_dump(void)
  640. {
  641. const char *name = strrchr( globals.input_name, '/' );
  642. if (name) name++;
  643. else name = globals.input_name;
  644. if (!strcasecmp( name, "l_intl.nls" )) return dump_casemap();
  645. if (!strncasecmp( name, "c_", 2 )) return dump_codepage();
  646. if (!strncasecmp( name, "norm", 4 )) return dump_norm();
  647. if (!strcasecmp( name, "sortdefault.nls" )) return dump_sort( 0 );
  648. if (!strncasecmp( name, "sort", 4 )) return dump_sort( 1 );
  649. fprintf( stderr, "Unrecognized file name '%s'\n", globals.input_name );
  650. }
  651. enum FileSig get_kind_nls(void)
  652. {
  653. if (strlen( globals.input_name ) < 5) return SIG_UNKNOWN;
  654. if (strcasecmp( globals.input_name + strlen(globals.input_name) - 4, ".nls" )) return SIG_UNKNOWN;
  655. return SIG_NLS;
  656. }