bidi_test.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373
  1. /*
  2. * Test program that reads the Unicode bidi algorithm test case lists
  3. * that form part of the Unicode Character Database:
  4. *
  5. * https://www.unicode.org/Public/UCD/latest/ucd/BidiTest.txt
  6. * https://www.unicode.org/Public/UCD/latest/ucd/BidiCharacterTest.txt
  7. */
  8. #include <ctype.h>
  9. #include "putty.h"
  10. #include "misc.h"
  11. #include "bidi.h"
  12. static int pass = 0, fail = 0;
  13. static BidiContext *ctx;
  /*
   * Pull the next whitespace-delimited word out of the mutable string at
   * *ptr.
   *
   * Leading whitespace is skipped, the word is NUL-terminated in place, and
   * *ptr is advanced past the word and any whitespace following it. The
   * return value points at the start of the word inside the caller's
   * buffer; it is an empty string once the input is exhausted.
   */
  static const char *extract_word(char **ptr)
  {
      char *cursor = *ptr;

      /* Step over whitespace preceding the word */
      for (; *cursor != '\0' && isspace((unsigned char)*cursor); cursor++)
          ;

      char *word = cursor;

      /* Find the end of the word */
      for (; *cursor != '\0' && !isspace((unsigned char)*cursor); cursor++)
          ;

      if (*cursor != '\0') {
          /* Terminate the word where its first trailing space was ... */
          *cursor = '\0';
          cursor++;

          /* ... and leave the cursor on the next word, if there is one */
          for (; *cursor != '\0' && isspace((unsigned char)*cursor); cursor++)
              ;
      }

      *ptr = cursor;
      return word;
  }
  27. #define TYPETONAME(X) #X,
  28. static const char *const typenames[] = { BIDI_CHAR_TYPE_LIST(TYPETONAME) };
  29. #undef TYPETONAME
  30. static void run_test(const char *filename, unsigned lineno,
  31. bidi_char *bcs, size_t bcs_len,
  32. const unsigned *order, size_t order_len,
  33. int override)
  34. {
  35. size_t bcs_orig_len = bcs_len;
  36. bidi_char *bcs_orig = snewn(bcs_orig_len, bidi_char);
  37. if (bcs_orig_len)
  38. memcpy(bcs_orig, bcs, bcs_orig_len * sizeof(bidi_char));
  39. bcs_len = do_bidi_test(ctx, bcs, bcs_len, override);
  40. /*
  41. * TR9 revision 44 rule X9 says we remove explicit embedding
  42. * controls and BN characters. So the test cases don't list them
  43. * in the expected outputs. Do the same to our own output - unless
  44. * we're testing the standard version of the algorithm, in which
  45. * case, we expect the output to be exactly as the test cases say.
  46. */
  47. unsigned *our_order = snewn(bcs_len, unsigned);
  48. size_t our_order_len = 0;
  49. for (size_t i = 0; i < bcs_len; i++) {
  50. BidiType t = bidi_getType(bcs[i].wc);
  51. #ifndef REMOVE_FORMATTING_CHARS
  52. if (typeIsRemovedDuringProcessing(t))
  53. continue;
  54. #endif
  55. our_order[our_order_len++] = bcs[i].index;
  56. }
  57. bool ok = false;
  58. if (our_order_len == order_len) {
  59. ok = true;
  60. for (size_t i = 0; i < our_order_len; i++)
  61. if (our_order[i] != order[i])
  62. ok = false;
  63. }
  64. if (ok) {
  65. pass++;
  66. } else {
  67. fail++;
  68. printf("%s:%u: failed order\n", filename, lineno);
  69. printf(" input chars:");
  70. for (size_t i = 0; i < bcs_orig_len; i++)
  71. printf(" %04x", bcs_orig[i].wc);
  72. printf("\n");
  73. printf(" classes: ");
  74. for (size_t i = 0; i < bcs_orig_len; i++)
  75. printf(" %-4s", typenames[bidi_getType(bcs_orig[i].wc)]);
  76. printf("\n");
  77. printf(" para level = %s\n",
  78. override > 0 ? "LTR" : override < 0 ? "RTL" : "auto");
  79. printf(" expected:");
  80. for (size_t i = 0; i < order_len; i++)
  81. printf(" %u", order[i]);
  82. printf("\n");
  83. printf(" got: ");
  84. for (size_t i = 0; i < our_order_len; i++)
  85. printf(" %u", our_order[i]);
  86. printf("\n");
  87. }
  88. /* Put the original data back so we can re-test with another override */
  89. memcpy(bcs, bcs_orig, bcs_orig_len * sizeof(bidi_char));
  90. sfree(bcs_orig);
  91. sfree(our_order);
  92. }
  93. static void class_test(const char *filename, FILE *fp)
  94. {
  95. unsigned lineno = 0;
  96. size_t bcs_size = 0, bcs_len = 0;
  97. bidi_char *bcs = NULL;
  98. size_t order_size = 0, order_len = 0;
  99. unsigned *order = NULL;
  100. /* Preliminary: find a representative character of every bidi
  101. * type. Prefer positive-width ones if available. */
  102. unsigned representatives[N_BIDI_TYPES];
  103. for (size_t i = 0; i < N_BIDI_TYPES; i++)
  104. representatives[i] = 0;
  105. for (unsigned uc = 1; uc < 0x110000; uc++) {
  106. unsigned type = bidi_getType(uc);
  107. if (!representatives[type] ||
  108. (mk_wcwidth(representatives[type]) <= 0 && mk_wcwidth(uc) > 0))
  109. representatives[type] = uc;
  110. }
  111. while (true) {
  112. lineno++;
  113. char *line = chomp(fgetline(fp));
  114. if (!line)
  115. break;
  116. /* Skip blank lines and comments */
  117. if (!line[0] || line[0] == '#') {
  118. sfree(line);
  119. continue;
  120. }
  121. /* Parse @Reorder lines, which tell us the expected output
  122. * order for all following test cases (until superseded) */
  123. if (strstartswith(line, "@Reorder:")) {
  124. char *p = line;
  125. extract_word(&p); /* eat the "@Reorder:" header itself */
  126. order_len = 0;
  127. while (1) {
  128. const char *word = extract_word(&p);
  129. if (!*word)
  130. break;
  131. sgrowarray(order, order_size, order_len);
  132. order[order_len++] = strtoul(word, NULL, 0);
  133. }
  134. sfree(line);
  135. continue;
  136. }
  137. /* Skip @Levels lines, which we don't (yet?) do anything with */
  138. if (strstartswith(line, "@Levels:")) {
  139. sfree(line);
  140. continue;
  141. }
  142. /* Everything remaining should be an actual test */
  143. char *semicolon = strchr(line, ';');
  144. if (!semicolon) {
  145. printf("%s:%u: bad test line': no bitmap\n", filename, lineno);
  146. sfree(line);
  147. continue;
  148. }
  149. *semicolon++ = '\0';
  150. unsigned bitmask = strtoul(semicolon, NULL, 0);
  151. char *p = line;
  152. bcs_len = 0;
  153. bool test_ok = true;
  154. while (1) {
  155. const char *word = extract_word(&p);
  156. if (!*word)
  157. break;
  158. unsigned type;
  159. for (type = 0; type < N_BIDI_TYPES; type++)
  160. if (!strcmp(word, typenames[type]))
  161. break;
  162. if (type == N_BIDI_TYPES) {
  163. printf("%s:%u: bad test line: bad bidi type '%s'\n",
  164. filename, lineno, word);
  165. test_ok = false;
  166. break;
  167. }
  168. sgrowarray(bcs, bcs_size, bcs_len);
  169. bcs[bcs_len].wc = representatives[type];
  170. bcs[bcs_len].origwc = bcs[bcs_len].wc;
  171. bcs[bcs_len].index = bcs_len;
  172. bcs[bcs_len].nchars = 1;
  173. bcs_len++;
  174. }
  175. if (!test_ok) {
  176. sfree(line);
  177. continue;
  178. }
  179. if (bitmask & 1)
  180. run_test(filename, lineno, bcs, bcs_len, order, order_len, 0);
  181. if (bitmask & 2)
  182. run_test(filename, lineno, bcs, bcs_len, order, order_len, +1);
  183. if (bitmask & 4)
  184. run_test(filename, lineno, bcs, bcs_len, order, order_len, -1);
  185. sfree(line);
  186. }
  187. sfree(bcs);
  188. sfree(order);
  189. }
  190. static void char_test(const char *filename, FILE *fp)
  191. {
  192. unsigned lineno = 0;
  193. size_t bcs_size = 0, bcs_len = 0;
  194. bidi_char *bcs = NULL;
  195. size_t order_size = 0, order_len = 0;
  196. unsigned *order = NULL;
  197. while (true) {
  198. lineno++;
  199. char *line = chomp(fgetline(fp));
  200. if (!line)
  201. break;
  202. /* Skip blank lines and comments */
  203. if (!line[0] || line[0] == '#') {
  204. sfree(line);
  205. continue;
  206. }
  207. /* Break each test line up into its main fields */
  208. ptrlen input_pl, para_dir_pl, order_pl;
  209. {
  210. ptrlen pl = ptrlen_from_asciz(line);
  211. input_pl = ptrlen_get_word(&pl, ";");
  212. para_dir_pl = ptrlen_get_word(&pl, ";");
  213. ptrlen_get_word(&pl, ";"); /* paragraph level, which we ignore */
  214. ptrlen_get_word(&pl, ";"); /* embedding levels, which we ignore */
  215. order_pl = ptrlen_get_word(&pl, ";");
  216. }
  217. int override;
  218. {
  219. char *para_dir_str = mkstr(para_dir_pl);
  220. unsigned para_dir = strtoul(para_dir_str, NULL, 0);
  221. sfree(para_dir_str);
  222. override = (para_dir == 0 ? +1 : para_dir == 1 ? -1 : 0);
  223. }
  224. /* Break up the input into Unicode characters */
  225. bcs_len = 0;
  226. {
  227. ptrlen pl = input_pl;
  228. while (pl.len) {
  229. ptrlen chr = ptrlen_get_word(&pl, " ");
  230. char *chrstr = mkstr(chr);
  231. sgrowarray(bcs, bcs_size, bcs_len);
  232. bcs[bcs_len].wc = strtoul(chrstr, NULL, 16);
  233. bcs[bcs_len].origwc = bcs[bcs_len].wc;
  234. bcs[bcs_len].index = bcs_len;
  235. bcs[bcs_len].nchars = 1;
  236. bcs_len++;
  237. sfree(chrstr);
  238. }
  239. }
  240. /* Ditto the expected output order */
  241. order_len = 0;
  242. {
  243. ptrlen pl = order_pl;
  244. while (pl.len) {
  245. ptrlen chr = ptrlen_get_word(&pl, " ");
  246. char *chrstr = mkstr(chr);
  247. sgrowarray(order, order_size, order_len);
  248. order[order_len++] = strtoul(chrstr, NULL, 0);
  249. sfree(chrstr);
  250. }
  251. }
  252. run_test(filename, lineno, bcs, bcs_len, order, order_len, override);
  253. sfree(line);
  254. }
  255. sfree(bcs);
  256. sfree(order);
  257. }
  /*
   * Report memory exhaustion on standard error and terminate the program
   * with exit status 2.
   */
  void out_of_memory(void)
  {
      fputs("out of memory!\n", stderr);
      exit(2);
  }
  /*
   * Write the command-line usage summary to the given stream.
   */
  static void usage(FILE *fp)
  {
      static const char usage_text[] =
          "usage: bidi_test ( ( --class | --char ) infile... )...\n"
          "e.g.: bidi_test --class BidiTest.txt --char BidiCharacterTest.txt\n"
          "also: --help display this text\n";

      fputs(usage_text, fp);
  }
  271. int main(int argc, char **argv)
  272. {
  273. void (*testfn)(const char *, FILE *) = NULL;
  274. bool doing_opts = true;
  275. const char *filename = NULL;
  276. bool done_something = false;
  277. ctx = bidi_new_context();
  278. while (--argc > 0) {
  279. const char *arg = *++argv;
  280. if (doing_opts && arg[0] == '-' && arg[1]) {
  281. if (!strcmp(arg, "--")) {
  282. doing_opts = false;
  283. } else if (!strcmp(arg, "--class")) {
  284. testfn = class_test;
  285. } else if (!strcmp(arg, "--char")) {
  286. testfn = char_test;
  287. } else if (!strcmp(arg, "--help")) {
  288. usage(stdout);
  289. return 0;
  290. } else {
  291. fprintf(stderr, "unrecognised option '%s'\n", arg);
  292. return 1;
  293. }
  294. } else {
  295. const char *filename = arg;
  296. if (!testfn) {
  297. fprintf(stderr, "no mode argument provided before filename "
  298. "'%s'\n", filename);
  299. return 1;
  300. }
  301. if (!strcmp(filename, "-")) {
  302. testfn("<standard input>", stdin);
  303. } else {
  304. FILE *fp = fopen(filename, "r");
  305. if (!fp) {
  306. fprintf(stderr, "unable to open '%s'\n", filename);
  307. return 1;
  308. }
  309. testfn(filename, fp);
  310. fclose(fp);
  311. }
  312. done_something = true;
  313. }
  314. }
  315. if (!done_something) {
  316. usage(stderr);
  317. return 1;
  318. }
  319. if (!filename)
  320. filename = "-";
  321. printf("pass %d fail %d total %d\n", pass, fail, pass + fail);
  322. bidi_free_context(ctx);
  323. return fail != 0;
  324. }