minibidi.c 58 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024
  1. /************************************************************************
  2. *
  3. * ------------
  4. * Description:
  5. * ------------
  6. * This is an implementation of Unicode's Bidirectional Algorithm
  7. * (known as UAX #9).
  8. *
  9. * http://www.unicode.org/reports/tr9/
  10. *
  11. * Author: Ahmad Khalifa
  12. *
  13. * (www.arabeyes.org - under MIT license)
  14. *
  15. ************************************************************************/
  16. /*
  17. * TODO:
  18. * =====
  19. * - Explicit marks need to be handled (they are not 100% now)
  20. * - Ligatures
  21. */
  22. #include <stdlib.h> /* definition of wchar_t*/
  23. #include "misc.h"
  /*
   * Bit masks applied to a level value: the low six bits (LMASK) carry
   * the embedding level, the top two bits of the byte (OMASK) carry the
   * override state, and OISL / OISR are the two individual override bits.
   */
  #define LMASK 0x3F /* embedding-level bits */
  #define OMASK 0xC0 /* override bits (OISL | OISR) */
  #define OISL  0x80 /* override is L */
  #define OISR  0x40 /* override is R */

  /*
   * Standalone test mode.  When TEST_GETTYPE is defined the allocator
   * names are mapped straight onto the C library, so the file still needs
   * the PuTTY headers (for snewn and sfree) but does not have to be
   * _linked_ against any other PuTTY code.
   */
  #ifdef TEST_GETTYPE
  #define safemalloc malloc
  #define safefree free
  #endif
  /*
   * Shaping helpers.
   *
   * STYPE(xh)     - shaping type recorded in shapetypes[] for xh; any value
   *                 outside SHAPE_FIRST..SHAPE_LAST yields SU.
   * SISOLATED(xh) - form_b entry of shapetypes[] for xh.  No range check is
   *                 made here, so xh must already lie inside the table.
   * SFINAL, SINITIAL, SMEDIAL - the argument plus 1, 2 and 3 respectively
   *                 (presumably applied to an isolated form to reach the
   *                 other presentation forms - confirm against do_shape).
   */
  #define STYPE(xh) \
      ((((xh) < SHAPE_FIRST) || ((xh) > SHAPE_LAST)) ? \
       SU : shapetypes[(xh) - SHAPE_FIRST].type)
  #define SISOLATED(xh) (shapetypes[(xh) - SHAPE_FIRST].form_b)
  #define SFINAL(xh)    ((xh) + 1)
  #define SINITIAL(xh)  ((xh) + 2)
  #define SMEDIAL(xh)   ((xh) + 3)

  /*
   * Level arithmetic: the smallest odd (respectively even) number that is
   * strictly greater than x.
   */
  #define leastGreaterOdd(x)  ((((x) + 1)) | 1)
  #define leastGreaterEven(x) ((((x) + 2)) & ~1)
  /*
   * One element of the line buffer handed to the bidi/shaping routines.
   * NOTE(review): field meanings are not visible in this part of the file;
   * presumably origwc is the character as supplied, wc the character after
   * processing, and index the element's original position - confirm
   * against do_bidi / do_shape.
   */
  typedef struct bidi_char {
      unsigned int origwc;
      unsigned int wc;
      unsigned short index;
  } bidi_char;
  48. /* function declarations */
  49. void flipThisRun(bidi_char *from, unsigned char* level, int max, int count);
  50. int findIndexOfRun(unsigned char* level , int start, int count, int tlevel);
  51. unsigned char getType(int ch);
  52. unsigned char setOverrideBits(unsigned char level, unsigned char override);
  53. int getPreviousLevel(unsigned char* level, int from);
  54. int do_shape(bidi_char *line, bidi_char *to, int count);
  55. int do_bidi(bidi_char *line, int count);
  56. void doMirror(unsigned int *ch);
  /*
   * Bidirectional character types, abbreviated as in UAX #9.  The numeric
   * values follow declaration order, starting at 0 for L.
   */
  enum {
      L,      /* Left-to-Right */
      LRE,    /* Left-to-Right Embedding */
      LRO,    /* Left-to-Right Override */
      R,      /* Right-to-Left */
      AL,     /* Right-to-Left Arabic */
      RLE,    /* Right-to-Left Embedding */
      RLO,    /* Right-to-Left Override */
      PDF,    /* Pop Directional Format */
      EN,     /* European Number */
      ES,     /* European Number Separator */
      ET,     /* European Number Terminator */
      AN,     /* Arabic Number */
      CS,     /* Common Number Separator */
      NSM,    /* Nonspacing Mark */
      BN,     /* Boundary Neutral */
      B,      /* Paragraph Separator */
      S,      /* Segment Separator */
      WS,     /* Whitespace */
      ON      /* Other Neutrals */
  };
  /*
   * Shaping (joining) types.
   */
  enum {
      SL,     /* Left-Joining; does not exist in U+0600 - U+06FF */
      SR,     /* Right-Joining, i.e. has Isolated and Final forms */
      SD,     /* Dual-Joining, i.e. has Isolated, Final, Initial, Medial */
      SU,     /* Non-Joining */
      SC      /* Join-Causing, like U+0640 (TATWEEL) */
  };

  /*
   * One row of the shaping table: the shaping type of a character and the
   * code point stored as its form_b (isolated form; 0x0000 where the table
   * records none).
   */
  typedef struct {
      char type;
      wchar_t form_b;
  } shape_node;

  /*
   * Range covered by shapetypes[].  These live next to the table so the
   * two can be checked against each other: entry [i] describes the code
   * point SHAPE_FIRST + i.  (lenof is not defined in this file; presumably
   * it comes from misc.h.)
   */
  #define SHAPE_FIRST 0x621
  #define SHAPE_LAST (SHAPE_FIRST + lenof(shapetypes) - 1)

  const shape_node shapetypes[] = {
      /* { type, isolated form },    code point */
      { SU, 0xFE80 }, /* U+0621 */
      { SR, 0xFE81 }, /* U+0622 */
      { SR, 0xFE83 }, /* U+0623 */
      { SR, 0xFE85 }, /* U+0624 */
      { SR, 0xFE87 }, /* U+0625 */
      { SD, 0xFE89 }, /* U+0626 */
      { SR, 0xFE8D }, /* U+0627 */
      { SD, 0xFE8F }, /* U+0628 */
      { SR, 0xFE93 }, /* U+0629 */
      { SD, 0xFE95 }, /* U+062A */
      { SD, 0xFE99 }, /* U+062B */
      { SD, 0xFE9D }, /* U+062C */
      { SD, 0xFEA1 }, /* U+062D */
      { SD, 0xFEA5 }, /* U+062E */
      { SR, 0xFEA9 }, /* U+062F */
      { SR, 0xFEAB }, /* U+0630 */
      { SR, 0xFEAD }, /* U+0631 */
      { SR, 0xFEAF }, /* U+0632 */
      { SD, 0xFEB1 }, /* U+0633 */
      { SD, 0xFEB5 }, /* U+0634 */
      { SD, 0xFEB9 }, /* U+0635 */
      { SD, 0xFEBD }, /* U+0636 */
      { SD, 0xFEC1 }, /* U+0637 */
      { SD, 0xFEC5 }, /* U+0638 */
      { SD, 0xFEC9 }, /* U+0639 */
      { SD, 0xFECD }, /* U+063A */
      { SU, 0x0000 }, /* U+063B */
      { SU, 0x0000 }, /* U+063C */
      { SU, 0x0000 }, /* U+063D */
      { SU, 0x0000 }, /* U+063E */
      { SU, 0x0000 }, /* U+063F */
      { SC, 0x0000 }, /* U+0640 */
      { SD, 0xFED1 }, /* U+0641 */
      { SD, 0xFED5 }, /* U+0642 */
      { SD, 0xFED9 }, /* U+0643 */
      { SD, 0xFEDD }, /* U+0644 */
      { SD, 0xFEE1 }, /* U+0645 */
      { SD, 0xFEE5 }, /* U+0646 */
      { SD, 0xFEE9 }, /* U+0647 */
      { SR, 0xFEED }, /* U+0648 */
      { SR, 0xFEEF }, /* U+0649; the original table annotates this entry "SD" */
      { SD, 0xFEF1 }, /* U+064A */
      { SU, 0x0000 }, /* U+064B */
      { SU, 0x0000 }, /* U+064C */
      { SU, 0x0000 }, /* U+064D */
      { SU, 0x0000 }, /* U+064E */
      { SU, 0x0000 }, /* U+064F */
      { SU, 0x0000 }, /* U+0650 */
      { SU, 0x0000 }, /* U+0651 */
      { SU, 0x0000 }, /* U+0652 */
      { SU, 0x0000 }, /* U+0653 */
      { SU, 0x0000 }, /* U+0654 */
      { SU, 0x0000 }, /* U+0655 */
      { SU, 0x0000 }, /* U+0656 */
      { SU, 0x0000 }, /* U+0657 */
      { SU, 0x0000 }, /* U+0658 */
      { SU, 0x0000 }, /* U+0659 */
      { SU, 0x0000 }, /* U+065A */
      { SU, 0x0000 }, /* U+065B */
      { SU, 0x0000 }, /* U+065C */
      { SU, 0x0000 }, /* U+065D */
      { SU, 0x0000 }, /* U+065E */
      { SU, 0x0000 }, /* U+065F */
      { SU, 0x0000 }, /* U+0660 */
      { SU, 0x0000 }, /* U+0661 */
      { SU, 0x0000 }, /* U+0662 */
      { SU, 0x0000 }, /* U+0663 */
      { SU, 0x0000 }, /* U+0664 */
      { SU, 0x0000 }, /* U+0665 */
      { SU, 0x0000 }, /* U+0666 */
      { SU, 0x0000 }, /* U+0667 */
      { SU, 0x0000 }, /* U+0668 */
      { SU, 0x0000 }, /* U+0669 */
      { SU, 0x0000 }, /* U+066A */
      { SU, 0x0000 }, /* U+066B */
      { SU, 0x0000 }, /* U+066C */
      { SU, 0x0000 }, /* U+066D */
      { SU, 0x0000 }, /* U+066E */
      { SU, 0x0000 }, /* U+066F */
      { SU, 0x0000 }, /* U+0670 */
      { SR, 0xFB50 }, /* U+0671 */
      { SU, 0x0000 }, /* U+0672 */
      { SU, 0x0000 }, /* U+0673 */
      { SU, 0x0000 }, /* U+0674 */
      { SU, 0x0000 }, /* U+0675 */
      { SU, 0x0000 }, /* U+0676 */
      { SU, 0x0000 }, /* U+0677 */
      { SU, 0x0000 }, /* U+0678 */
      { SD, 0xFB66 }, /* U+0679 */
      { SD, 0xFB5E }, /* U+067A */
      { SD, 0xFB52 }, /* U+067B */
      { SU, 0x0000 }, /* U+067C */
      { SU, 0x0000 }, /* U+067D */
      { SD, 0xFB56 }, /* U+067E */
      { SD, 0xFB62 }, /* U+067F */
      { SD, 0xFB5A }, /* U+0680 */
      { SU, 0x0000 }, /* U+0681 */
      { SU, 0x0000 }, /* U+0682 */
      { SD, 0xFB76 }, /* U+0683 */
      { SD, 0xFB72 }, /* U+0684 */
      { SU, 0x0000 }, /* U+0685 */
      { SD, 0xFB7A }, /* U+0686 */
      { SD, 0xFB7E }, /* U+0687 */
      { SR, 0xFB88 }, /* U+0688 */
      { SU, 0x0000 }, /* U+0689 */
      { SU, 0x0000 }, /* U+068A */
      { SU, 0x0000 }, /* U+068B */
      { SR, 0xFB84 }, /* U+068C */
      { SR, 0xFB82 }, /* U+068D */
      { SR, 0xFB86 }, /* U+068E */
      { SU, 0x0000 }, /* U+068F */
      { SU, 0x0000 }, /* U+0690 */
      { SR, 0xFB8C }, /* U+0691 */
      { SU, 0x0000 }, /* U+0692 */
      { SU, 0x0000 }, /* U+0693 */
      { SU, 0x0000 }, /* U+0694 */
      { SU, 0x0000 }, /* U+0695 */
      { SU, 0x0000 }, /* U+0696 */
      { SU, 0x0000 }, /* U+0697 */
      { SR, 0xFB8A }, /* U+0698 */
      { SU, 0x0000 }, /* U+0699 */
      { SU, 0x0000 }, /* U+069A */
      { SU, 0x0000 }, /* U+069B */
      { SU, 0x0000 }, /* U+069C */
      { SU, 0x0000 }, /* U+069D */
      { SU, 0x0000 }, /* U+069E */
      { SU, 0x0000 }, /* U+069F */
      { SU, 0x0000 }, /* U+06A0 */
      { SU, 0x0000 }, /* U+06A1 */
      { SU, 0x0000 }, /* U+06A2 */
      { SU, 0x0000 }, /* U+06A3 */
      { SD, 0xFB6A }, /* U+06A4 */
      { SU, 0x0000 }, /* U+06A5 */
      { SD, 0xFB6E }, /* U+06A6 */
      { SU, 0x0000 }, /* U+06A7 */
      { SU, 0x0000 }, /* U+06A8 */
      { SD, 0xFB8E }, /* U+06A9 */
      { SU, 0x0000 }, /* U+06AA */
      { SU, 0x0000 }, /* U+06AB */
      { SU, 0x0000 }, /* U+06AC */
      { SD, 0xFBD3 }, /* U+06AD */
      { SU, 0x0000 }, /* U+06AE */
      { SD, 0xFB92 }, /* U+06AF */
      { SU, 0x0000 }, /* U+06B0 */
      { SD, 0xFB9A }, /* U+06B1 */
      { SU, 0x0000 }, /* U+06B2 */
      { SD, 0xFB96 }, /* U+06B3 */
      { SU, 0x0000 }, /* U+06B4 */
      { SU, 0x0000 }, /* U+06B5 */
      { SU, 0x0000 }, /* U+06B6 */
      { SU, 0x0000 }, /* U+06B7 */
      { SU, 0x0000 }, /* U+06B8 */
      { SU, 0x0000 }, /* U+06B9 */
      { SR, 0xFB9E }, /* U+06BA */
      { SD, 0xFBA0 }, /* U+06BB */
      { SU, 0x0000 }, /* U+06BC */
      { SU, 0x0000 }, /* U+06BD */
      { SD, 0xFBAA }, /* U+06BE */
      { SU, 0x0000 }, /* U+06BF */
      { SR, 0xFBA4 }, /* U+06C0 */
      { SD, 0xFBA6 }, /* U+06C1 */
      { SU, 0x0000 }, /* U+06C2 */
      { SU, 0x0000 }, /* U+06C3 */
      { SU, 0x0000 }, /* U+06C4 */
      { SR, 0xFBE0 }, /* U+06C5 */
      { SR, 0xFBD9 }, /* U+06C6 */
      { SR, 0xFBD7 }, /* U+06C7 */
      { SR, 0xFBDB }, /* U+06C8 */
      { SR, 0xFBE2 }, /* U+06C9 */
      { SU, 0x0000 }, /* U+06CA */
      { SR, 0xFBDE }, /* U+06CB */
      { SD, 0xFBFC }, /* U+06CC */
      { SU, 0x0000 }, /* U+06CD */
      { SU, 0x0000 }, /* U+06CE */
      { SU, 0x0000 }, /* U+06CF */
      { SU, 0x0000 }, /* U+06D0 */
      { SU, 0x0000 }, /* U+06D1 */
      { SR, 0xFBAE }, /* U+06D2 */
  };
  275. /*
  276. * Flips the text buffer, according to max level, and
  277. * all higher levels
  278. *
  279. * Input:
  280. * from: text buffer, on which to apply flipping
  281. * level: resolved levels buffer
  282. * max: the maximum level found in this line (should be unsigned char)
  283. * count: line size in bidi_char
  284. */
  285. void flipThisRun(bidi_char *from, unsigned char *level, int max, int count)
  286. {
  287. int i, j, k, tlevel;
  288. bidi_char temp;
  289. j = i = 0;
  290. while (i<count && j<count) {
  291. /* find the start of the run of level=max */
  292. tlevel = max;
  293. i = j = findIndexOfRun(level, i, count, max);
  294. /* find the end of the run */
  295. while (i<count && tlevel <= level[i]) {
  296. i++;
  297. }
  298. for (k = i - 1; k > j; k--, j++) {
  299. temp = from[k];
  300. from[k] = from[j];
  301. from[j] = temp;
  302. }
  303. }
  304. }
  /*
   * Returns the first index in [start, count) at which level[] equals
   * tlevel.  When no such entry exists (including when start >= count)
   * the result is count.
   */
  int findIndexOfRun(unsigned char *level, int start, int count, int tlevel)
  {
      int pos = start;

      while (pos < count && level[pos] != tlevel) {
          pos++;
      }

      return (pos < count) ? pos : count;
  }
  318. /*
  319. * Returns the bidi character type of ch.
  320. *
  321. * The data table in this function is constructed from the Unicode
  322. * Character Database, downloadable from unicode.org at the URL
  323. *
  324. * http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
  325. *
  326. * by the following fragment of Perl:
  327. perl -ne 'split ";"; $num = hex $_[0]; $type = $_[4];' \
  328. -e '$fl = ($_[1] =~ /First/ ? 1 : $_[1] =~ /Last/ ? 2 : 0);' \
  329. -e 'if ($type eq $runtype and ($runend == $num-1 or ' \
  330. -e ' ($fl==2 and $pfl==1))) {$runend = $num;} else { &reset; }' \
  331. -e '$pfl=$fl; END { &reset }; sub reset {' \
  332. -e 'printf" {0x%04x, 0x%04x, %s},\n",$runstart,$runend,$runtype' \
  333. -e ' if defined $runstart and $runtype ne "ON";' \
  334. -e '$runstart=$runend=$num; $runtype=$type;}' \
  335. UnicodeData.txt
  336. */
  337. unsigned char getType(int ch)
  338. {
  339. static const struct {
  340. int first, last, type;
  341. } lookup[] = {
  342. {0x0000, 0x0008, BN},
  343. {0x0009, 0x0009, S},
  344. {0x000a, 0x000a, B},
  345. {0x000b, 0x000b, S},
  346. {0x000c, 0x000c, WS},
  347. {0x000d, 0x000d, B},
  348. {0x000e, 0x001b, BN},
  349. {0x001c, 0x001e, B},
  350. {0x001f, 0x001f, S},
  351. {0x0020, 0x0020, WS},
  352. {0x0023, 0x0025, ET},
  353. {0x002b, 0x002b, ES},
  354. {0x002c, 0x002c, CS},
  355. {0x002d, 0x002d, ES},
  356. {0x002e, 0x002f, CS},
  357. {0x0030, 0x0039, EN},
  358. {0x003a, 0x003a, CS},
  359. {0x0041, 0x005a, L},
  360. {0x0061, 0x007a, L},
  361. {0x007f, 0x0084, BN},
  362. {0x0085, 0x0085, B},
  363. {0x0086, 0x009f, BN},
  364. {0x00a0, 0x00a0, CS},
  365. {0x00a2, 0x00a5, ET},
  366. {0x00aa, 0x00aa, L},
  367. {0x00ad, 0x00ad, BN},
  368. {0x00b0, 0x00b1, ET},
  369. {0x00b2, 0x00b3, EN},
  370. {0x00b5, 0x00b5, L},
  371. {0x00b9, 0x00b9, EN},
  372. {0x00ba, 0x00ba, L},
  373. {0x00c0, 0x00d6, L},
  374. {0x00d8, 0x00f6, L},
  375. {0x00f8, 0x0236, L},
  376. {0x0250, 0x02b8, L},
  377. {0x02bb, 0x02c1, L},
  378. {0x02d0, 0x02d1, L},
  379. {0x02e0, 0x02e4, L},
  380. {0x02ee, 0x02ee, L},
  381. {0x0300, 0x0357, NSM},
  382. {0x035d, 0x036f, NSM},
  383. {0x037a, 0x037a, L},
  384. {0x0386, 0x0386, L},
  385. {0x0388, 0x038a, L},
  386. {0x038c, 0x038c, L},
  387. {0x038e, 0x03a1, L},
  388. {0x03a3, 0x03ce, L},
  389. {0x03d0, 0x03f5, L},
  390. {0x03f7, 0x03fb, L},
  391. {0x0400, 0x0482, L},
  392. {0x0483, 0x0486, NSM},
  393. {0x0488, 0x0489, NSM},
  394. {0x048a, 0x04ce, L},
  395. {0x04d0, 0x04f5, L},
  396. {0x04f8, 0x04f9, L},
  397. {0x0500, 0x050f, L},
  398. {0x0531, 0x0556, L},
  399. {0x0559, 0x055f, L},
  400. {0x0561, 0x0587, L},
  401. {0x0589, 0x0589, L},
  402. {0x0591, 0x05a1, NSM},
  403. {0x05a3, 0x05b9, NSM},
  404. {0x05bb, 0x05bd, NSM},
  405. {0x05be, 0x05be, R},
  406. {0x05bf, 0x05bf, NSM},
  407. {0x05c0, 0x05c0, R},
  408. {0x05c1, 0x05c2, NSM},
  409. {0x05c3, 0x05c3, R},
  410. {0x05c4, 0x05c4, NSM},
  411. {0x05d0, 0x05ea, R},
  412. {0x05f0, 0x05f4, R},
  413. {0x0600, 0x0603, AL},
  414. {0x060c, 0x060c, CS},
  415. {0x060d, 0x060d, AL},
  416. {0x0610, 0x0615, NSM},
  417. {0x061b, 0x061b, AL},
  418. {0x061f, 0x061f, AL},
  419. {0x0621, 0x063a, AL},
  420. {0x0640, 0x064a, AL},
  421. {0x064b, 0x0658, NSM},
  422. {0x0660, 0x0669, AN},
  423. {0x066a, 0x066a, ET},
  424. {0x066b, 0x066c, AN},
  425. {0x066d, 0x066f, AL},
  426. {0x0670, 0x0670, NSM},
  427. {0x0671, 0x06d5, AL},
  428. {0x06d6, 0x06dc, NSM},
  429. {0x06dd, 0x06dd, AL},
  430. {0x06de, 0x06e4, NSM},
  431. {0x06e5, 0x06e6, AL},
  432. {0x06e7, 0x06e8, NSM},
  433. {0x06ea, 0x06ed, NSM},
  434. {0x06ee, 0x06ef, AL},
  435. {0x06f0, 0x06f9, EN},
  436. {0x06fa, 0x070d, AL},
  437. {0x070f, 0x070f, BN},
  438. {0x0710, 0x0710, AL},
  439. {0x0711, 0x0711, NSM},
  440. {0x0712, 0x072f, AL},
  441. {0x0730, 0x074a, NSM},
  442. {0x074d, 0x074f, AL},
  443. {0x0780, 0x07a5, AL},
  444. {0x07a6, 0x07b0, NSM},
  445. {0x07b1, 0x07b1, AL},
  446. {0x0901, 0x0902, NSM},
  447. {0x0903, 0x0939, L},
  448. {0x093c, 0x093c, NSM},
  449. {0x093d, 0x0940, L},
  450. {0x0941, 0x0948, NSM},
  451. {0x0949, 0x094c, L},
  452. {0x094d, 0x094d, NSM},
  453. {0x0950, 0x0950, L},
  454. {0x0951, 0x0954, NSM},
  455. {0x0958, 0x0961, L},
  456. {0x0962, 0x0963, NSM},
  457. {0x0964, 0x0970, L},
  458. {0x0981, 0x0981, NSM},
  459. {0x0982, 0x0983, L},
  460. {0x0985, 0x098c, L},
  461. {0x098f, 0x0990, L},
  462. {0x0993, 0x09a8, L},
  463. {0x09aa, 0x09b0, L},
  464. {0x09b2, 0x09b2, L},
  465. {0x09b6, 0x09b9, L},
  466. {0x09bc, 0x09bc, NSM},
  467. {0x09bd, 0x09c0, L},
  468. {0x09c1, 0x09c4, NSM},
  469. {0x09c7, 0x09c8, L},
  470. {0x09cb, 0x09cc, L},
  471. {0x09cd, 0x09cd, NSM},
  472. {0x09d7, 0x09d7, L},
  473. {0x09dc, 0x09dd, L},
  474. {0x09df, 0x09e1, L},
  475. {0x09e2, 0x09e3, NSM},
  476. {0x09e6, 0x09f1, L},
  477. {0x09f2, 0x09f3, ET},
  478. {0x09f4, 0x09fa, L},
  479. {0x0a01, 0x0a02, NSM},
  480. {0x0a03, 0x0a03, L},
  481. {0x0a05, 0x0a0a, L},
  482. {0x0a0f, 0x0a10, L},
  483. {0x0a13, 0x0a28, L},
  484. {0x0a2a, 0x0a30, L},
  485. {0x0a32, 0x0a33, L},
  486. {0x0a35, 0x0a36, L},
  487. {0x0a38, 0x0a39, L},
  488. {0x0a3c, 0x0a3c, NSM},
  489. {0x0a3e, 0x0a40, L},
  490. {0x0a41, 0x0a42, NSM},
  491. {0x0a47, 0x0a48, NSM},
  492. {0x0a4b, 0x0a4d, NSM},
  493. {0x0a59, 0x0a5c, L},
  494. {0x0a5e, 0x0a5e, L},
  495. {0x0a66, 0x0a6f, L},
  496. {0x0a70, 0x0a71, NSM},
  497. {0x0a72, 0x0a74, L},
  498. {0x0a81, 0x0a82, NSM},
  499. {0x0a83, 0x0a83, L},
  500. {0x0a85, 0x0a8d, L},
  501. {0x0a8f, 0x0a91, L},
  502. {0x0a93, 0x0aa8, L},
  503. {0x0aaa, 0x0ab0, L},
  504. {0x0ab2, 0x0ab3, L},
  505. {0x0ab5, 0x0ab9, L},
  506. {0x0abc, 0x0abc, NSM},
  507. {0x0abd, 0x0ac0, L},
  508. {0x0ac1, 0x0ac5, NSM},
  509. {0x0ac7, 0x0ac8, NSM},
  510. {0x0ac9, 0x0ac9, L},
  511. {0x0acb, 0x0acc, L},
  512. {0x0acd, 0x0acd, NSM},
  513. {0x0ad0, 0x0ad0, L},
  514. {0x0ae0, 0x0ae1, L},
  515. {0x0ae2, 0x0ae3, NSM},
  516. {0x0ae6, 0x0aef, L},
  517. {0x0af1, 0x0af1, ET},
  518. {0x0b01, 0x0b01, NSM},
  519. {0x0b02, 0x0b03, L},
  520. {0x0b05, 0x0b0c, L},
  521. {0x0b0f, 0x0b10, L},
  522. {0x0b13, 0x0b28, L},
  523. {0x0b2a, 0x0b30, L},
  524. {0x0b32, 0x0b33, L},
  525. {0x0b35, 0x0b39, L},
  526. {0x0b3c, 0x0b3c, NSM},
  527. {0x0b3d, 0x0b3e, L},
  528. {0x0b3f, 0x0b3f, NSM},
  529. {0x0b40, 0x0b40, L},
  530. {0x0b41, 0x0b43, NSM},
  531. {0x0b47, 0x0b48, L},
  532. {0x0b4b, 0x0b4c, L},
  533. {0x0b4d, 0x0b4d, NSM},
  534. {0x0b56, 0x0b56, NSM},
  535. {0x0b57, 0x0b57, L},
  536. {0x0b5c, 0x0b5d, L},
  537. {0x0b5f, 0x0b61, L},
  538. {0x0b66, 0x0b71, L},
  539. {0x0b82, 0x0b82, NSM},
  540. {0x0b83, 0x0b83, L},
  541. {0x0b85, 0x0b8a, L},
  542. {0x0b8e, 0x0b90, L},
  543. {0x0b92, 0x0b95, L},
  544. {0x0b99, 0x0b9a, L},
  545. {0x0b9c, 0x0b9c, L},
  546. {0x0b9e, 0x0b9f, L},
  547. {0x0ba3, 0x0ba4, L},
  548. {0x0ba8, 0x0baa, L},
  549. {0x0bae, 0x0bb5, L},
  550. {0x0bb7, 0x0bb9, L},
  551. {0x0bbe, 0x0bbf, L},
  552. {0x0bc0, 0x0bc0, NSM},
  553. {0x0bc1, 0x0bc2, L},
  554. {0x0bc6, 0x0bc8, L},
  555. {0x0bca, 0x0bcc, L},
  556. {0x0bcd, 0x0bcd, NSM},
  557. {0x0bd7, 0x0bd7, L},
  558. {0x0be7, 0x0bf2, L},
  559. {0x0bf9, 0x0bf9, ET},
  560. {0x0c01, 0x0c03, L},
  561. {0x0c05, 0x0c0c, L},
  562. {0x0c0e, 0x0c10, L},
  563. {0x0c12, 0x0c28, L},
  564. {0x0c2a, 0x0c33, L},
  565. {0x0c35, 0x0c39, L},
  566. {0x0c3e, 0x0c40, NSM},
  567. {0x0c41, 0x0c44, L},
  568. {0x0c46, 0x0c48, NSM},
  569. {0x0c4a, 0x0c4d, NSM},
  570. {0x0c55, 0x0c56, NSM},
  571. {0x0c60, 0x0c61, L},
  572. {0x0c66, 0x0c6f, L},
  573. {0x0c82, 0x0c83, L},
  574. {0x0c85, 0x0c8c, L},
  575. {0x0c8e, 0x0c90, L},
  576. {0x0c92, 0x0ca8, L},
  577. {0x0caa, 0x0cb3, L},
  578. {0x0cb5, 0x0cb9, L},
  579. {0x0cbc, 0x0cbc, NSM},
  580. {0x0cbd, 0x0cc4, L},
  581. {0x0cc6, 0x0cc8, L},
  582. {0x0cca, 0x0ccb, L},
  583. {0x0ccc, 0x0ccd, NSM},
  584. {0x0cd5, 0x0cd6, L},
  585. {0x0cde, 0x0cde, L},
  586. {0x0ce0, 0x0ce1, L},
  587. {0x0ce6, 0x0cef, L},
  588. {0x0d02, 0x0d03, L},
  589. {0x0d05, 0x0d0c, L},
  590. {0x0d0e, 0x0d10, L},
  591. {0x0d12, 0x0d28, L},
  592. {0x0d2a, 0x0d39, L},
  593. {0x0d3e, 0x0d40, L},
  594. {0x0d41, 0x0d43, NSM},
  595. {0x0d46, 0x0d48, L},
  596. {0x0d4a, 0x0d4c, L},
  597. {0x0d4d, 0x0d4d, NSM},
  598. {0x0d57, 0x0d57, L},
  599. {0x0d60, 0x0d61, L},
  600. {0x0d66, 0x0d6f, L},
  601. {0x0d82, 0x0d83, L},
  602. {0x0d85, 0x0d96, L},
  603. {0x0d9a, 0x0db1, L},
  604. {0x0db3, 0x0dbb, L},
  605. {0x0dbd, 0x0dbd, L},
  606. {0x0dc0, 0x0dc6, L},
  607. {0x0dca, 0x0dca, NSM},
  608. {0x0dcf, 0x0dd1, L},
  609. {0x0dd2, 0x0dd4, NSM},
  610. {0x0dd6, 0x0dd6, NSM},
  611. {0x0dd8, 0x0ddf, L},
  612. {0x0df2, 0x0df4, L},
  613. {0x0e01, 0x0e30, L},
  614. {0x0e31, 0x0e31, NSM},
  615. {0x0e32, 0x0e33, L},
  616. {0x0e34, 0x0e3a, NSM},
  617. {0x0e3f, 0x0e3f, ET},
  618. {0x0e40, 0x0e46, L},
  619. {0x0e47, 0x0e4e, NSM},
  620. {0x0e4f, 0x0e5b, L},
  621. {0x0e81, 0x0e82, L},
  622. {0x0e84, 0x0e84, L},
  623. {0x0e87, 0x0e88, L},
  624. {0x0e8a, 0x0e8a, L},
  625. {0x0e8d, 0x0e8d, L},
  626. {0x0e94, 0x0e97, L},
  627. {0x0e99, 0x0e9f, L},
  628. {0x0ea1, 0x0ea3, L},
  629. {0x0ea5, 0x0ea5, L},
  630. {0x0ea7, 0x0ea7, L},
  631. {0x0eaa, 0x0eab, L},
  632. {0x0ead, 0x0eb0, L},
  633. {0x0eb1, 0x0eb1, NSM},
  634. {0x0eb2, 0x0eb3, L},
  635. {0x0eb4, 0x0eb9, NSM},
  636. {0x0ebb, 0x0ebc, NSM},
  637. {0x0ebd, 0x0ebd, L},
  638. {0x0ec0, 0x0ec4, L},
  639. {0x0ec6, 0x0ec6, L},
  640. {0x0ec8, 0x0ecd, NSM},
  641. {0x0ed0, 0x0ed9, L},
  642. {0x0edc, 0x0edd, L},
  643. {0x0f00, 0x0f17, L},
  644. {0x0f18, 0x0f19, NSM},
  645. {0x0f1a, 0x0f34, L},
  646. {0x0f35, 0x0f35, NSM},
  647. {0x0f36, 0x0f36, L},
  648. {0x0f37, 0x0f37, NSM},
  649. {0x0f38, 0x0f38, L},
  650. {0x0f39, 0x0f39, NSM},
  651. {0x0f3e, 0x0f47, L},
  652. {0x0f49, 0x0f6a, L},
  653. {0x0f71, 0x0f7e, NSM},
  654. {0x0f7f, 0x0f7f, L},
  655. {0x0f80, 0x0f84, NSM},
  656. {0x0f85, 0x0f85, L},
  657. {0x0f86, 0x0f87, NSM},
  658. {0x0f88, 0x0f8b, L},
  659. {0x0f90, 0x0f97, NSM},
  660. {0x0f99, 0x0fbc, NSM},
  661. {0x0fbe, 0x0fc5, L},
  662. {0x0fc6, 0x0fc6, NSM},
  663. {0x0fc7, 0x0fcc, L},
  664. {0x0fcf, 0x0fcf, L},
  665. {0x1000, 0x1021, L},
  666. {0x1023, 0x1027, L},
  667. {0x1029, 0x102a, L},
  668. {0x102c, 0x102c, L},
  669. {0x102d, 0x1030, NSM},
  670. {0x1031, 0x1031, L},
  671. {0x1032, 0x1032, NSM},
  672. {0x1036, 0x1037, NSM},
  673. {0x1038, 0x1038, L},
  674. {0x1039, 0x1039, NSM},
  675. {0x1040, 0x1057, L},
  676. {0x1058, 0x1059, NSM},
  677. {0x10a0, 0x10c5, L},
  678. {0x10d0, 0x10f8, L},
  679. {0x10fb, 0x10fb, L},
  680. {0x1100, 0x1159, L},
  681. {0x115f, 0x11a2, L},
  682. {0x11a8, 0x11f9, L},
  683. {0x1200, 0x1206, L},
  684. {0x1208, 0x1246, L},
  685. {0x1248, 0x1248, L},
  686. {0x124a, 0x124d, L},
  687. {0x1250, 0x1256, L},
  688. {0x1258, 0x1258, L},
  689. {0x125a, 0x125d, L},
  690. {0x1260, 0x1286, L},
  691. {0x1288, 0x1288, L},
  692. {0x128a, 0x128d, L},
  693. {0x1290, 0x12ae, L},
  694. {0x12b0, 0x12b0, L},
  695. {0x12b2, 0x12b5, L},
  696. {0x12b8, 0x12be, L},
  697. {0x12c0, 0x12c0, L},
  698. {0x12c2, 0x12c5, L},
  699. {0x12c8, 0x12ce, L},
  700. {0x12d0, 0x12d6, L},
  701. {0x12d8, 0x12ee, L},
  702. {0x12f0, 0x130e, L},
  703. {0x1310, 0x1310, L},
  704. {0x1312, 0x1315, L},
  705. {0x1318, 0x131e, L},
  706. {0x1320, 0x1346, L},
  707. {0x1348, 0x135a, L},
  708. {0x1361, 0x137c, L},
  709. {0x13a0, 0x13f4, L},
  710. {0x1401, 0x1676, L},
  711. {0x1680, 0x1680, WS},
  712. {0x1681, 0x169a, L},
  713. {0x16a0, 0x16f0, L},
  714. {0x1700, 0x170c, L},
  715. {0x170e, 0x1711, L},
  716. {0x1712, 0x1714, NSM},
  717. {0x1720, 0x1731, L},
  718. {0x1732, 0x1734, NSM},
  719. {0x1735, 0x1736, L},
  720. {0x1740, 0x1751, L},
  721. {0x1752, 0x1753, NSM},
  722. {0x1760, 0x176c, L},
  723. {0x176e, 0x1770, L},
  724. {0x1772, 0x1773, NSM},
  725. {0x1780, 0x17b6, L},
  726. {0x17b7, 0x17bd, NSM},
  727. {0x17be, 0x17c5, L},
  728. {0x17c6, 0x17c6, NSM},
  729. {0x17c7, 0x17c8, L},
  730. {0x17c9, 0x17d3, NSM},
  731. {0x17d4, 0x17da, L},
  732. {0x17db, 0x17db, ET},
  733. {0x17dc, 0x17dc, L},
  734. {0x17dd, 0x17dd, NSM},
  735. {0x17e0, 0x17e9, L},
  736. {0x180b, 0x180d, NSM},
  737. {0x180e, 0x180e, WS},
  738. {0x1810, 0x1819, L},
  739. {0x1820, 0x1877, L},
  740. {0x1880, 0x18a8, L},
  741. {0x18a9, 0x18a9, NSM},
  742. {0x1900, 0x191c, L},
  743. {0x1920, 0x1922, NSM},
  744. {0x1923, 0x1926, L},
  745. {0x1927, 0x192b, NSM},
  746. {0x1930, 0x1931, L},
  747. {0x1932, 0x1932, NSM},
  748. {0x1933, 0x1938, L},
  749. {0x1939, 0x193b, NSM},
  750. {0x1946, 0x196d, L},
  751. {0x1970, 0x1974, L},
  752. {0x1d00, 0x1d6b, L},
  753. {0x1e00, 0x1e9b, L},
  754. {0x1ea0, 0x1ef9, L},
  755. {0x1f00, 0x1f15, L},
  756. {0x1f18, 0x1f1d, L},
  757. {0x1f20, 0x1f45, L},
  758. {0x1f48, 0x1f4d, L},
  759. {0x1f50, 0x1f57, L},
  760. {0x1f59, 0x1f59, L},
  761. {0x1f5b, 0x1f5b, L},
  762. {0x1f5d, 0x1f5d, L},
  763. {0x1f5f, 0x1f7d, L},
  764. {0x1f80, 0x1fb4, L},
  765. {0x1fb6, 0x1fbc, L},
  766. {0x1fbe, 0x1fbe, L},
  767. {0x1fc2, 0x1fc4, L},
  768. {0x1fc6, 0x1fcc, L},
  769. {0x1fd0, 0x1fd3, L},
  770. {0x1fd6, 0x1fdb, L},
  771. {0x1fe0, 0x1fec, L},
  772. {0x1ff2, 0x1ff4, L},
  773. {0x1ff6, 0x1ffc, L},
  774. {0x2000, 0x200a, WS},
  775. {0x200b, 0x200d, BN},
  776. {0x200e, 0x200e, L},
  777. {0x200f, 0x200f, R},
  778. {0x2028, 0x2028, WS},
  779. {0x2029, 0x2029, B},
  780. {0x202a, 0x202a, LRE},
  781. {0x202b, 0x202b, RLE},
  782. {0x202c, 0x202c, PDF},
  783. {0x202d, 0x202d, LRO},
  784. {0x202e, 0x202e, RLO},
  785. {0x202f, 0x202f, WS},
  786. {0x2030, 0x2034, ET},
  787. {0x2044, 0x2044, CS},
  788. {0x205f, 0x205f, WS},
  789. {0x2060, 0x2063, BN},
  790. {0x206a, 0x206f, BN},
  791. {0x2070, 0x2070, EN},
  792. {0x2071, 0x2071, L},
  793. {0x2074, 0x2079, EN},
  794. {0x207a, 0x207b, ET},
  795. {0x207f, 0x207f, L},
  796. {0x2080, 0x2089, EN},
  797. {0x208a, 0x208b, ET},
  798. {0x20a0, 0x20b1, ET},
  799. {0x20d0, 0x20ea, NSM},
  800. {0x2102, 0x2102, L},
  801. {0x2107, 0x2107, L},
  802. {0x210a, 0x2113, L},
  803. {0x2115, 0x2115, L},
  804. {0x2119, 0x211d, L},
  805. {0x2124, 0x2124, L},
  806. {0x2126, 0x2126, L},
  807. {0x2128, 0x2128, L},
  808. {0x212a, 0x212d, L},
  809. {0x212e, 0x212e, ET},
  810. {0x212f, 0x2131, L},
  811. {0x2133, 0x2139, L},
  812. {0x213d, 0x213f, L},
  813. {0x2145, 0x2149, L},
  814. {0x2160, 0x2183, L},
  815. {0x2212, 0x2213, ET},
  816. {0x2336, 0x237a, L},
  817. {0x2395, 0x2395, L},
  818. {0x2488, 0x249b, EN},
  819. {0x249c, 0x24e9, L},
  820. {0x2800, 0x28ff, L},
  821. {0x3000, 0x3000, WS},
  822. {0x3005, 0x3007, L},
  823. {0x3021, 0x3029, L},
  824. {0x302a, 0x302f, NSM},
  825. {0x3031, 0x3035, L},
  826. {0x3038, 0x303c, L},
  827. {0x3041, 0x3096, L},
  828. {0x3099, 0x309a, NSM},
  829. {0x309d, 0x309f, L},
  830. {0x30a1, 0x30fa, L},
  831. {0x30fc, 0x30ff, L},
  832. {0x3105, 0x312c, L},
  833. {0x3131, 0x318e, L},
  834. {0x3190, 0x31b7, L},
  835. {0x31f0, 0x321c, L},
  836. {0x3220, 0x3243, L},
  837. {0x3260, 0x327b, L},
  838. {0x327f, 0x32b0, L},
  839. {0x32c0, 0x32cb, L},
  840. {0x32d0, 0x32fe, L},
  841. {0x3300, 0x3376, L},
  842. {0x337b, 0x33dd, L},
  843. {0x33e0, 0x33fe, L},
  844. {0x3400, 0x4db5, L},
  845. {0x4e00, 0x9fa5, L},
  846. {0xa000, 0xa48c, L},
  847. {0xac00, 0xd7a3, L},
  848. {0xd800, 0xfa2d, L},
  849. {0xfa30, 0xfa6a, L},
  850. {0xfb00, 0xfb06, L},
  851. {0xfb13, 0xfb17, L},
  852. {0xfb1d, 0xfb1d, R},
  853. {0xfb1e, 0xfb1e, NSM},
  854. {0xfb1f, 0xfb28, R},
  855. {0xfb29, 0xfb29, ET},
  856. {0xfb2a, 0xfb36, R},
  857. {0xfb38, 0xfb3c, R},
  858. {0xfb3e, 0xfb3e, R},
  859. {0xfb40, 0xfb41, R},
  860. {0xfb43, 0xfb44, R},
  861. {0xfb46, 0xfb4f, R},
  862. {0xfb50, 0xfbb1, AL},
  863. {0xfbd3, 0xfd3d, AL},
  864. {0xfd50, 0xfd8f, AL},
  865. {0xfd92, 0xfdc7, AL},
  866. {0xfdf0, 0xfdfc, AL},
  867. {0xfe00, 0xfe0f, NSM},
  868. {0xfe20, 0xfe23, NSM},
  869. {0xfe50, 0xfe50, CS},
  870. {0xfe52, 0xfe52, CS},
  871. {0xfe55, 0xfe55, CS},
  872. {0xfe5f, 0xfe5f, ET},
  873. {0xfe62, 0xfe63, ET},
  874. {0xfe69, 0xfe6a, ET},
  875. {0xfe70, 0xfe74, AL},
  876. {0xfe76, 0xfefc, AL},
  877. {0xfeff, 0xfeff, BN},
  878. {0xff03, 0xff05, ET},
  879. {0xff0b, 0xff0b, ET},
  880. {0xff0c, 0xff0c, CS},
  881. {0xff0d, 0xff0d, ET},
  882. {0xff0e, 0xff0e, CS},
  883. {0xff0f, 0xff0f, ES},
  884. {0xff10, 0xff19, EN},
  885. {0xff1a, 0xff1a, CS},
  886. {0xff21, 0xff3a, L},
  887. {0xff41, 0xff5a, L},
  888. {0xff66, 0xffbe, L},
  889. {0xffc2, 0xffc7, L},
  890. {0xffca, 0xffcf, L},
  891. {0xffd2, 0xffd7, L},
  892. {0xffda, 0xffdc, L},
  893. {0xffe0, 0xffe1, ET},
  894. {0xffe5, 0xffe6, ET},
  895. {0x10000, 0x1000b, L},
  896. {0x1000d, 0x10026, L},
  897. {0x10028, 0x1003a, L},
  898. {0x1003c, 0x1003d, L},
  899. {0x1003f, 0x1004d, L},
  900. {0x10050, 0x1005d, L},
  901. {0x10080, 0x100fa, L},
  902. {0x10100, 0x10100, L},
  903. {0x10102, 0x10102, L},
  904. {0x10107, 0x10133, L},
  905. {0x10137, 0x1013f, L},
  906. {0x10300, 0x1031e, L},
  907. {0x10320, 0x10323, L},
  908. {0x10330, 0x1034a, L},
  909. {0x10380, 0x1039d, L},
  910. {0x1039f, 0x1039f, L},
  911. {0x10400, 0x1049d, L},
  912. {0x104a0, 0x104a9, L},
  913. {0x10800, 0x10805, R},
  914. {0x10808, 0x10808, R},
  915. {0x1080a, 0x10835, R},
  916. {0x10837, 0x10838, R},
  917. {0x1083c, 0x1083c, R},
  918. {0x1083f, 0x1083f, R},
  919. {0x1d000, 0x1d0f5, L},
  920. {0x1d100, 0x1d126, L},
  921. {0x1d12a, 0x1d166, L},
  922. {0x1d167, 0x1d169, NSM},
  923. {0x1d16a, 0x1d172, L},
  924. {0x1d173, 0x1d17a, BN},
  925. {0x1d17b, 0x1d182, NSM},
  926. {0x1d183, 0x1d184, L},
  927. {0x1d185, 0x1d18b, NSM},
  928. {0x1d18c, 0x1d1a9, L},
  929. {0x1d1aa, 0x1d1ad, NSM},
  930. {0x1d1ae, 0x1d1dd, L},
  931. {0x1d400, 0x1d454, L},
  932. {0x1d456, 0x1d49c, L},
  933. {0x1d49e, 0x1d49f, L},
  934. {0x1d4a2, 0x1d4a2, L},
  935. {0x1d4a5, 0x1d4a6, L},
  936. {0x1d4a9, 0x1d4ac, L},
  937. {0x1d4ae, 0x1d4b9, L},
  938. {0x1d4bb, 0x1d4bb, L},
  939. {0x1d4bd, 0x1d4c3, L},
  940. {0x1d4c5, 0x1d505, L},
  941. {0x1d507, 0x1d50a, L},
  942. {0x1d50d, 0x1d514, L},
  943. {0x1d516, 0x1d51c, L},
  944. {0x1d51e, 0x1d539, L},
  945. {0x1d53b, 0x1d53e, L},
  946. {0x1d540, 0x1d544, L},
  947. {0x1d546, 0x1d546, L},
  948. {0x1d54a, 0x1d550, L},
  949. {0x1d552, 0x1d6a3, L},
  950. {0x1d6a8, 0x1d7c9, L},
  951. {0x1d7ce, 0x1d7ff, EN},
  952. {0x20000, 0x2a6d6, L},
  953. {0x2f800, 0x2fa1d, L},
  954. {0xe0001, 0xe0001, BN},
  955. {0xe0020, 0xe007f, BN},
  956. {0xe0100, 0xe01ef, NSM},
  957. {0xf0000, 0xffffd, L},
  958. {0x100000, 0x10fffd, L}
  959. };
  960. int i, j, k;
  961. i = -1;
  962. j = lenof(lookup);
  963. while (j - i > 1) {
  964. k = (i + j) / 2;
  965. if (ch < lookup[k].first)
  966. j = k;
  967. else if (ch > lookup[k].last)
  968. i = k;
  969. else
  970. return lookup[k].type;
  971. }
  972. /*
  973. * If we reach here, the character was not in any of the
  974. * intervals listed in the lookup table. This means we return
  975. * ON (`Other Neutrals'). This is the appropriate code for any
  976. * character genuinely not listed in the Unicode table, and
  977. * also the table above has deliberately left out any
  978. * characters _explicitly_ listed as ON (to save space!).
  979. */
  980. return ON;
  981. }
  982. /*
  983. * Function exported to front ends to allow them to identify
  984. * bidi-active characters (in case, for example, the platform's
  985. * text display function can't conveniently be prevented from doing
  986. * its own bidi and so special treatment is required for characters
  987. * that would cause the bidi algorithm to activate).
  988. *
  989. * This function is passed a single Unicode code point, and returns
  990. * nonzero if the presence of this code point can possibly cause
  991. * the bidi algorithm to do any reordering. Thus, any string
  992. * composed entirely of characters for which is_rtl() returns zero
  993. * should be safe to pass to a bidi-active platform display
  994. * function without fear.
  995. *
  996. * (is_rtl() must therefore also return true for any character
  997. * which would be affected by Arabic shaping, but this isn't
  998. * important because all such characters are right-to-left so it
  999. * would have flagged them anyway.)
  1000. */
  1001. int is_rtl(int c)
  1002. {
  1003. /*
  1004. * After careful reading of the Unicode bidi algorithm (URL as
  1005. * given at the top of this file) I believe that the only
  1006. * character classes which can possibly cause trouble are R,
  1007. * AL, RLE and RLO. I think that any string containing no
  1008. * character in any of those classes will be displayed
  1009. * uniformly left-to-right by the Unicode bidi algorithm.
  1010. */
  1011. const int mask = (1<<R) | (1<<AL) | (1<<RLE) | (1<<RLO);
  1012. return mask & (1 << (getType(c)));
  1013. }
  1014. /*
  1015. * The most significant 2 bits of each level are used to store
  1016. * Override status of each character
  1017. * This function sets the override bits of level according
  1018. * to the value in override, and reurns the new byte.
  1019. */
  1020. unsigned char setOverrideBits(unsigned char level, unsigned char override)
  1021. {
  1022. if (override == ON)
  1023. return level;
  1024. else if (override == R)
  1025. return level | OISR;
  1026. else if (override == L)
  1027. return level | OISL;
  1028. return level;
  1029. }
  /*
   * Look at the run of identical values in `level' that ends just
   * before index `from', and return the value stored immediately
   * _before_ that run. Used to process U+202C POP DIRECTIONAL
   * FORMATTING.
   *
   * level: array of level bytes; entries [0, from) are examined.
   * from:  index one past the end of the run to skip.
   *
   * Returns the value preceding the run, or -1 if `from' is not
   * positive or the run extends all the way back to index 0.
   */
  int getPreviousLevel(unsigned char* level, int from)
  {
      int idx;
      unsigned char run_value;

      if (from <= 0)
          return -1;

      run_value = level[from - 1];

      /* Walk backwards until something differs from the run's value. */
      for (idx = from - 2; idx >= 0; idx--) {
          if (level[idx] != run_value)
              return level[idx];
      }

      return -1;
  }
  1047. /* The Main shaping function, and the only one to be used
  1048. * by the outside world.
  1049. *
  1050. * line: buffer to apply shaping to. this must be passed by doBidi() first
  1051. * to: output buffer for the shaped data
  1052. * count: number of characters in line
  1053. */
  1054. int do_shape(bidi_char *line, bidi_char *to, int count)
  1055. {
  1056. int i, tempShape, ligFlag;
  1057. for (ligFlag=i=0; i<count; i++) {
  1058. to[i] = line[i];
  1059. tempShape = STYPE(line[i].wc);
  1060. switch (tempShape) {
  1061. case SC:
  1062. break;
  1063. case SU:
  1064. break;
  1065. case SR:
  1066. tempShape = (i+1 < count ? STYPE(line[i+1].wc) : SU);
  1067. if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
  1068. to[i].wc = SFINAL((SISOLATED(line[i].wc)));
  1069. else
  1070. to[i].wc = SISOLATED(line[i].wc);
  1071. break;
  1072. case SD:
  1073. /* Make Ligatures */
  1074. tempShape = (i+1 < count ? STYPE(line[i+1].wc) : SU);
  1075. if (line[i].wc == 0x644) {
  1076. if (i > 0) switch (line[i-1].wc) {
  1077. case 0x622:
  1078. ligFlag = 1;
  1079. if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
  1080. to[i].wc = 0xFEF6;
  1081. else
  1082. to[i].wc = 0xFEF5;
  1083. break;
  1084. case 0x623:
  1085. ligFlag = 1;
  1086. if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
  1087. to[i].wc = 0xFEF8;
  1088. else
  1089. to[i].wc = 0xFEF7;
  1090. break;
  1091. case 0x625:
  1092. ligFlag = 1;
  1093. if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
  1094. to[i].wc = 0xFEFA;
  1095. else
  1096. to[i].wc = 0xFEF9;
  1097. break;
  1098. case 0x627:
  1099. ligFlag = 1;
  1100. if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
  1101. to[i].wc = 0xFEFC;
  1102. else
  1103. to[i].wc = 0xFEFB;
  1104. break;
  1105. }
  1106. if (ligFlag) {
  1107. to[i-1].wc = 0x20;
  1108. ligFlag = 0;
  1109. break;
  1110. }
  1111. }
  1112. if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) {
  1113. tempShape = (i > 0 ? STYPE(line[i-1].wc) : SU);
  1114. if ((tempShape == SR) || (tempShape == SD) || (tempShape == SC))
  1115. to[i].wc = SMEDIAL((SISOLATED(line[i].wc)));
  1116. else
  1117. to[i].wc = SFINAL((SISOLATED(line[i].wc)));
  1118. break;
  1119. }
  1120. tempShape = (i > 0 ? STYPE(line[i-1].wc) : SU);
  1121. if ((tempShape == SR) || (tempShape == SD) || (tempShape == SC))
  1122. to[i].wc = SINITIAL((SISOLATED(line[i].wc)));
  1123. else
  1124. to[i].wc = SISOLATED(line[i].wc);
  1125. break;
  1126. }
  1127. }
  1128. return 1;
  1129. }
  1130. /*
  1131. * The Main Bidi Function, and the only function that should
  1132. * be used by the outside world.
  1133. *
  1134. * line: a buffer of size count containing text to apply
  1135. * the Bidirectional algorithm to.
  1136. */
  1137. int do_bidi(bidi_char *line, int count)
  1138. {
  1139. unsigned char* types;
  1140. unsigned char* levels;
  1141. unsigned char paragraphLevel;
  1142. unsigned char currentEmbedding;
  1143. unsigned char currentOverride;
  1144. unsigned char tempType;
  1145. int i, j, yes, bover;
  1146. /* Check the presence of R or AL types as optimization */
  1147. yes = 0;
  1148. for (i=0; i<count; i++) {
  1149. int type = getType(line[i].wc);
  1150. if (type == R || type == AL) {
  1151. yes = 1;
  1152. break;
  1153. }
  1154. }
  1155. if (yes == 0)
  1156. return L;
  1157. /* Initialize types, levels */
  1158. types = snewn(count, unsigned char);
  1159. levels = snewn(count, unsigned char);
  1160. /* Rule (P1) NOT IMPLEMENTED
  1161. * P1. Split the text into separate paragraphs. A paragraph separator is
  1162. * kept with the previous paragraph. Within each paragraph, apply all the
  1163. * other rules of this algorithm.
  1164. */
  1165. /* Rule (P2), (P3)
  1166. * P2. In each paragraph, find the first character of type L, AL, or R.
  1167. * P3. If a character is found in P2 and it is of type AL or R, then set
  1168. * the paragraph embedding level to one; otherwise, set it to zero.
  1169. */
  1170. paragraphLevel = 0;
  1171. for (i=0; i<count ; i++) {
  1172. int type = getType(line[i].wc);
  1173. if (type == R || type == AL) {
  1174. paragraphLevel = 1;
  1175. break;
  1176. } else if (type == L)
  1177. break;
  1178. }
  1179. /* Rule (X1)
  1180. * X1. Begin by setting the current embedding level to the paragraph
  1181. * embedding level. Set the directional override status to neutral.
  1182. */
  1183. currentEmbedding = paragraphLevel;
  1184. currentOverride = ON;
  1185. /* Rule (X2), (X3), (X4), (X5), (X6), (X7), (X8)
  1186. * X2. With each RLE, compute the least greater odd embedding level.
  1187. * X3. With each LRE, compute the least greater even embedding level.
  1188. * X4. With each RLO, compute the least greater odd embedding level.
  1189. * X5. With each LRO, compute the least greater even embedding level.
  1190. * X6. For all types besides RLE, LRE, RLO, LRO, and PDF:
  1191. * a. Set the level of the current character to the current
  1192. * embedding level.
  1193. * b. Whenever the directional override status is not neutral,
  1194. * reset the current character type to the directional
  1195. * override status.
  1196. * X7. With each PDF, determine the matching embedding or override code.
  1197. * If there was a valid matching code, restore (pop) the last
  1198. * remembered (pushed) embedding level and directional override.
  1199. * X8. All explicit directional embeddings and overrides are completely
  1200. * terminated at the end of each paragraph. Paragraph separators are not
  1201. * included in the embedding. (Useless here) NOT IMPLEMENTED
  1202. */
  1203. bover = 0;
  1204. for (i=0; i<count; i++) {
  1205. tempType = getType(line[i].wc);
  1206. switch (tempType) {
  1207. case RLE:
  1208. currentEmbedding = levels[i] = leastGreaterOdd(currentEmbedding);
  1209. levels[i] = setOverrideBits(levels[i], currentOverride);
  1210. currentOverride = ON;
  1211. break;
  1212. case LRE:
  1213. currentEmbedding = levels[i] = leastGreaterEven(currentEmbedding);
  1214. levels[i] = setOverrideBits(levels[i], currentOverride);
  1215. currentOverride = ON;
  1216. break;
  1217. case RLO:
  1218. currentEmbedding = levels[i] = leastGreaterOdd(currentEmbedding);
  1219. tempType = currentOverride = R;
  1220. bover = 1;
  1221. break;
  1222. case LRO:
  1223. currentEmbedding = levels[i] = leastGreaterEven(currentEmbedding);
  1224. tempType = currentOverride = L;
  1225. bover = 1;
  1226. break;
  1227. case PDF:
  1228. {
  1229. int prevlevel = getPreviousLevel(levels, i);
  1230. if (prevlevel == -1) {
  1231. currentEmbedding = paragraphLevel;
  1232. currentOverride = ON;
  1233. } else {
  1234. currentOverride = currentEmbedding & OMASK;
  1235. currentEmbedding = currentEmbedding & ~OMASK;
  1236. }
  1237. }
  1238. levels[i] = currentEmbedding;
  1239. break;
  1240. /* Whitespace is treated as neutral for now */
  1241. case WS:
  1242. case S:
  1243. levels[i] = currentEmbedding;
  1244. tempType = ON;
  1245. if (currentOverride != ON)
  1246. tempType = currentOverride;
  1247. break;
  1248. default:
  1249. levels[i] = currentEmbedding;
  1250. if (currentOverride != ON)
  1251. tempType = currentOverride;
  1252. break;
  1253. }
  1254. types[i] = tempType;
  1255. }
  1256. /* this clears out all overrides, so we can use levels safely... */
  1257. /* checks bover first */
  1258. if (bover)
  1259. for (i=0; i<count; i++)
  1260. levels[i] = levels[i] & LMASK;
  1261. /* Rule (X9)
  1262. * X9. Remove all RLE, LRE, RLO, LRO, PDF, and BN codes.
  1263. * Here, they're converted to BN.
  1264. */
  1265. for (i=0; i<count; i++) {
  1266. switch (types[i]) {
  1267. case RLE:
  1268. case LRE:
  1269. case RLO:
  1270. case LRO:
  1271. case PDF:
  1272. types[i] = BN;
  1273. break;
  1274. }
  1275. }
  1276. /* Rule (W1)
  1277. * W1. Examine each non-spacing mark (NSM) in the level run, and change
  1278. * the type of the NSM to the type of the previous character. If the NSM
  1279. * is at the start of the level run, it will get the type of sor.
  1280. */
  1281. if (types[0] == NSM)
  1282. types[0] = paragraphLevel;
  1283. for (i=1; i<count; i++) {
  1284. if (types[i] == NSM)
  1285. types[i] = types[i-1];
  1286. /* Is this a safe assumption?
  1287. * I assumed the previous, IS a character.
  1288. */
  1289. }
  1290. /* Rule (W2)
  1291. * W2. Search backwards from each instance of a European number until the
  1292. * first strong type (R, L, AL, or sor) is found. If an AL is found,
  1293. * change the type of the European number to Arabic number.
  1294. */
  1295. for (i=0; i<count; i++) {
  1296. if (types[i] == EN) {
  1297. j=i;
  1298. while (j >= 0) {
  1299. if (types[j] == AL) {
  1300. types[i] = AN;
  1301. break;
  1302. } else if (types[j] == R || types[j] == L) {
  1303. break;
  1304. }
  1305. j--;
  1306. }
  1307. }
  1308. }
  1309. /* Rule (W3)
  1310. * W3. Change all ALs to R.
  1311. *
  1312. * Optimization: on Rule Xn, we might set a flag on AL type
  1313. * to prevent this loop in L R lines only...
  1314. */
  1315. for (i=0; i<count; i++) {
  1316. if (types[i] == AL)
  1317. types[i] = R;
  1318. }
  1319. /* Rule (W4)
  1320. * W4. A single European separator between two European numbers changes
  1321. * to a European number. A single common separator between two numbers
  1322. * of the same type changes to that type.
  1323. */
  1324. for (i=1; i<(count-1); i++) {
  1325. if (types[i] == ES) {
  1326. if (types[i-1] == EN && types[i+1] == EN)
  1327. types[i] = EN;
  1328. } else if (types[i] == CS) {
  1329. if (types[i-1] == EN && types[i+1] == EN)
  1330. types[i] = EN;
  1331. else if (types[i-1] == AN && types[i+1] == AN)
  1332. types[i] = AN;
  1333. }
  1334. }
  1335. /* Rule (W5)
  1336. * W5. A sequence of European terminators adjacent to European numbers
  1337. * changes to all European numbers.
  1338. *
  1339. * Optimization: lots here... else ifs need rearrangement
  1340. */
  1341. for (i=0; i<count; i++) {
  1342. if (types[i] == ET) {
  1343. if (i > 0 && types[i-1] == EN) {
  1344. types[i] = EN;
  1345. continue;
  1346. } else if (i < count-1 && types[i+1] == EN) {
  1347. types[i] = EN;
  1348. continue;
  1349. } else if (i < count-1 && types[i+1] == ET) {
  1350. j=i;
  1351. while (j <count && types[j] == ET) {
  1352. j++;
  1353. }
  1354. if (types[j] == EN)
  1355. types[i] = EN;
  1356. }
  1357. }
  1358. }
  1359. /* Rule (W6)
  1360. * W6. Otherwise, separators and terminators change to Other Neutral:
  1361. */
  1362. for (i=0; i<count; i++) {
  1363. switch (types[i]) {
  1364. case ES:
  1365. case ET:
  1366. case CS:
  1367. types[i] = ON;
  1368. break;
  1369. }
  1370. }
  1371. /* Rule (W7)
  1372. * W7. Search backwards from each instance of a European number until
  1373. * the first strong type (R, L, or sor) is found. If an L is found,
  1374. * then change the type of the European number to L.
  1375. */
  1376. for (i=0; i<count; i++) {
  1377. if (types[i] == EN) {
  1378. j=i;
  1379. while (j >= 0) {
  1380. if (types[j] == L) {
  1381. types[i] = L;
  1382. break;
  1383. } else if (types[j] == R || types[j] == AL) {
  1384. break;
  1385. }
  1386. j--;
  1387. }
  1388. }
  1389. }
  1390. /* Rule (N1)
  1391. * N1. A sequence of neutrals takes the direction of the surrounding
  1392. * strong text if the text on both sides has the same direction. European
  1393. * and Arabic numbers are treated as though they were R.
  1394. */
  1395. if (count >= 2 && types[0] == ON) {
  1396. if ((types[1] == R) || (types[1] == EN) || (types[1] == AN))
  1397. types[0] = R;
  1398. else if (types[1] == L)
  1399. types[0] = L;
  1400. }
  1401. for (i=1; i<(count-1); i++) {
  1402. if (types[i] == ON) {
  1403. if (types[i-1] == L) {
  1404. j=i;
  1405. while (j<(count-1) && types[j] == ON) {
  1406. j++;
  1407. }
  1408. if (types[j] == L) {
  1409. while (i<j) {
  1410. types[i] = L;
  1411. i++;
  1412. }
  1413. }
  1414. } else if ((types[i-1] == R) ||
  1415. (types[i-1] == EN) ||
  1416. (types[i-1] == AN)) {
  1417. j=i;
  1418. while (j<(count-1) && types[j] == ON) {
  1419. j++;
  1420. }
  1421. if ((types[j] == R) ||
  1422. (types[j] == EN) ||
  1423. (types[j] == AN)) {
  1424. while (i<j) {
  1425. types[i] = R;
  1426. i++;
  1427. }
  1428. }
  1429. }
  1430. }
  1431. }
  1432. if (count >= 2 && types[count-1] == ON) {
  1433. if (types[count-2] == R || types[count-2] == EN || types[count-2] == AN)
  1434. types[count-1] = R;
  1435. else if (types[count-2] == L)
  1436. types[count-1] = L;
  1437. }
  1438. /* Rule (N2)
  1439. * N2. Any remaining neutrals take the embedding direction.
  1440. */
  1441. for (i=0; i<count; i++) {
  1442. if (types[i] == ON) {
  1443. if ((levels[i] % 2) == 0)
  1444. types[i] = L;
  1445. else
  1446. types[i] = R;
  1447. }
  1448. }
  1449. /* Rule (I1)
  1450. * I1. For all characters with an even (left-to-right) embedding
  1451. * direction, those of type R go up one level and those of type AN or
  1452. * EN go up two levels.
  1453. */
  1454. for (i=0; i<count; i++) {
  1455. if ((levels[i] % 2) == 0) {
  1456. if (types[i] == R)
  1457. levels[i] += 1;
  1458. else if (types[i] == AN || types[i] == EN)
  1459. levels[i] += 2;
  1460. }
  1461. }
  1462. /* Rule (I2)
  1463. * I2. For all characters with an odd (right-to-left) embedding direction,
  1464. * those of type L, EN or AN go up one level.
  1465. */
  1466. for (i=0; i<count; i++) {
  1467. if ((levels[i] % 2) == 1) {
  1468. if (types[i] == L || types[i] == EN || types[i] == AN)
  1469. levels[i] += 1;
  1470. }
  1471. }
  1472. /* Rule (L1)
  1473. * L1. On each line, reset the embedding level of the following characters
  1474. * to the paragraph embedding level:
  1475. * (1)segment separators, (2)paragraph separators,
  1476. * (3)any sequence of whitespace characters preceding
  1477. * a segment separator or paragraph separator,
  1478. * (4)and any sequence of white space characters
  1479. * at the end of the line.
  1480. * The types of characters used here are the original types, not those
  1481. * modified by the previous phase.
  1482. */
  1483. j=count-1;
  1484. while (j>0 && (getType(line[j].wc) == WS)) {
  1485. j--;
  1486. }
  1487. if (j < (count-1)) {
  1488. for (j++; j<count; j++)
  1489. levels[j] = paragraphLevel;
  1490. }
  1491. for (i=0; i<count; i++) {
  1492. tempType = getType(line[i].wc);
  1493. if (tempType == WS) {
  1494. j=i;
  1495. while (j<count && (getType(line[j].wc) == WS)) {
  1496. j++;
  1497. }
  1498. if (j==count || getType(line[j].wc) == B ||
  1499. getType(line[j].wc) == S) {
  1500. for (j--; j>=i ; j--) {
  1501. levels[j] = paragraphLevel;
  1502. }
  1503. }
  1504. } else if (tempType == B || tempType == S) {
  1505. levels[i] = paragraphLevel;
  1506. }
  1507. }
  1508. /* Rule (L4) NOT IMPLEMENTED
  1509. * L4. A character that possesses the mirrored property as specified by
  1510. * Section 4.7, Mirrored, must be depicted by a mirrored glyph if the
  1511. * resolved directionality of that character is R.
  1512. */
  1513. /* Note: this is implemented before L2 for efficiency */
  1514. for (i=0; i<count; i++)
  1515. if ((levels[i] % 2) == 1)
  1516. doMirror(&line[i].wc);
  1517. /* Rule (L2)
  1518. * L2. From the highest level found in the text to the lowest odd level on
  1519. * each line, including intermediate levels not actually present in the
  1520. * text, reverse any contiguous sequence of characters that are at that
  1521. * level or higher
  1522. */
  1523. /* we flip the character string and leave the level array */
  1524. i=0;
  1525. tempType = levels[0];
  1526. while (i < count) {
  1527. if (levels[i] > tempType)
  1528. tempType = levels[i];
  1529. i++;
  1530. }
  1531. /* maximum level in tempType. */
  1532. while (tempType > 0) { /* loop from highest level to the least odd, */
  1533. /* which i assume is 1 */
  1534. flipThisRun(line, levels, tempType, count);
  1535. tempType--;
  1536. }
  1537. /* Rule (L3) NOT IMPLEMENTED
  1538. * L3. Combining marks applied to a right-to-left base character will at
  1539. * this point precede their base character. If the rendering engine
  1540. * expects them to follow the base characters in the final display
  1541. * process, then the ordering of the marks and the base character must
  1542. * be reversed.
  1543. */
  1544. sfree(types);
  1545. sfree(levels);
  1546. return R;
  1547. }
  1548. /*
  1549. * Bad, Horrible function
  1550. * takes a pointer to a character that is checked for
  1551. * having a mirror glyph.
  1552. */
  1553. void doMirror(unsigned int *ch)
  1554. {
  1555. if ((*ch & 0xFF00) == 0) {
  1556. switch (*ch) {
  1557. case 0x0028: *ch = 0x0029; break;
  1558. case 0x0029: *ch = 0x0028; break;
  1559. case 0x003C: *ch = 0x003E; break;
  1560. case 0x003E: *ch = 0x003C; break;
  1561. case 0x005B: *ch = 0x005D; break;
  1562. case 0x005D: *ch = 0x005B; break;
  1563. case 0x007B: *ch = 0x007D; break;
  1564. case 0x007D: *ch = 0x007B; break;
  1565. case 0x00AB: *ch = 0x00BB; break;
  1566. case 0x00BB: *ch = 0x00AB; break;
  1567. }
  1568. } else if ((*ch & 0xFF00) == 0x2000) {
  1569. switch (*ch) {
  1570. case 0x2039: *ch = 0x203A; break;
  1571. case 0x203A: *ch = 0x2039; break;
  1572. case 0x2045: *ch = 0x2046; break;
  1573. case 0x2046: *ch = 0x2045; break;
  1574. case 0x207D: *ch = 0x207E; break;
  1575. case 0x207E: *ch = 0x207D; break;
  1576. case 0x208D: *ch = 0x208E; break;
  1577. case 0x208E: *ch = 0x208D; break;
  1578. }
  1579. } else if ((*ch & 0xFF00) == 0x2200) {
  1580. switch (*ch) {
  1581. case 0x2208: *ch = 0x220B; break;
  1582. case 0x2209: *ch = 0x220C; break;
  1583. case 0x220A: *ch = 0x220D; break;
  1584. case 0x220B: *ch = 0x2208; break;
  1585. case 0x220C: *ch = 0x2209; break;
  1586. case 0x220D: *ch = 0x220A; break;
  1587. case 0x2215: *ch = 0x29F5; break;
  1588. case 0x223C: *ch = 0x223D; break;
  1589. case 0x223D: *ch = 0x223C; break;
  1590. case 0x2243: *ch = 0x22CD; break;
  1591. case 0x2252: *ch = 0x2253; break;
  1592. case 0x2253: *ch = 0x2252; break;
  1593. case 0x2254: *ch = 0x2255; break;
  1594. case 0x2255: *ch = 0x2254; break;
  1595. case 0x2264: *ch = 0x2265; break;
  1596. case 0x2265: *ch = 0x2264; break;
  1597. case 0x2266: *ch = 0x2267; break;
  1598. case 0x2267: *ch = 0x2266; break;
  1599. case 0x2268: *ch = 0x2269; break;
  1600. case 0x2269: *ch = 0x2268; break;
  1601. case 0x226A: *ch = 0x226B; break;
  1602. case 0x226B: *ch = 0x226A; break;
  1603. case 0x226E: *ch = 0x226F; break;
  1604. case 0x226F: *ch = 0x226E; break;
  1605. case 0x2270: *ch = 0x2271; break;
  1606. case 0x2271: *ch = 0x2270; break;
  1607. case 0x2272: *ch = 0x2273; break;
  1608. case 0x2273: *ch = 0x2272; break;
  1609. case 0x2274: *ch = 0x2275; break;
  1610. case 0x2275: *ch = 0x2274; break;
  1611. case 0x2276: *ch = 0x2277; break;
  1612. case 0x2277: *ch = 0x2276; break;
  1613. case 0x2278: *ch = 0x2279; break;
  1614. case 0x2279: *ch = 0x2278; break;
  1615. case 0x227A: *ch = 0x227B; break;
  1616. case 0x227B: *ch = 0x227A; break;
  1617. case 0x227C: *ch = 0x227D; break;
  1618. case 0x227D: *ch = 0x227C; break;
  1619. case 0x227E: *ch = 0x227F; break;
  1620. case 0x227F: *ch = 0x227E; break;
  1621. case 0x2280: *ch = 0x2281; break;
  1622. case 0x2281: *ch = 0x2280; break;
  1623. case 0x2282: *ch = 0x2283; break;
  1624. case 0x2283: *ch = 0x2282; break;
  1625. case 0x2284: *ch = 0x2285; break;
  1626. case 0x2285: *ch = 0x2284; break;
  1627. case 0x2286: *ch = 0x2287; break;
  1628. case 0x2287: *ch = 0x2286; break;
  1629. case 0x2288: *ch = 0x2289; break;
  1630. case 0x2289: *ch = 0x2288; break;
  1631. case 0x228A: *ch = 0x228B; break;
  1632. case 0x228B: *ch = 0x228A; break;
  1633. case 0x228F: *ch = 0x2290; break;
  1634. case 0x2290: *ch = 0x228F; break;
  1635. case 0x2291: *ch = 0x2292; break;
  1636. case 0x2292: *ch = 0x2291; break;
  1637. case 0x2298: *ch = 0x29B8; break;
  1638. case 0x22A2: *ch = 0x22A3; break;
  1639. case 0x22A3: *ch = 0x22A2; break;
  1640. case 0x22A6: *ch = 0x2ADE; break;
  1641. case 0x22A8: *ch = 0x2AE4; break;
  1642. case 0x22A9: *ch = 0x2AE3; break;
  1643. case 0x22AB: *ch = 0x2AE5; break;
  1644. case 0x22B0: *ch = 0x22B1; break;
  1645. case 0x22B1: *ch = 0x22B0; break;
  1646. case 0x22B2: *ch = 0x22B3; break;
  1647. case 0x22B3: *ch = 0x22B2; break;
  1648. case 0x22B4: *ch = 0x22B5; break;
  1649. case 0x22B5: *ch = 0x22B4; break;
  1650. case 0x22B6: *ch = 0x22B7; break;
  1651. case 0x22B7: *ch = 0x22B6; break;
  1652. case 0x22C9: *ch = 0x22CA; break;
  1653. case 0x22CA: *ch = 0x22C9; break;
  1654. case 0x22CB: *ch = 0x22CC; break;
  1655. case 0x22CC: *ch = 0x22CB; break;
  1656. case 0x22CD: *ch = 0x2243; break;
  1657. case 0x22D0: *ch = 0x22D1; break;
  1658. case 0x22D1: *ch = 0x22D0; break;
  1659. case 0x22D6: *ch = 0x22D7; break;
  1660. case 0x22D7: *ch = 0x22D6; break;
  1661. case 0x22D8: *ch = 0x22D9; break;
  1662. case 0x22D9: *ch = 0x22D8; break;
  1663. case 0x22DA: *ch = 0x22DB; break;
  1664. case 0x22DB: *ch = 0x22DA; break;
  1665. case 0x22DC: *ch = 0x22DD; break;
  1666. case 0x22DD: *ch = 0x22DC; break;
  1667. case 0x22DE: *ch = 0x22DF; break;
  1668. case 0x22DF: *ch = 0x22DE; break;
  1669. case 0x22E0: *ch = 0x22E1; break;
  1670. case 0x22E1: *ch = 0x22E0; break;
  1671. case 0x22E2: *ch = 0x22E3; break;
  1672. case 0x22E3: *ch = 0x22E2; break;
  1673. case 0x22E4: *ch = 0x22E5; break;
  1674. case 0x22E5: *ch = 0x22E4; break;
  1675. case 0x22E6: *ch = 0x22E7; break;
  1676. case 0x22E7: *ch = 0x22E6; break;
  1677. case 0x22E8: *ch = 0x22E9; break;
  1678. case 0x22E9: *ch = 0x22E8; break;
  1679. case 0x22EA: *ch = 0x22EB; break;
  1680. case 0x22EB: *ch = 0x22EA; break;
  1681. case 0x22EC: *ch = 0x22ED; break;
  1682. case 0x22ED: *ch = 0x22EC; break;
  1683. case 0x22F0: *ch = 0x22F1; break;
  1684. case 0x22F1: *ch = 0x22F0; break;
  1685. case 0x22F2: *ch = 0x22FA; break;
  1686. case 0x22F3: *ch = 0x22FB; break;
  1687. case 0x22F4: *ch = 0x22FC; break;
  1688. case 0x22F6: *ch = 0x22FD; break;
  1689. case 0x22F7: *ch = 0x22FE; break;
  1690. case 0x22FA: *ch = 0x22F2; break;
  1691. case 0x22FB: *ch = 0x22F3; break;
  1692. case 0x22FC: *ch = 0x22F4; break;
  1693. case 0x22FD: *ch = 0x22F6; break;
  1694. case 0x22FE: *ch = 0x22F7; break;
  1695. }
  1696. } else if ((*ch & 0xFF00) == 0x2300) {
  1697. switch (*ch) {
  1698. case 0x2308: *ch = 0x2309; break;
  1699. case 0x2309: *ch = 0x2308; break;
  1700. case 0x230A: *ch = 0x230B; break;
  1701. case 0x230B: *ch = 0x230A; break;
  1702. case 0x2329: *ch = 0x232A; break;
  1703. case 0x232A: *ch = 0x2329; break;
  1704. }
  1705. } else if ((*ch & 0xFF00) == 0x2700) {
  1706. switch (*ch) {
  1707. case 0x2768: *ch = 0x2769; break;
  1708. case 0x2769: *ch = 0x2768; break;
  1709. case 0x276A: *ch = 0x276B; break;
  1710. case 0x276B: *ch = 0x276A; break;
  1711. case 0x276C: *ch = 0x276D; break;
  1712. case 0x276D: *ch = 0x276C; break;
  1713. case 0x276E: *ch = 0x276F; break;
  1714. case 0x276F: *ch = 0x276E; break;
  1715. case 0x2770: *ch = 0x2771; break;
  1716. case 0x2771: *ch = 0x2770; break;
  1717. case 0x2772: *ch = 0x2773; break;
  1718. case 0x2773: *ch = 0x2772; break;
  1719. case 0x2774: *ch = 0x2775; break;
  1720. case 0x2775: *ch = 0x2774; break;
  1721. case 0x27D5: *ch = 0x27D6; break;
  1722. case 0x27D6: *ch = 0x27D5; break;
  1723. case 0x27DD: *ch = 0x27DE; break;
  1724. case 0x27DE: *ch = 0x27DD; break;
  1725. case 0x27E2: *ch = 0x27E3; break;
  1726. case 0x27E3: *ch = 0x27E2; break;
  1727. case 0x27E4: *ch = 0x27E5; break;
  1728. case 0x27E5: *ch = 0x27E4; break;
  1729. case 0x27E6: *ch = 0x27E7; break;
  1730. case 0x27E7: *ch = 0x27E6; break;
  1731. case 0x27E8: *ch = 0x27E9; break;
  1732. case 0x27E9: *ch = 0x27E8; break;
  1733. case 0x27EA: *ch = 0x27EB; break;
  1734. case 0x27EB: *ch = 0x27EA; break;
  1735. }
  1736. } else if ((*ch & 0xFF00) == 0x2900) {
  1737. switch (*ch) {
  1738. case 0x2983: *ch = 0x2984; break;
  1739. case 0x2984: *ch = 0x2983; break;
  1740. case 0x2985: *ch = 0x2986; break;
  1741. case 0x2986: *ch = 0x2985; break;
  1742. case 0x2987: *ch = 0x2988; break;
  1743. case 0x2988: *ch = 0x2987; break;
  1744. case 0x2989: *ch = 0x298A; break;
  1745. case 0x298A: *ch = 0x2989; break;
  1746. case 0x298B: *ch = 0x298C; break;
  1747. case 0x298C: *ch = 0x298B; break;
  1748. case 0x298D: *ch = 0x2990; break;
  1749. case 0x298E: *ch = 0x298F; break;
  1750. case 0x298F: *ch = 0x298E; break;
  1751. case 0x2990: *ch = 0x298D; break;
  1752. case 0x2991: *ch = 0x2992; break;
  1753. case 0x2992: *ch = 0x2991; break;
  1754. case 0x2993: *ch = 0x2994; break;
  1755. case 0x2994: *ch = 0x2993; break;
  1756. case 0x2995: *ch = 0x2996; break;
  1757. case 0x2996: *ch = 0x2995; break;
  1758. case 0x2997: *ch = 0x2998; break;
  1759. case 0x2998: *ch = 0x2997; break;
  1760. case 0x29B8: *ch = 0x2298; break;
  1761. case 0x29C0: *ch = 0x29C1; break;
  1762. case 0x29C1: *ch = 0x29C0; break;
  1763. case 0x29C4: *ch = 0x29C5; break;
  1764. case 0x29C5: *ch = 0x29C4; break;
  1765. case 0x29CF: *ch = 0x29D0; break;
  1766. case 0x29D0: *ch = 0x29CF; break;
  1767. case 0x29D1: *ch = 0x29D2; break;
  1768. case 0x29D2: *ch = 0x29D1; break;
  1769. case 0x29D4: *ch = 0x29D5; break;
  1770. case 0x29D5: *ch = 0x29D4; break;
  1771. case 0x29D8: *ch = 0x29D9; break;
  1772. case 0x29D9: *ch = 0x29D8; break;
  1773. case 0x29DA: *ch = 0x29DB; break;
  1774. case 0x29DB: *ch = 0x29DA; break;
  1775. case 0x29F5: *ch = 0x2215; break;
  1776. case 0x29F8: *ch = 0x29F9; break;
  1777. case 0x29F9: *ch = 0x29F8; break;
  1778. case 0x29FC: *ch = 0x29FD; break;
  1779. case 0x29FD: *ch = 0x29FC; break;
  1780. }
  1781. } else if ((*ch & 0xFF00) == 0x2A00) {
  1782. switch (*ch) {
  1783. case 0x2A2B: *ch = 0x2A2C; break;
  1784. case 0x2A2C: *ch = 0x2A2B; break;
  1785. case 0x2A2D: *ch = 0x2A2C; break;
  1786. case 0x2A2E: *ch = 0x2A2D; break;
  1787. case 0x2A34: *ch = 0x2A35; break;
  1788. case 0x2A35: *ch = 0x2A34; break;
  1789. case 0x2A3C: *ch = 0x2A3D; break;
  1790. case 0x2A3D: *ch = 0x2A3C; break;
  1791. case 0x2A64: *ch = 0x2A65; break;
  1792. case 0x2A65: *ch = 0x2A64; break;
  1793. case 0x2A79: *ch = 0x2A7A; break;
  1794. case 0x2A7A: *ch = 0x2A79; break;
  1795. case 0x2A7D: *ch = 0x2A7E; break;
  1796. case 0x2A7E: *ch = 0x2A7D; break;
  1797. case 0x2A7F: *ch = 0x2A80; break;
  1798. case 0x2A80: *ch = 0x2A7F; break;
  1799. case 0x2A81: *ch = 0x2A82; break;
  1800. case 0x2A82: *ch = 0x2A81; break;
  1801. case 0x2A83: *ch = 0x2A84; break;
  1802. case 0x2A84: *ch = 0x2A83; break;
  1803. case 0x2A8B: *ch = 0x2A8C; break;
  1804. case 0x2A8C: *ch = 0x2A8B; break;
  1805. case 0x2A91: *ch = 0x2A92; break;
  1806. case 0x2A92: *ch = 0x2A91; break;
  1807. case 0x2A93: *ch = 0x2A94; break;
  1808. case 0x2A94: *ch = 0x2A93; break;
  1809. case 0x2A95: *ch = 0x2A96; break;
  1810. case 0x2A96: *ch = 0x2A95; break;
  1811. case 0x2A97: *ch = 0x2A98; break;
  1812. case 0x2A98: *ch = 0x2A97; break;
  1813. case 0x2A99: *ch = 0x2A9A; break;
  1814. case 0x2A9A: *ch = 0x2A99; break;
  1815. case 0x2A9B: *ch = 0x2A9C; break;
  1816. case 0x2A9C: *ch = 0x2A9B; break;
  1817. case 0x2AA1: *ch = 0x2AA2; break;
  1818. case 0x2AA2: *ch = 0x2AA1; break;
  1819. case 0x2AA6: *ch = 0x2AA7; break;
  1820. case 0x2AA7: *ch = 0x2AA6; break;
  1821. case 0x2AA8: *ch = 0x2AA9; break;
  1822. case 0x2AA9: *ch = 0x2AA8; break;
  1823. case 0x2AAA: *ch = 0x2AAB; break;
  1824. case 0x2AAB: *ch = 0x2AAA; break;
  1825. case 0x2AAC: *ch = 0x2AAD; break;
  1826. case 0x2AAD: *ch = 0x2AAC; break;
  1827. case 0x2AAF: *ch = 0x2AB0; break;
  1828. case 0x2AB0: *ch = 0x2AAF; break;
  1829. case 0x2AB3: *ch = 0x2AB4; break;
  1830. case 0x2AB4: *ch = 0x2AB3; break;
  1831. case 0x2ABB: *ch = 0x2ABC; break;
  1832. case 0x2ABC: *ch = 0x2ABB; break;
  1833. case 0x2ABD: *ch = 0x2ABE; break;
  1834. case 0x2ABE: *ch = 0x2ABD; break;
  1835. case 0x2ABF: *ch = 0x2AC0; break;
  1836. case 0x2AC0: *ch = 0x2ABF; break;
  1837. case 0x2AC1: *ch = 0x2AC2; break;
  1838. case 0x2AC2: *ch = 0x2AC1; break;
  1839. case 0x2AC3: *ch = 0x2AC4; break;
  1840. case 0x2AC4: *ch = 0x2AC3; break;
  1841. case 0x2AC5: *ch = 0x2AC6; break;
  1842. case 0x2AC6: *ch = 0x2AC5; break;
  1843. case 0x2ACD: *ch = 0x2ACE; break;
  1844. case 0x2ACE: *ch = 0x2ACD; break;
  1845. case 0x2ACF: *ch = 0x2AD0; break;
  1846. case 0x2AD0: *ch = 0x2ACF; break;
  1847. case 0x2AD1: *ch = 0x2AD2; break;
  1848. case 0x2AD2: *ch = 0x2AD1; break;
  1849. case 0x2AD3: *ch = 0x2AD4; break;
  1850. case 0x2AD4: *ch = 0x2AD3; break;
  1851. case 0x2AD5: *ch = 0x2AD6; break;
  1852. case 0x2AD6: *ch = 0x2AD5; break;
  1853. case 0x2ADE: *ch = 0x22A6; break;
  1854. case 0x2AE3: *ch = 0x22A9; break;
  1855. case 0x2AE4: *ch = 0x22A8; break;
  1856. case 0x2AE5: *ch = 0x22AB; break;
  1857. case 0x2AEC: *ch = 0x2AED; break;
  1858. case 0x2AED: *ch = 0x2AEC; break;
  1859. case 0x2AF7: *ch = 0x2AF8; break;
  1860. case 0x2AF8: *ch = 0x2AF7; break;
  1861. case 0x2AF9: *ch = 0x2AFA; break;
  1862. case 0x2AFA: *ch = 0x2AF9; break;
  1863. }
  1864. } else if ((*ch & 0xFF00) == 0x3000) {
  1865. switch (*ch) {
  1866. case 0x3008: *ch = 0x3009; break;
  1867. case 0x3009: *ch = 0x3008; break;
  1868. case 0x300A: *ch = 0x300B; break;
  1869. case 0x300B: *ch = 0x300A; break;
  1870. case 0x300C: *ch = 0x300D; break;
  1871. case 0x300D: *ch = 0x300C; break;
  1872. case 0x300E: *ch = 0x300F; break;
  1873. case 0x300F: *ch = 0x300E; break;
  1874. case 0x3010: *ch = 0x3011; break;
  1875. case 0x3011: *ch = 0x3010; break;
  1876. case 0x3014: *ch = 0x3015; break;
  1877. case 0x3015: *ch = 0x3014; break;
  1878. case 0x3016: *ch = 0x3017; break;
  1879. case 0x3017: *ch = 0x3016; break;
  1880. case 0x3018: *ch = 0x3019; break;
  1881. case 0x3019: *ch = 0x3018; break;
  1882. case 0x301A: *ch = 0x301B; break;
  1883. case 0x301B: *ch = 0x301A; break;
  1884. }
  1885. } else if ((*ch & 0xFF00) == 0xFF00) {
  1886. switch (*ch) {
  1887. case 0xFF08: *ch = 0xFF09; break;
  1888. case 0xFF09: *ch = 0xFF08; break;
  1889. case 0xFF1C: *ch = 0xFF1E; break;
  1890. case 0xFF1E: *ch = 0xFF1C; break;
  1891. case 0xFF3B: *ch = 0xFF3D; break;
  1892. case 0xFF3D: *ch = 0xFF3B; break;
  1893. case 0xFF5B: *ch = 0xFF5D; break;
  1894. case 0xFF5D: *ch = 0xFF5B; break;
  1895. case 0xFF5F: *ch = 0xFF60; break;
  1896. case 0xFF60: *ch = 0xFF5F; break;
  1897. case 0xFF62: *ch = 0xFF63; break;
  1898. case 0xFF63: *ch = 0xFF62; break;
  1899. }
  1900. }
  1901. }
  #ifdef TEST_GETTYPE

  #include <stdio.h>
  #include <stdlib.h>
  #include <assert.h>

  /*
   * Standalone test driver for getType(), compiled only when
   * TEST_GETTYPE is defined.
   *
   * Each command-line argument is parsed with strtoul() using base 0
   * (so decimal, 0x-prefixed hex and 0-prefixed octal are all
   * accepted), passed to getType(), and the name of the resulting type
   * constant is printed alongside the code point.
   *
   * Always returns 0.
   */
  int main(int argc, char **argv)
  {
      /*
       * Lookup table from type constant to its printable name. The
       * table is indexed directly by the type value, so the entries
       * must appear in the same order as the constants' numeric values;
       * the assertions in the loop below check this at run time.
       */
      static const struct { int type; const char *name; } typetoname[] = {
  #define TYPETONAME(X) { X , #X }
          TYPETONAME(L),
          TYPETONAME(LRE),
          TYPETONAME(LRO),
          TYPETONAME(R),
          TYPETONAME(AL),
          TYPETONAME(RLE),
          TYPETONAME(RLO),
          TYPETONAME(PDF),
          TYPETONAME(EN),
          TYPETONAME(ES),
          TYPETONAME(ET),
          TYPETONAME(AN),
          TYPETONAME(CS),
          TYPETONAME(NSM),
          TYPETONAME(BN),
          TYPETONAME(B),
          TYPETONAME(S),
          TYPETONAME(WS),
          TYPETONAME(ON),
  #undef TYPETONAME
      };
      const int ntypes = (int)(sizeof(typetoname) / sizeof(typetoname[0]));
      int i;

      for (i = 1; i < argc; i++) {
          unsigned long chr = strtoul(argv[i], NULL, 0);
          int type = getType(chr);

          /* Reject out-of-range values before using them as an index. */
          assert(type >= 0 && type < ntypes);
          /* The table slot must describe the very type we looked up. */
          assert(typetoname[type].type == type);

          /* chr is unsigned long, so the conversion needs the 'l' modifier. */
          printf("U+%04lx: %s\n", chr, typetoname[type].name);
      }

      return 0;
  }

  #endif