jisx4051pairtable.txt 9.0 KB


  1. /*
  2. Simplification of Pair Table in JIS X 4051
  3. 1. The Original Table - in 4.1.3
  4. In JIS X 4051, the pair table is defined as below:
  5. Class of
  6. Leading Class of Trailing Char Class
  7. Char
  8. 1 2 3 4 5 6 7 8 9 10 11 12 13 13 14 14 15 16 17 18 19 20
  9. * # * #
  10. 1 X X X X X X X X X X X X X X X X X X X X X E
  11. 2 X X X X X X
  12. 3 X X X X X X
  13. 4 X X X X X X
  14. 5 X X X X X X
  15. 6 X X X X X X
  16. 7 X X X X X X X
  17. 8 X X X X X X E
  18. 9 X X X X X X
  19. 10 X X X X X X
  20. 11 X X X X X X
  21. 12 X X X X X X
  22. 13 X X X X X X X
  23. 14 X X X X X X X
  24. 15 X X X X X X X X X
  25. 16 X X X X X X X X
  26. 17 X X X X X E
  27. 18 X X X X X X X X X
  28. 19 X E E E E E X X X X X X X X X X X X E X E E
  29. 20 X X X X X E
  30. * Same Char
  31. # Other Char
  32. 2. Simplified by removing the classes which we do not care about
  33. However, since we do not care about class 13 (Subscript), 14 (Ruby),
  34. 19 (split line note begin quote), and 20 (split line note end quote),
  35. we can simplify this pair table into the following:
  36. Class of
  37. Leading Class of Trailing Char Class
  38. Char
  39. 1 2 3 4 5 6 7 8 9 10 11 12 15 16 17 18
  40. 1 X X X X X X X X X X X X X X X X
  41. 2 X X X X X
  42. 3 X X X X X
  43. 4 X X X X X
  44. 5 X X X X X
  45. 6 X X X X X
  46. 7 X X X X X X
  47. 8 X X X X X X
  48. 9 X X X X X
  49. 10 X X X X X
  50. 11 X X X X X
  51. 12 X X X X X
  52. 15 X X X X X X X X
  53. 16 X X X X X X X
  54. 17 X X X X X
  55. 18 X X X X X X X X
  56. 3. Simplified by merging classes
  57. After simplification 2, the pair table has some duplicated rows:
  58. a. classes 2, 3, 4, 5 and 6 are the same - we can merge them
  59. b. classes 10, 11, 12 and 17 are the same - we can merge them
  60. Class of
  61. Leading Class of Trailing Char Class
  62. Char
  63. 1 [a] 7 8 9 [b]15 16 18
  64. 1 X X X X X X X X X
  65. [a] X
  66. 7 X X
  67. 8 X X
  68. 9 X
  69. [b] X
  70. 15 X X X X
  71. 16 X X X
  72. 18 X X X X
  73. 4. Now we use one bit to encode whether it is breakable, and use 2 bytes
  74. for one row; the bit table then looks like this:
  75. 18 <- 1
  76. 1 0000 0001 1111 1111 = 0x01FF
  77. [a] 0000 0000 0000 0010 = 0x0002
  78. 7 0000 0000 0000 0110 = 0x0006
  79. 8 0000 0000 0100 0010 = 0x0042
  80. 9 0000 0000 0000 0010 = 0x0002
  81. [b] 0000 0000 0000 0010 = 0x0002
  82. 15 0000 0001 0101 0010 = 0x0152
  83. 16 0000 0001 1000 0010 = 0x0182
  84. 18 0000 0001 1100 0010 = 0x01C2
  85. */
  86. static uint16_t gJISx4051SimplifiedPair[9] = {
  87. 0x01FF, 0x0002, 0x0006, 0x0042, 0x0002, 0x0002, 0x0152, 0x0182, 0x01C2
  88. };
  89. PRBool XXXX::ClassesToPair(nsJISx4051Cls aCls1, nsJISx4051Cls aCls2)
  90. {
  91. NS_ASSERTION( (aCls1 < 9), "invalid class");
  92. NS_ASSERTION( (aCls2 < 9), "invalid class");
  93. return ( 0 != (gJISx4051SimplifiedPair[aCls1] & (1L << aCls2) ));
  94. }
  95. #define X4051_IS_DIGIT(u) ((0x0030 <= (u)) && ((u) <= 0x0039))
  96. nsJISx4051Cls XXXX::GetClass(
  97. PRUnichar aChar, PRUnichar aBefore = 0, PRUnichar aAfter = 0)
  98. {
  99. // handle the special case for cls 15: ',' or '.' between two digits
  100. if( ((0x2C == aChar) || (0x2E == aChar)) &&
  101. (X4051_IS_DIGIT(aBefore)) && (X4051_IS_DIGIT(aAfter)))
  102. {
  103. return kJISx4051Cls_15;
  104. }
  105. nsJISx4051Cls cls;
  106. if(gSingle->Lookup(aChar, &cls))
  107. return cls;
  108. if(gRange->Lookup(aChar, &cls))
  109. return cls;
  110. return kJISx4051Cls_15;
  111. }
  112. typedef enum {
  113. kJISx4051Cls_1 = 0,
  114. kJISx4051Cls_2 = 1,
  115. kJISx4051Cls_3 = 1,
  116. kJISx4051Cls_4 = 1,
  117. kJISx4051Cls_5 = 1,
  118. kJISx4051Cls_6 = 1,
  119. kJISx4051Cls_7 = 2,
  120. kJISx4051Cls_8 = 3,
  121. kJISx4051Cls_9 = 4,
  122. kJISx4051Cls_10 = 5,
  123. kJISx4051Cls_11 = 5,
  124. kJISx4051Cls_12 = 5,
  125. // kJISx4051Cls_13 = 0,
  126. // kJISx4051Cls_14 = 0,
  127. kJISx4051Cls_15 = 6,
  128. kJISx4051Cls_16 = 7,
  129. kJISx4051Cls_17 = 5,
  130. kJISx4051Cls_18 = 8,
  131. // kJISx4051Cls_19 = 0,
  132. // kJISx4051Cls_20 = 0
  133. } nsJISx4051Cls;
  134. // Table 2
  135. YYYY(kJISx4051Cls_1 , 0x0028),
  136. YYYY(kJISx4051Cls_1 , 0x005B),
  137. YYYY(kJISx4051Cls_1 , 0x007B),
  138. YYYY(kJISx4051Cls_1 , 0x2018),
  139. YYYY(kJISx4051Cls_1 , 0x201B),
  140. YYYY(kJISx4051Cls_1 , 0x201C),
  141. YYYY(kJISx4051Cls_1 , 0x201F),
  142. YYYY(kJISx4051Cls_1 , 0x3008),
  143. YYYY(kJISx4051Cls_1 , 0x300A),
  144. YYYY(kJISx4051Cls_1 , 0x300C),
  145. YYYY(kJISx4051Cls_1 , 0x300E),
  146. YYYY(kJISx4051Cls_1 , 0x3010),
  147. YYYY(kJISx4051Cls_1 , 0x3014),
  148. YYYY(kJISx4051Cls_1 , 0x3016),
  149. YYYY(kJISx4051Cls_1 , 0x3018),
  150. YYYY(kJISx4051Cls_1 , 0x301A),
  151. YYYY(kJISx4051Cls_1 , 0x301D),
  152. // Table 3
  153. YYYY(kJISx4051Cls_2 , 0x0029),
  154. YYYY(kJISx4051Cls_2 , 0x002C),
  155. YYYY(kJISx4051Cls_2 , 0x005D),
  156. YYYY(kJISx4051Cls_2 , 0x007D),
  157. YYYY(kJISx4051Cls_2 , 0x2019),
  158. YYYY(kJISx4051Cls_2 , 0x201A),
  159. YYYY(kJISx4051Cls_2 , 0x201D),
  160. YYYY(kJISx4051Cls_2 , 0x201E),
  161. YYYY(kJISx4051Cls_2 , 0x3001),
  162. YYYY(kJISx4051Cls_2 , 0x3009),
  163. YYYY(kJISx4051Cls_2 , 0x300B),
  164. YYYY(kJISx4051Cls_2 , 0x300D),
  165. YYYY(kJISx4051Cls_2 , 0x300F),
  166. YYYY(kJISx4051Cls_2 , 0x3011),
  167. YYYY(kJISx4051Cls_2 , 0x3015),
  168. YYYY(kJISx4051Cls_2 , 0x3017),
  169. YYYY(kJISx4051Cls_2 , 0x3019),
  170. YYYY(kJISx4051Cls_2 , 0x301B),
  171. YYYY(kJISx4051Cls_2 , 0x301E),
  172. YYYY(kJISx4051Cls_2 , 0x301F),
  173. // Table 4
  174. YYYY(kJISx4051Cls_3 , 0x203C),
  175. YYYY(kJISx4051Cls_3 , 0x2044),
  176. YYYY(kJISx4051Cls_3 , 0x301C),
  177. YYYY(kJISx4051Cls_3 , 0x3041),
  178. YYYY(kJISx4051Cls_3 , 0x3043),
  179. YYYY(kJISx4051Cls_3 , 0x3045),
  180. YYYY(kJISx4051Cls_3 , 0x3047),
  181. YYYY(kJISx4051Cls_3 , 0x3049),
  182. YYYY(kJISx4051Cls_3 , 0x3063),
  183. YYYY(kJISx4051Cls_3 , 0x3083),
  184. YYYY(kJISx4051Cls_3 , 0x3085),
  185. YYYY(kJISx4051Cls_3 , 0x3087),
  186. YYYY(kJISx4051Cls_3 , 0x308E),
  187. YYYY(kJISx4051Cls_3 , 0x309D),
  188. YYYY(kJISx4051Cls_3 , 0x309E),
  189. YYYY(kJISx4051Cls_3 , 0x30A1),
  190. YYYY(kJISx4051Cls_3 , 0x30A3),
  191. YYYY(kJISx4051Cls_3 , 0x30A5),
  192. YYYY(kJISx4051Cls_3 , 0x30A7),
  193. YYYY(kJISx4051Cls_3 , 0x30A9),
  194. YYYY(kJISx4051Cls_3 , 0x30C3),
  195. YYYY(kJISx4051Cls_3 , 0x30E3),
  196. YYYY(kJISx4051Cls_3 , 0x30E5),
  197. YYYY(kJISx4051Cls_3 , 0x30E7),
  198. YYYY(kJISx4051Cls_3 , 0x30EE),
  199. YYYY(kJISx4051Cls_3 , 0x30F5),
  200. YYYY(kJISx4051Cls_3 , 0x30F6),
  201. YYYY(kJISx4051Cls_3 , 0x30FC),
  202. YYYY(kJISx4051Cls_3 , 0x30FD),
  203. YYYY(kJISx4051Cls_3 , 0x30FE),
  204. // Table 5
  205. YYYY(kJISx4051Cls_4 , 0x0021),
  206. YYYY(kJISx4051Cls_4 , 0x003F),
  207. // Table 6
  208. YYYY(kJISx4051Cls_5 , 0x003A),
  209. YYYY(kJISx4051Cls_5 , 0x003B),
  210. YYYY(kJISx4051Cls_5 , 0x30FB),
  211. // Table 7
  212. YYYY(kJISx4051Cls_6 , 0x002E),
  213. YYYY(kJISx4051Cls_6 , 0x3002),
  214. // Table 8
  215. YYYY(kJISx4051Cls_7 , 0x2014),
  216. YYYY(kJISx4051Cls_7 , 0x2024),
  217. YYYY(kJISx4051Cls_7 , 0x2025),
  218. YYYY(kJISx4051Cls_7 , 0x2026),
  219. // Table 9
  220. YYYY(kJISx4051Cls_8 , 0x0024),
  221. YYYY(kJISx4051Cls_8 , 0x00A3),
  222. YYYY(kJISx4051Cls_8 , 0x00A5),
  223. YYYY(kJISx4051Cls_8 , 0x2116),
  224. // Table 10
  225. YYYY(kJISx4051Cls_9 , 0x0025),
  226. YYYY(kJISx4051Cls_9 , 0x00A2),
  227. YYYY(kJISx4051Cls_9 , 0x00B0),
  228. YYYY(kJISx4051Cls_9 , 0x2030),
  229. YYYY(kJISx4051Cls_9 , 0x2031),
  230. YYYY(kJISx4051Cls_9 , 0x2032),
  231. YYYY(kJISx4051Cls_9 , 0x2033),
  232. // Table 11
  233. YYYY(kJISx4051Cls_10, 0x3000),
  234. // Table 12
  235. ZZZZ(kJISx4051Cls_11, 0x3000),