123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287 |
- /*
- Simplification of Pair Table in JIS X 4051
- 1. The Original Table - in 4.1.3
- In JIS X 4051, the pair table is defined as below.
- Class of
- Leading Class of Trailing Char Class
- Char
- 1 2 3 4 5 6 7 8 9 10 11 12 13 13 14 14 15 16 17 18 19 20
- * # * #
- 1 X X X X X X X X X X X X X X X X X X X X X E
- 2 X X X X X X
- 3 X X X X X X
- 4 X X X X X X
- 5 X X X X X X
- 6 X X X X X X
- 7 X X X X X X X
- 8 X X X X X X E
- 9 X X X X X X
- 10 X X X X X X
- 11 X X X X X X
- 12 X X X X X X
- 13 X X X X X X X
- 14 X X X X X X X
- 15 X X X X X X X X X
- 16 X X X X X X X X
- 17 X X X X X E
- 18 X X X X X X X X X
- 19 X E E E E E X X X X X X X X X X X X E X E E
- 20 X X X X X E
- * Same Char
- # Other Char
- 2. Simplified by removing the classes which we do not care about
- However, since we do not care about class 13 (Subscript), 14 (Ruby),
- 19 (split line note begin quote), and 20 (split line note end quote),
- we can simplify this pair table into the following:
- Class of
- Leading    Class of Trailing Char Class
- Char
-            1  2  3  4  5  6  7  8  9 10 11 12 15 16 17 18
-
-   1        X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X
-   2           X  X  X  X  X
-   3           X  X  X  X  X
-   4           X  X  X  X  X
-   5           X  X  X  X  X
-   6           X  X  X  X  X
-   7           X  X  X  X  X  X
-   8           X  X  X  X  X                    X
-   9           X  X  X  X  X
-  10           X  X  X  X  X
-  11           X  X  X  X  X
-  12           X  X  X  X  X
-  15           X  X  X  X  X        X           X        X
-  16           X  X  X  X  X                       X     X
-  17           X  X  X  X  X
-  18           X  X  X  X  X                    X  X     X
- 3. Simplified by merging classes
- After the second simplification, the pair table has some duplication:
- a. classes 2, 3, 4, 5 and 6 are the same - we can merge them into [a]
- b. classes 10, 11, 12 and 17 are the same - we can merge them into [b]
- Class of
- Leading    Class of Trailing Char Class
- Char
-             1 [a]   7   8   9 [b]  15  16  18
-
-   1         X   X   X   X   X   X   X   X   X
- [a]             X
-   7             X   X
-   8             X                   X
-   9             X
- [b]             X
-  15             X           X       X       X
-  16             X                       X   X
-  18             X                   X   X   X
- 4. Now we use one bit to encode whether it is breakable, and use 2 bytes
- for one row; the bit table will then look like:
- 18 <- 1
-
-   1  0000 0001 1111 1111 = 0x01FF
- [a]  0000 0000 0000 0010 = 0x0002
-   7  0000 0000 0000 0110 = 0x0006
-   8  0000 0000 0100 0010 = 0x0042
-   9  0000 0000 0000 0010 = 0x0002
- [b]  0000 0000 0000 0010 = 0x0002
-  15  0000 0001 0101 0010 = 0x0152
-  16  0000 0001 1000 0010 = 0x0182
-  18  0000 0001 1100 0010 = 0x01C2
- */
// Bit-encoded form of the simplified (merged-class) JIS X 4051 pair table.
//
// Index the array with the merged class of the leading character (an
// nsJISx4051Cls value, 0..8).  Bit n of that entry corresponds to merged
// class n of the trailing character and is set where the simplified pair
// table has an 'X'.
//
// Row / bit order: 1, [a] = 2..6, 7, 8, 9, [b] = 10/11/12/17, 15, 16, 18.
//
// The table is read-only, hence const.
static const uint16_t gJISx4051SimplifiedPair[9] = {
  0x01FF,  // 1   : X against every class
  0x0002,  // [a] : [a]
  0x0006,  // 7   : [a], 7
  0x0042,  // 8   : [a], 15
  0x0002,  // 9   : [a]
  0x0002,  // [b] : [a]  (was 0x0042, a copy of row 8; rows 10/11/12/17 of the
           //        unmerged table carry only the five class 2..6 marks,
           //        exactly like row 9, so the merged row has one bit set)
  0x0152,  // 15  : [a], 9, 15, 18
  0x0182,  // 16  : [a], 16, 18
  0x01C2   // 18  : [a], 15, 16, 18
};
- PRBool XXXX::ClassesToPair(nsJISx4051Cls aCls1, nsJISx4051Cls aCls1)
- {
- NS_ASSERTION( (aCls1 < 9) "invalid class");
- NS_ASSERTION( (aCls2 < 9) "invalid class");
- return ( 0 != (gJISx4051SimplifiedPair[aCls1] & (1L << aCls2) ));
- }
// True when code unit `u` lies in U+0030..U+0039 (ASCII '0'..'9').
// The comparisons were previously reversed, which made the test unsatisfiable.
// NOTE: `u` is evaluated twice; pass an expression without side effects.
#define X4051_IS_DIGIT(u) ((0x0030 <= (u)) && ((u) <= 0x0039))
- nsJISx4051Cls XXXX::GetClass(
- PRUnichar aChar, PRUnichar aBefore = 0, PRUnichar aAfter = 0)
- {
- // take care the special case in cls 15
- if( ((0x2C == aChar) || (0x2E == aChar)) &&
- (X4051_IS_DIGIT(aBefore)) && X4051_IS_DIGIT(aAfter)))
- {
- return kJISx4051Cls_15;
- }
-
- nsJISx4051Cls cls;
- if(gSingle->Lookup(aChar, &cls))
- return cls;
- if(gRange->Lookup(aChar, &cls))
- return cls;
-
- return kJISx4051Cls_15;
- }
// Merged JIS X 4051 character classes.
//
// Each enumerator is named after the original JIS X 4051 class number, but
// its value is the index of the merged class, i.e. the row index and bit
// position used with gJISx4051SimplifiedPair:
//   0 = class 1
//   1 = classes 2..6          ([a])
//   2 = class 7
//   3 = class 8
//   4 = class 9
//   5 = classes 10/11/12/17   ([b])
//   6 = class 15
//   7 = class 16
//   8 = class 18
// Classes 13, 14, 19 and 20 have no enumerator; they remain below only as
// commented-out placeholders.
typedef enum {
  kJISx4051Cls_1  = 0,
  kJISx4051Cls_2  = 1,
  kJISx4051Cls_3  = 1,
  kJISx4051Cls_4  = 1,
  kJISx4051Cls_5  = 1,
  kJISx4051Cls_6  = 1,
  kJISx4051Cls_7  = 2,
  kJISx4051Cls_8  = 3,
  kJISx4051Cls_9  = 4,
  kJISx4051Cls_10 = 5,
  kJISx4051Cls_11 = 5,
  kJISx4051Cls_12 = 5,
  // kJISx4051Cls_13 = 0,
  // kJISx4051Cls_14 = 0,
  kJISx4051Cls_15 = 6,
  kJISx4051Cls_16 = 7,
  kJISx4051Cls_17 = 5,
  // kJISx4051Cls_19 = 0,
  // kJISx4051Cls_20 = 0
  kJISx4051Cls_18 = 8   // last live enumerator: no trailing comma (pre-C++11 safe)
} nsJISx4051Cls;
- // Table 2: class 1 entries - opening brackets and opening quotation marks, e.g. '(' '[' '{'
- YYYY(kJISx4051Cls_1 , 0x0028),
- YYYY(kJISx4051Cls_1 , 0x005B),
- YYYY(kJISx4051Cls_1 , 0x007B),
- YYYY(kJISx4051Cls_1 , 0x2018),
- YYYY(kJISx4051Cls_1 , 0x201B),
- YYYY(kJISx4051Cls_1 , 0x201C),
- YYYY(kJISx4051Cls_1 , 0x201F),
- YYYY(kJISx4051Cls_1 , 0x3008),
- YYYY(kJISx4051Cls_1 , 0x300A),
- YYYY(kJISx4051Cls_1 , 0x300C),
- YYYY(kJISx4051Cls_1 , 0x300E),
- YYYY(kJISx4051Cls_1 , 0x3010),
- YYYY(kJISx4051Cls_1 , 0x3014),
- YYYY(kJISx4051Cls_1 , 0x3016),
- YYYY(kJISx4051Cls_1 , 0x3018),
- YYYY(kJISx4051Cls_1 , 0x301A),
- YYYY(kJISx4051Cls_1 , 0x301D),
- // Table 3: class 2 entries - closing brackets, closing quotation marks, ',' and U+3001
- YYYY(kJISx4051Cls_2 , 0x0029),
- YYYY(kJISx4051Cls_2 , 0x002C),
- YYYY(kJISx4051Cls_2 , 0x005D),
- YYYY(kJISx4051Cls_2 , 0x007D),
- YYYY(kJISx4051Cls_2 , 0x2019),
- YYYY(kJISx4051Cls_2 , 0x201A),
- YYYY(kJISx4051Cls_2 , 0x201D),
- YYYY(kJISx4051Cls_2 , 0x201E),
- YYYY(kJISx4051Cls_2 , 0x3001),
- YYYY(kJISx4051Cls_2 , 0x3009),
- YYYY(kJISx4051Cls_2 , 0x300B),
- YYYY(kJISx4051Cls_2 , 0x300D),
- YYYY(kJISx4051Cls_2 , 0x300F),
- YYYY(kJISx4051Cls_2 , 0x3011),
- YYYY(kJISx4051Cls_2 , 0x3015),
- YYYY(kJISx4051Cls_2 , 0x3017),
- YYYY(kJISx4051Cls_2 , 0x3019),
- YYYY(kJISx4051Cls_2 , 0x301B),
- YYYY(kJISx4051Cls_2 , 0x301E),
- YYYY(kJISx4051Cls_2 , 0x301F),
- // Table 4: class 3 entries - small kana, iteration marks, U+30FC prolonged sound mark and a few symbols
- YYYY(kJISx4051Cls_3 , 0x203C),
- YYYY(kJISx4051Cls_3 , 0x2044),
- YYYY(kJISx4051Cls_3 , 0x301C),
- YYYY(kJISx4051Cls_3 , 0x3041),
- YYYY(kJISx4051Cls_3 , 0x3043),
- YYYY(kJISx4051Cls_3 , 0x3045),
- YYYY(kJISx4051Cls_3 , 0x3047),
- YYYY(kJISx4051Cls_3 , 0x3049),
- YYYY(kJISx4051Cls_3 , 0x3063),
- YYYY(kJISx4051Cls_3 , 0x3083),
- YYYY(kJISx4051Cls_3 , 0x3085),
- YYYY(kJISx4051Cls_3 , 0x3087),
- YYYY(kJISx4051Cls_3 , 0x308E),
- YYYY(kJISx4051Cls_3 , 0x309D),
- YYYY(kJISx4051Cls_3 , 0x309E),
- YYYY(kJISx4051Cls_3 , 0x30A1),
- YYYY(kJISx4051Cls_3 , 0x30A3),
- YYYY(kJISx4051Cls_3 , 0x30A5),
- YYYY(kJISx4051Cls_3 , 0x30A7),
- YYYY(kJISx4051Cls_3 , 0x30A9),
- YYYY(kJISx4051Cls_3 , 0x30C3),
- YYYY(kJISx4051Cls_3 , 0x30E3),
- YYYY(kJISx4051Cls_3 , 0x30E5),
- YYYY(kJISx4051Cls_3 , 0x30E7),
- YYYY(kJISx4051Cls_3 , 0x30EE),
- YYYY(kJISx4051Cls_3 , 0x30F5),
- YYYY(kJISx4051Cls_3 , 0x30F6),
- YYYY(kJISx4051Cls_3 , 0x30FC),
- YYYY(kJISx4051Cls_3 , 0x30FD),
- YYYY(kJISx4051Cls_3 , 0x30FE),
- // Table 5: class 4 entries - '!' and '?'
- YYYY(kJISx4051Cls_4 , 0x0021),
- YYYY(kJISx4051Cls_4 , 0x003F),
- 
- // Table 6: class 5 entries - ':' ';' and U+30FB katakana middle dot
- YYYY(kJISx4051Cls_5 , 0x003A),
- YYYY(kJISx4051Cls_5 , 0x003B),
- YYYY(kJISx4051Cls_5 , 0x30FB),
- // Table 7: class 6 entries - '.' and U+3002 ideographic full stop
- YYYY(kJISx4051Cls_6 , 0x002E),
- YYYY(kJISx4051Cls_6 , 0x3002),
- // Table 8: class 7 entries - U+2014 em dash and the leader dots U+2024..U+2026
- YYYY(kJISx4051Cls_7 , 0x2014),
- YYYY(kJISx4051Cls_7 , 0x2024),
- YYYY(kJISx4051Cls_7 , 0x2025),
- YYYY(kJISx4051Cls_7 , 0x2026),
- // Table 9: class 8 entries - '$', U+00A3 pound, U+00A5 yen, U+2116 numero sign
- YYYY(kJISx4051Cls_8 , 0x0024),
- YYYY(kJISx4051Cls_8 , 0x00A3),
- YYYY(kJISx4051Cls_8 , 0x00A5),
- YYYY(kJISx4051Cls_8 , 0x2116),
- // Table 10: class 9 entries - '%', cent, degree, per mille, per ten thousand, prime, double prime
- YYYY(kJISx4051Cls_9 , 0x0025),
- YYYY(kJISx4051Cls_9 , 0x00A2),
- YYYY(kJISx4051Cls_9 , 0x00B0),
- YYYY(kJISx4051Cls_9 , 0x2030),
- YYYY(kJISx4051Cls_9 , 0x2031),
- YYYY(kJISx4051Cls_9 , 0x2032),
- YYYY(kJISx4051Cls_9 , 0x2033),
- // Table 11: class 10 entry - U+3000 ideographic space
- // NOTE(review): heading originally read "Table 1"; renumbered to follow the
- // "class N is Table N+1" pattern of the headings above - confirm against JIS X 4051.
- YYYY(kJISx4051Cls_10, 0x3000),
- // Table 12: class 11 entries
- // NOTE(review): heading originally read "Table 1" (see the pattern above). This entry
- // uses ZZZZ rather than YYYY and repeats 0x3000 from the class 10 entry - presumably
- // a placeholder; verify the intended code point(s).
- ZZZZ(kJISx4051Cls_11, 0x3000),
|