ucnid.tab 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244
  1. ; Table of UCNs which are valid in identifiers.
  2. ; Copyright (C) 2003-2015 Free Software Foundation, Inc.
  3. ;
  4. ; This program is free software; you can redistribute it and/or modify it
  5. ; under the terms of the GNU General Public License as published by the
  6. ; Free Software Foundation; either version 3, or (at your option) any
  7. ; later version.
  8. ;
  9. ; This program is distributed in the hope that it will be useful,
  10. ; but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. ; GNU General Public License for more details.
  13. ;
  14. ; You should have received a copy of the GNU General Public License
  15. ; along with this program; see the file COPYING3. If not see
  16. ; <http://www.gnu.org/licenses/>.
  17. ;
  18. ; This file reproduces the table in ISO/IEC 9899:1999 (C99) Annex
  19. ; D, which is itself a reproduction from ISO/IEC TR 10176:1998, and
  20. ; the similar table from ISO/IEC 14882:1998 (C++98) Annex E, which is
  21. ; a reproduction of ISO/IEC PDTR 10176. Unfortunately these tables
  22. ; are not identical. It also reproduces the somewhat different tables
  23. ; in C11 and C++11, which are identical to each other.
  24. [C99]
  25. ; Latin
  26. 00aa 00ba 00c0-00d6 00d8-00f6 00f8-01f5 01fa-0217 0250-02a8 1e00-1e9b
  27. 1ea0-1ef9 207f
  28. ; Greek
  29. 0386 0388-038a 038c 038e-03a1 03a3-03ce 03d0-03d6 03da 03dc 03de 03e0
  30. 03e2-03f3 1f00-1f15 1f18-1f1d 1f20-1f45 1f48-1f4d 1f50-1f57 1f59 1f5b
  31. 1f5d 1f5f-1f7d 1f80-1fb4 1fb6-1fbc 1fc2-1fc4 1fc6-1fcc 1fd0-1fd3
  32. 1fd6-1fdb 1fe0-1fec 1ff2-1ff4 1ff6-1ffc
  33. ; Cyrillic
  34. 0401-040c 040e-044f 0451-045c 045e-0481 0490-04c4 04c7-04c8 04cb-04cc
  35. 04d0-04eb 04ee-04f5 04f8-04f9
  36. ; Armenian
  37. 0531-0556 0561-0587
  38. ; Hebrew
  39. 05b0-05b9 05bb-05bd 05bf 05c1-05c2 05d0-05ea 05f0-05f2
  40. ; Arabic
  41. 0621-063a 0640-0652 0670-06b7 06ba-06be 06c0-06ce 06d0-06dc 06e5-06e8
  42. 06ea-06ed
  43. ; Devanagari
  44. 0901-0903 0905-0939 093e-094d 0950-0952 0958-0963
  45. ; Bengali
  46. 0981-0983 0985-098c 098f-0990 0993-09a8 09aa-09b0 09b2 09b6-09b9
  47. 09be-09c4 09c7-09c8 09cb-09cd 09dc-09dd 09df-09e3 09f0-09f1
  48. ; Gurmukhi
  49. 0a02 0a05-0a0a 0a0f-0a10 0a13-0a28 0a2a-0a30 0a32-0a33 0a35-0a36
  50. 0a38-0a39 0a3e-0a42 0a47-0a48 0a4b-0a4d 0a59-0a5c 0a5e 0a74
  51. ; Gujarati
  52. 0a81-0a83 0a85-0a8b 0a8d 0a8f-0a91 0a93-0aa8 0aaa-0ab0 0ab2-0ab3
  53. 0ab5-0ab9 0abd-0ac5 0ac7-0ac9 0acb-0acd 0ad0 0ae0
  54. ; Oriya
  55. 0b01-0b03 0b05-0b0c 0b0f-0b10 0b13-0b28 0b2a-0b30 0b32-0b33 0b36-0b39
  56. 0b3e-0b43 0b47-0b48 0b4b-0b4d 0b5c-0b5d 0b5f-0b61
  57. ; Tamil
  58. 0b82-0b83 0b85-0b8a 0b8e-0b90 0b92-0b95 0b99-0b9a 0b9c 0b9e-0b9f
  59. 0ba3-0ba4 0ba8-0baa 0bae-0bb5 0bb7-0bb9 0bbe-0bc2 0bc6-0bc8 0bca-0bcd
  60. ; Telugu
  61. 0c01-0c03 0c05-0c0c 0c0e-0c10 0c12-0c28 0c2a-0c33 0c35-0c39 0c3e-0c44
  62. 0c46-0c48 0c4a-0c4d 0c60-0c61
  63. ; Kannada
  64. 0c82-0c83 0c85-0c8c 0c8e-0c90 0c92-0ca8 0caa-0cb3 0cb5-0cb9 0cbe-0cc4
  65. 0cc6-0cc8 0cca-0ccd 0cde 0ce0-0ce1
  66. ; Malayalam
  67. 0d02-0d03 0d05-0d0c 0d0e-0d10 0d12-0d28 0d2a-0d39 0d3e-0d43 0d46-0d48
  68. 0d4a-0d4d 0d60-0d61
  69. # CORRECTION: exclude 0e50-0e59 from the Thai range as it also appears
  70. # in the Digits range below.
  71. ; Thai
  72. 0e01-0e3a 0e40-0e4f 0e5a-0e5b
  73. ; Lao
  74. 0e81-0e82 0e84 0e87-0e88 0e8a 0e8d 0e94-0e97 0e99-0e9f 0ea1-0ea3 0ea5
  75. 0ea7 0eaa-0eab 0ead-0eae 0eb0-0eb9 0ebb-0ebd 0ec0-0ec4 0ec6 0ec8-0ecd
  76. 0edc-0edd
  77. ; Tibetan
  78. 0f00 0f18-0f19 0f35 0f37 0f39 0f3e-0f47 0f49-0f69 0f71-0f84 0f86-0f8b
  79. 0f90-0f95 0f97 0f99-0fad 0fb1-0fb7 0fb9
  80. ; Georgian
  81. 10a0-10c5 10d0-10f6
  82. ; Hiragana
  83. 3041-3093 309b-309c
  84. ; Katakana
  85. 30a1-30f6 30fb-30fc
  86. ; Bopomofo
  87. 3105-312c
  88. ; CJK Unified Ideographs
  89. 4e00-9fa5
  90. ; Hangul
  91. ac00-d7a3
  92. ; Special characters
  93. 00b5 00b7 02b0-02b8 02bb 02bd-02c1 02d0-02d1 02e0-02e4 037a 0559 093d
  94. 0b3d 1fbe 203f-2040 2102 2107 210a-2113 2115 2118-211d 2124 2126 2128
  95. 212a-2131 2133-2138 2160-2182 3005-3007 3021-3029
  96. [C99DIG]
  97. 0660-0669 06f0-06f9 0966-096f 09e6-09ef 0a66-0a6f 0ae6-0aef 0b66-0b6f
  98. 0be7-0bef 0c66-0c6f 0ce6-0cef 0d66-0d6f 0e50-0e59 0ed0-0ed9 0f20-0f33
  99. [CXX]
  100. ; Latin
  101. 00c0-00d6 00d8-00f6 00f8-01f5 01fa-0217 0250-02a8 1e00-1e9a 1ea0-1ef9
  102. ; Greek
  103. 0384 0388-038a 038c 038e-03a1 03a3-03ce 03d0-03d6 03da 03dc 03de 03e0
  104. 03e2-03f3 1f00-1f15 1f18-1f1d 1f20-1f45 1f48-1f4d 1f50-1f57 1f59 1f5b
  105. 1f5d 1f5f-1f7d 1f80-1fb4 1fb6-1fbc 1fc2-1fc4 1fc6-1fcc 1fd0-1fd3
  106. 1fd6-1fdb 1fe0-1fec 1ff2-1ff4 1ff6-1ffc
  107. ; Cyrillic
  108. 0401-040d 040f-044f 0451-045c 045e-0481 0490-04c4 04c7-04c8 04cb-04cc
  109. 04d0-04eb 04ee-04f5 04f8-04f9
  110. ; Armenian
  111. 0531-0556 0561-0587
  112. ; Hebrew
  113. 05d0-05ea 05f0-05f4
  114. ; Arabic
  115. 0621-063a 0640-0652 0670-06b7 06ba-06be 06c0-06ce 06e5-06e7
  116. ; Devanagari
  117. 0905-0939 0958-0962
  118. ; Bengali
  119. 0985-098c 098f-0990 0993-09a8 09aa-09b0 09b2 09b6-09b9 09dc-09dd
  120. 09df-09e1 09f0-09f1
  121. ; Gurmukhi
  122. 0a05-0a0a 0a0f-0a10 0a13-0a28 0a2a-0a30 0a32-0a33 0a35-0a36 0a38-0a39
  123. 0a59-0a5c 0a5e
  124. ; Gujarati
  125. 0a85-0a8b 0a8d 0a8f-0a91 0a93-0aa8 0aaa-0ab0 0ab2-0ab3 0ab5-0ab9 0ae0
  126. ; Oriya
  127. 0b05-0b0c 0b0f-0b10 0b13-0b28 0b2a-0b30 0b32-0b33 0b36-0b39 0b5c-0b5d
  128. 0b5f-0b61
  129. ; Tamil
  130. 0b85-0b8a 0b8e-0b90 0b92-0b95 0b99-0b9a 0b9c 0b9e-0b9f 0ba3-0ba4
  131. 0ba8-0baa 0bae-0bb5 0bb7-0bb9
  132. ; Telugu
  133. 0c05-0c0c 0c0e-0c10 0c12-0c28 0c2a-0c33 0c35-0c39 0c60-0c61
  134. ; Kannada
  135. 0c85-0c8c 0c8e-0c90 0c92-0ca8 0caa-0cb3 0cb5-0cb9 0ce0-0ce1
  136. ; Malayalam
  137. 0d05-0d0c 0d0e-0d10 0d12-0d28 0d2a-0d39 0d60-0d61
  138. ; Thai
  139. 0e01-0e30 0e32-0e33 0e40-0e46 0e4f-0e5b
  140. ; Digits
  141. 0e50-0e59
  142. ; Lao
  143. 0e81-0e82 0e84 0e87-0e88 0e8a 0e8d 0e94-0e97 0e99-0e9f 0ea1-0ea3 0ea5
  144. 0ea7 0eaa-0eab 0ead-0eb0 0eb2 0eb3 0ebd 0ec0-0ec4 0ec6
  145. ; Georgian
  146. 10a0-10c5 10d0-10f6
  147. ; Hiragana
  148. 3041-3094 309b-309e
  149. ; Katakana
  150. 30a1-30fe
  151. ; Bopomofo
  152. 3105-312c
  153. ; Hangul
  154. 1100-1159 1161-11a2 11a8-11f9
  155. ; CJK Unified Ideographs
  156. f900-fa2d fb1f-fb36 fb38-fb3c fb3e fb40-fb41 fb42-fb44 fb46-fbb1
  157. fbd3-fd3f fd50-fd8f fd92-fdc7 fdf0-fdfb fe70-fe72 fe74 fe76-fefc
  158. ff21-ff3a ff41-ff5a ff66-ffbe ffc2-ffc7 ffca-ffcf ffd2-ffd7
  159. ffda-ffdc 4e00-9fa5
  160. [C11]
  161. ; Group 1
  162. 00a8 00aa 00ad 00af 00b2-00b5 00b7-00ba 00bc-00be 00c0-00d6 00d8-00f6
  163. 00f8-00ff
  164. ; Group 2, minus characters under C11NOSTART
  165. 0100-02ff 0370-167f 1681-180d 180f-1dbf 1e00-1fff
  166. ; Group 3
  167. 200b-200d 202a-202e 203f-2040 2054 2060-206f
  168. ; Group 4, minus characters under C11NOSTART
  169. 2070-20cf 2100-218f 2460-24ff 2776-2793 2c00-2dff 2e80-2fff
  170. ; Group 5
  171. 3004-3007 3021-302f 3031-303f
  172. ; Group 6
  173. 3040-d7ff
  174. ; Group 7, minus characters under C11NOSTART
  175. f900-fd3d fd40-fdcf fdf0-fe1f fe30-fe44 fe47-fffd
  176. ; Group 8
  177. 10000-1fffd 20000-2fffd 30000-3fffd 40000-4fffd 50000-5fffd
  178. 60000-6fffd 70000-7fffd 80000-8fffd 90000-9fffd a0000-afffd
  179. b0000-bfffd c0000-cfffd d0000-dfffd e0000-efffd
  180. [C11NOSTART]
  181. ; Group 1
  182. 0300-036f 1dc0-1dff 20d0-20ff fe20-fe2f