GreekCasing.cpp 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269
  1. /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
  2. /* This Source Code Form is subject to the terms of the Mozilla Public
  3. * License, v. 2.0. If a copy of the MPL was not distributed with this
  4. * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  5. #include "GreekCasing.h"
  6. #include "nsUnicharUtils.h"
  7. // Custom uppercase mapping for Greek; see bug 307039 for details
  8. #define GREEK_LOWER_ALPHA 0x03B1
  9. #define GREEK_LOWER_ALPHA_TONOS 0x03AC
  10. #define GREEK_LOWER_ALPHA_OXIA 0x1F71
  11. #define GREEK_LOWER_EPSILON 0x03B5
  12. #define GREEK_LOWER_EPSILON_TONOS 0x03AD
  13. #define GREEK_LOWER_EPSILON_OXIA 0x1F73
  14. #define GREEK_LOWER_ETA 0x03B7
  15. #define GREEK_LOWER_ETA_TONOS 0x03AE
  16. #define GREEK_LOWER_ETA_OXIA 0x1F75
  17. #define GREEK_LOWER_IOTA 0x03B9
  18. #define GREEK_LOWER_IOTA_TONOS 0x03AF
  19. #define GREEK_LOWER_IOTA_OXIA 0x1F77
  20. #define GREEK_LOWER_IOTA_DIALYTIKA 0x03CA
  21. #define GREEK_LOWER_IOTA_DIALYTIKA_TONOS 0x0390
  22. #define GREEK_LOWER_IOTA_DIALYTIKA_OXIA 0x1FD3
  23. #define GREEK_LOWER_OMICRON 0x03BF
  24. #define GREEK_LOWER_OMICRON_TONOS 0x03CC
  25. #define GREEK_LOWER_OMICRON_OXIA 0x1F79
  26. #define GREEK_LOWER_UPSILON 0x03C5
  27. #define GREEK_LOWER_UPSILON_TONOS 0x03CD
  28. #define GREEK_LOWER_UPSILON_OXIA 0x1F7B
  29. #define GREEK_LOWER_UPSILON_DIALYTIKA 0x03CB
  30. #define GREEK_LOWER_UPSILON_DIALYTIKA_TONOS 0x03B0
  31. #define GREEK_LOWER_UPSILON_DIALYTIKA_OXIA 0x1FE3
  32. #define GREEK_LOWER_OMEGA 0x03C9
  33. #define GREEK_LOWER_OMEGA_TONOS 0x03CE
  34. #define GREEK_LOWER_OMEGA_OXIA 0x1F7D
  35. #define GREEK_UPPER_ALPHA 0x0391
  36. #define GREEK_UPPER_EPSILON 0x0395
  37. #define GREEK_UPPER_ETA 0x0397
  38. #define GREEK_UPPER_IOTA 0x0399
  39. #define GREEK_UPPER_IOTA_DIALYTIKA 0x03AA
  40. #define GREEK_UPPER_OMICRON 0x039F
  41. #define GREEK_UPPER_UPSILON 0x03A5
  42. #define GREEK_UPPER_UPSILON_DIALYTIKA 0x03AB
  43. #define GREEK_UPPER_OMEGA 0x03A9
  44. #define GREEK_UPPER_ALPHA_TONOS 0x0386
  45. #define GREEK_UPPER_ALPHA_OXIA 0x1FBB
  46. #define GREEK_UPPER_EPSILON_TONOS 0x0388
  47. #define GREEK_UPPER_EPSILON_OXIA 0x1FC9
  48. #define GREEK_UPPER_ETA_TONOS 0x0389
  49. #define GREEK_UPPER_ETA_OXIA 0x1FCB
  50. #define GREEK_UPPER_IOTA_TONOS 0x038A
  51. #define GREEK_UPPER_IOTA_OXIA 0x1FDB
  52. #define GREEK_UPPER_OMICRON_TONOS 0x038C
  53. #define GREEK_UPPER_OMICRON_OXIA 0x1FF9
  54. #define GREEK_UPPER_UPSILON_TONOS 0x038E
  55. #define GREEK_UPPER_UPSILON_OXIA 0x1FEB
  56. #define GREEK_UPPER_OMEGA_TONOS 0x038F
  57. #define GREEK_UPPER_OMEGA_OXIA 0x1FFB
  58. #define COMBINING_ACUTE_ACCENT 0x0301
  59. #define COMBINING_DIAERESIS 0x0308
  60. #define COMBINING_ACUTE_TONE_MARK 0x0341
  61. #define COMBINING_GREEK_DIALYTIKA_TONOS 0x0344
  62. namespace mozilla {
  63. uint32_t
  64. GreekCasing::UpperCase(uint32_t aCh, GreekCasing::State& aState)
  65. {
  66. switch (aCh) {
  67. case GREEK_UPPER_ALPHA:
  68. case GREEK_LOWER_ALPHA:
  69. aState = kAlpha;
  70. return GREEK_UPPER_ALPHA;
  71. case GREEK_UPPER_EPSILON:
  72. case GREEK_LOWER_EPSILON:
  73. aState = kEpsilon;
  74. return GREEK_UPPER_EPSILON;
  75. case GREEK_UPPER_ETA:
  76. case GREEK_LOWER_ETA:
  77. aState = kEta;
  78. return GREEK_UPPER_ETA;
  79. case GREEK_UPPER_IOTA:
  80. aState = kIota;
  81. return GREEK_UPPER_IOTA;
  82. case GREEK_UPPER_OMICRON:
  83. case GREEK_LOWER_OMICRON:
  84. aState = kOmicron;
  85. return GREEK_UPPER_OMICRON;
  86. case GREEK_UPPER_UPSILON:
  87. switch (aState) {
  88. case kOmicron:
  89. aState = kOmicronUpsilon;
  90. break;
  91. default:
  92. aState = kUpsilon;
  93. break;
  94. }
  95. return GREEK_UPPER_UPSILON;
  96. case GREEK_UPPER_OMEGA:
  97. case GREEK_LOWER_OMEGA:
  98. aState = kOmega;
  99. return GREEK_UPPER_OMEGA;
  100. // iota and upsilon may be the second vowel of a diphthong
  101. case GREEK_LOWER_IOTA:
  102. switch (aState) {
  103. case kAlphaAcc:
  104. case kEpsilonAcc:
  105. case kOmicronAcc:
  106. case kUpsilonAcc:
  107. aState = kStart;
  108. return GREEK_UPPER_IOTA_DIALYTIKA;
  109. default:
  110. break;
  111. }
  112. aState = kIota;
  113. return GREEK_UPPER_IOTA;
  114. case GREEK_LOWER_UPSILON:
  115. switch (aState) {
  116. case kAlphaAcc:
  117. case kEpsilonAcc:
  118. case kEtaAcc:
  119. case kOmicronAcc:
  120. aState = kStart;
  121. return GREEK_UPPER_UPSILON_DIALYTIKA;
  122. case kOmicron:
  123. aState = kOmicronUpsilon;
  124. break;
  125. default:
  126. aState = kUpsilon;
  127. break;
  128. }
  129. return GREEK_UPPER_UPSILON;
  130. case GREEK_UPPER_IOTA_DIALYTIKA:
  131. case GREEK_LOWER_IOTA_DIALYTIKA:
  132. case GREEK_UPPER_UPSILON_DIALYTIKA:
  133. case GREEK_LOWER_UPSILON_DIALYTIKA:
  134. case COMBINING_DIAERESIS:
  135. aState = kDiaeresis;
  136. return ToUpperCase(aCh);
  137. // remove accent if it follows a vowel or diaeresis,
  138. // and set appropriate state for diphthong detection
  139. case COMBINING_ACUTE_ACCENT:
  140. case COMBINING_ACUTE_TONE_MARK:
  141. switch (aState) {
  142. case kAlpha:
  143. aState = kAlphaAcc;
  144. return uint32_t(-1); // omit this char from result string
  145. case kEpsilon:
  146. aState = kEpsilonAcc;
  147. return uint32_t(-1);
  148. case kEta:
  149. aState = kEtaAcc;
  150. return uint32_t(-1);
  151. case kIota:
  152. aState = kIotaAcc;
  153. return uint32_t(-1);
  154. case kOmicron:
  155. aState = kOmicronAcc;
  156. return uint32_t(-1);
  157. case kUpsilon:
  158. aState = kUpsilonAcc;
  159. return uint32_t(-1);
  160. case kOmicronUpsilon:
  161. aState = kStart; // this completed a diphthong
  162. return uint32_t(-1);
  163. case kOmega:
  164. aState = kOmegaAcc;
  165. return uint32_t(-1);
  166. case kDiaeresis:
  167. aState = kStart;
  168. return uint32_t(-1);
  169. default:
  170. break;
  171. }
  172. break;
  173. // combinations with dieresis+accent just strip the accent,
  174. // and reset to start state (don't form diphthong with following vowel)
  175. case GREEK_LOWER_IOTA_DIALYTIKA_TONOS:
  176. case GREEK_LOWER_IOTA_DIALYTIKA_OXIA:
  177. aState = kStart;
  178. return GREEK_UPPER_IOTA_DIALYTIKA;
  179. case GREEK_LOWER_UPSILON_DIALYTIKA_TONOS:
  180. case GREEK_LOWER_UPSILON_DIALYTIKA_OXIA:
  181. aState = kStart;
  182. return GREEK_UPPER_UPSILON_DIALYTIKA;
  183. case COMBINING_GREEK_DIALYTIKA_TONOS:
  184. aState = kStart;
  185. return COMBINING_DIAERESIS;
  186. // strip accents from vowels, and note the vowel seen so that we can detect
  187. // diphthongs where diaeresis needs to be added
  188. case GREEK_LOWER_ALPHA_TONOS:
  189. case GREEK_LOWER_ALPHA_OXIA:
  190. case GREEK_UPPER_ALPHA_TONOS:
  191. case GREEK_UPPER_ALPHA_OXIA:
  192. aState = kAlphaAcc;
  193. return GREEK_UPPER_ALPHA;
  194. case GREEK_LOWER_EPSILON_TONOS:
  195. case GREEK_LOWER_EPSILON_OXIA:
  196. case GREEK_UPPER_EPSILON_TONOS:
  197. case GREEK_UPPER_EPSILON_OXIA:
  198. aState = kEpsilonAcc;
  199. return GREEK_UPPER_EPSILON;
  200. case GREEK_LOWER_ETA_TONOS:
  201. case GREEK_LOWER_ETA_OXIA:
  202. case GREEK_UPPER_ETA_TONOS:
  203. case GREEK_UPPER_ETA_OXIA:
  204. aState = kEtaAcc;
  205. return GREEK_UPPER_ETA;
  206. case GREEK_LOWER_IOTA_TONOS:
  207. case GREEK_LOWER_IOTA_OXIA:
  208. case GREEK_UPPER_IOTA_TONOS:
  209. case GREEK_UPPER_IOTA_OXIA:
  210. aState = kIotaAcc;
  211. return GREEK_UPPER_IOTA;
  212. case GREEK_LOWER_OMICRON_TONOS:
  213. case GREEK_LOWER_OMICRON_OXIA:
  214. case GREEK_UPPER_OMICRON_TONOS:
  215. case GREEK_UPPER_OMICRON_OXIA:
  216. aState = kOmicronAcc;
  217. return GREEK_UPPER_OMICRON;
  218. case GREEK_LOWER_UPSILON_TONOS:
  219. case GREEK_LOWER_UPSILON_OXIA:
  220. case GREEK_UPPER_UPSILON_TONOS:
  221. case GREEK_UPPER_UPSILON_OXIA:
  222. switch (aState) {
  223. case kOmicron:
  224. aState = kStart; // this completed a diphthong
  225. break;
  226. default:
  227. aState = kUpsilonAcc;
  228. break;
  229. }
  230. return GREEK_UPPER_UPSILON;
  231. case GREEK_LOWER_OMEGA_TONOS:
  232. case GREEK_LOWER_OMEGA_OXIA:
  233. case GREEK_UPPER_OMEGA_TONOS:
  234. case GREEK_UPPER_OMEGA_OXIA:
  235. aState = kOmegaAcc;
  236. return GREEK_UPPER_OMEGA;
  237. }
  238. // all other characters just reset the state, and use standard mappings
  239. aState = kStart;
  240. return ToUpperCase(aCh);
  241. }
  242. } // namespace mozilla