UnicodeIcu.h 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. /*
  2. * Copyright (C) 2006 George Staikos <staikos@kde.org>
  3. * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
  4. * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
  5. *
  6. * This library is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Library General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2 of the License, or (at your option) any later version.
  10. *
  11. * This library is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Library General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Library General Public License
  17. * along with this library; see the file COPYING.LIB. If not, write to
  18. * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  19. * Boston, MA 02110-1301, USA.
  20. *
  21. */
  22. #ifndef WTF_UNICODE_ICU_H
  23. #define WTF_UNICODE_ICU_H
  24. #if USE(ICU_UNICODE)
  25. #include <stdlib.h>
  26. #include <unicode/uchar.h>
  27. #include <unicode/uscript.h>
  28. #include <unicode/ustring.h>
  29. #include <unicode/utf16.h>
  30. namespace WTF {
  31. namespace Unicode {
  32. enum Direction {
  33. LeftToRight = U_LEFT_TO_RIGHT,
  34. RightToLeft = U_RIGHT_TO_LEFT,
  35. EuropeanNumber = U_EUROPEAN_NUMBER,
  36. EuropeanNumberSeparator = U_EUROPEAN_NUMBER_SEPARATOR,
  37. EuropeanNumberTerminator = U_EUROPEAN_NUMBER_TERMINATOR,
  38. ArabicNumber = U_ARABIC_NUMBER,
  39. CommonNumberSeparator = U_COMMON_NUMBER_SEPARATOR,
  40. BlockSeparator = U_BLOCK_SEPARATOR,
  41. SegmentSeparator = U_SEGMENT_SEPARATOR,
  42. WhiteSpaceNeutral = U_WHITE_SPACE_NEUTRAL,
  43. OtherNeutral = U_OTHER_NEUTRAL,
  44. LeftToRightEmbedding = U_LEFT_TO_RIGHT_EMBEDDING,
  45. LeftToRightOverride = U_LEFT_TO_RIGHT_OVERRIDE,
  46. RightToLeftArabic = U_RIGHT_TO_LEFT_ARABIC,
  47. RightToLeftEmbedding = U_RIGHT_TO_LEFT_EMBEDDING,
  48. RightToLeftOverride = U_RIGHT_TO_LEFT_OVERRIDE,
  49. PopDirectionalFormat = U_POP_DIRECTIONAL_FORMAT,
  50. NonSpacingMark = U_DIR_NON_SPACING_MARK,
  51. BoundaryNeutral = U_BOUNDARY_NEUTRAL
  52. };
  53. enum DecompositionType {
  54. DecompositionNone = U_DT_NONE,
  55. DecompositionCanonical = U_DT_CANONICAL,
  56. DecompositionCompat = U_DT_COMPAT,
  57. DecompositionCircle = U_DT_CIRCLE,
  58. DecompositionFinal = U_DT_FINAL,
  59. DecompositionFont = U_DT_FONT,
  60. DecompositionFraction = U_DT_FRACTION,
  61. DecompositionInitial = U_DT_INITIAL,
  62. DecompositionIsolated = U_DT_ISOLATED,
  63. DecompositionMedial = U_DT_MEDIAL,
  64. DecompositionNarrow = U_DT_NARROW,
  65. DecompositionNoBreak = U_DT_NOBREAK,
  66. DecompositionSmall = U_DT_SMALL,
  67. DecompositionSquare = U_DT_SQUARE,
  68. DecompositionSub = U_DT_SUB,
  69. DecompositionSuper = U_DT_SUPER,
  70. DecompositionVertical = U_DT_VERTICAL,
  71. DecompositionWide = U_DT_WIDE,
  72. };
  73. enum CharCategory {
  74. NoCategory = 0,
  75. Other_NotAssigned = U_MASK(U_GENERAL_OTHER_TYPES),
  76. Letter_Uppercase = U_MASK(U_UPPERCASE_LETTER),
  77. Letter_Lowercase = U_MASK(U_LOWERCASE_LETTER),
  78. Letter_Titlecase = U_MASK(U_TITLECASE_LETTER),
  79. Letter_Modifier = U_MASK(U_MODIFIER_LETTER),
  80. Letter_Other = U_MASK(U_OTHER_LETTER),
  81. Mark_NonSpacing = U_MASK(U_NON_SPACING_MARK),
  82. Mark_Enclosing = U_MASK(U_ENCLOSING_MARK),
  83. Mark_SpacingCombining = U_MASK(U_COMBINING_SPACING_MARK),
  84. Number_DecimalDigit = U_MASK(U_DECIMAL_DIGIT_NUMBER),
  85. Number_Letter = U_MASK(U_LETTER_NUMBER),
  86. Number_Other = U_MASK(U_OTHER_NUMBER),
  87. Separator_Space = U_MASK(U_SPACE_SEPARATOR),
  88. Separator_Line = U_MASK(U_LINE_SEPARATOR),
  89. Separator_Paragraph = U_MASK(U_PARAGRAPH_SEPARATOR),
  90. Other_Control = U_MASK(U_CONTROL_CHAR),
  91. Other_Format = U_MASK(U_FORMAT_CHAR),
  92. Other_PrivateUse = U_MASK(U_PRIVATE_USE_CHAR),
  93. Other_Surrogate = U_MASK(U_SURROGATE),
  94. Punctuation_Dash = U_MASK(U_DASH_PUNCTUATION),
  95. Punctuation_Open = U_MASK(U_START_PUNCTUATION),
  96. Punctuation_Close = U_MASK(U_END_PUNCTUATION),
  97. Punctuation_Connector = U_MASK(U_CONNECTOR_PUNCTUATION),
  98. Punctuation_Other = U_MASK(U_OTHER_PUNCTUATION),
  99. Symbol_Math = U_MASK(U_MATH_SYMBOL),
  100. Symbol_Currency = U_MASK(U_CURRENCY_SYMBOL),
  101. Symbol_Modifier = U_MASK(U_MODIFIER_SYMBOL),
  102. Symbol_Other = U_MASK(U_OTHER_SYMBOL),
  103. Punctuation_InitialQuote = U_MASK(U_INITIAL_PUNCTUATION),
  104. Punctuation_FinalQuote = U_MASK(U_FINAL_PUNCTUATION)
  105. };
  106. inline UChar32 foldCase(UChar32 c)
  107. {
  108. return u_foldCase(c, U_FOLD_CASE_DEFAULT);
  109. }
  110. inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
  111. {
  112. UErrorCode status = U_ZERO_ERROR;
  113. int realLength = u_strFoldCase(result, resultLength, src, srcLength, U_FOLD_CASE_DEFAULT, &status);
  114. *error = !U_SUCCESS(status);
  115. return realLength;
  116. }
  117. inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
  118. {
  119. UErrorCode status = U_ZERO_ERROR;
  120. int realLength = u_strToLower(result, resultLength, src, srcLength, "", &status);
  121. *error = !!U_FAILURE(status);
  122. return realLength;
  123. }
  124. inline UChar32 toLower(UChar32 c)
  125. {
  126. return u_tolower(c);
  127. }
  128. inline UChar32 toUpper(UChar32 c)
  129. {
  130. return u_toupper(c);
  131. }
  132. inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
  133. {
  134. UErrorCode status = U_ZERO_ERROR;
  135. int realLength = u_strToUpper(result, resultLength, src, srcLength, "", &status);
  136. *error = !!U_FAILURE(status);
  137. return realLength;
  138. }
  139. inline UChar32 toTitleCase(UChar32 c)
  140. {
  141. return u_totitle(c);
  142. }
  143. inline bool isArabicChar(UChar32 c)
  144. {
  145. return ublock_getCode(c) == UBLOCK_ARABIC;
  146. }
  147. inline bool isAlphanumeric(UChar32 c)
  148. {
  149. return u_isalnum(c);
  150. }
  151. inline bool isSeparatorSpace(UChar32 c)
  152. {
  153. return u_charType(c) == U_SPACE_SEPARATOR;
  154. }
  155. inline bool isPrintableChar(UChar32 c)
  156. {
  157. return !!u_isprint(c);
  158. }
  159. inline bool isPunct(UChar32 c)
  160. {
  161. return !!u_ispunct(c);
  162. }
  163. #define WK_LB_CONDITIONAL_JAPANESE_STARTER 37
  164. inline bool requiresComplexContextForWordBreaking(UChar32 c)
  165. {
  166. int32_t prop = u_getIntPropertyValue(c, UCHAR_LINE_BREAK);
  167. return prop == U_LB_COMPLEX_CONTEXT || prop == WK_LB_CONDITIONAL_JAPANESE_STARTER || prop == U_LB_IDEOGRAPHIC;
  168. }
  169. inline UChar32 mirroredChar(UChar32 c)
  170. {
  171. return u_charMirror(c);
  172. }
  173. inline CharCategory category(UChar32 c)
  174. {
  175. return static_cast<CharCategory>(U_GET_GC_MASK(c));
  176. }
  177. inline Direction direction(UChar32 c)
  178. {
  179. return static_cast<Direction>(u_charDirection(c));
  180. }
  181. inline bool isLower(UChar32 c)
  182. {
  183. return !!u_islower(c);
  184. }
  185. inline uint8_t combiningClass(UChar32 c)
  186. {
  187. return u_getCombiningClass(c);
  188. }
  189. inline DecompositionType decompositionType(UChar32 c)
  190. {
  191. return static_cast<DecompositionType>(u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE));
  192. }
  193. inline int umemcasecmp(const UChar* a, const UChar* b, int len)
  194. {
  195. return u_memcasecmp(a, b, len, U_FOLD_CASE_DEFAULT);
  196. }
  197. } // namespace Unicode
  198. } // namespace WTF
  199. #endif // USE(ICU_UNICODE)
  200. #endif // WTF_UNICODE_ICU_H