/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* vim:set ts=4 sw=4 sts=4 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  6. #ifndef NS_UNICODEPROPERTIES_H
  7. #define NS_UNICODEPROPERTIES_H
  8. #include "nsBidiUtils.h"
  9. #include "nsIUGenCategory.h"
  10. #include "nsUnicodeScriptCodes.h"
  11. #include "harfbuzz/hb.h"
  12. #include "unicode/uchar.h"
  13. #include "unicode/uscript.h"
  14. const nsCharProps2& GetCharProps2(uint32_t aCh);
  15. namespace mozilla {
  16. namespace unicode {
  17. extern const nsIUGenCategory::nsUGenCategory sDetailedToGeneralCategory[];
  // Vertical-orientation classes for a character. The numeric values are
  // stored in generated property data and read back through
  // GetCharProps2(...).mVertOrient.
  /* This MUST match the values assigned by genUnicodePropertyData.pl! */
  enum VerticalOrientation {
      VERTICAL_ORIENTATION_U  = 0,
      VERTICAL_ORIENTATION_R  = 1,
      VERTICAL_ORIENTATION_Tu = 2,
      VERTICAL_ORIENTATION_Tr = 3
  };
  // Bracket classification of a character: not a paired bracket, an opening
  // bracket, or a closing bracket.
  /* This MUST match the values assigned by genUnicodePropertyData.pl! */
  enum PairedBracketType {
      PAIRED_BRACKET_TYPE_NONE  = 0,
      PAIRED_BRACKET_TYPE_OPEN  = 1,
      PAIRED_BRACKET_TYPE_CLOSE = 2
  };
  // Identifier-modification classes, as returned by
  // GetIdentifierModification.
  // NOTE(review): the values are read back from GetCharProps2(...).mXidmod, so
  // they presumably have to stay in sync with the generated property data just
  // like the other enums in this file -- confirm before renumbering. The
  // explicit values below are the same ones the compiler assigned implicitly.
  enum XidmodType {
      XIDMOD_RECOMMENDED       = 0,
      XIDMOD_INCLUSION         = 1,
      XIDMOD_UNCOMMON_USE      = 2,
      XIDMOD_TECHNICAL         = 3,
      XIDMOD_OBSOLETE          = 4,
      XIDMOD_ASPIRATIONAL      = 5,
      XIDMOD_LIMITED_USE       = 6,
      XIDMOD_EXCLUSION         = 7,
      XIDMOD_NOT_XID           = 8,
      XIDMOD_NOT_NFKC          = 9,
      XIDMOD_DEFAULT_IGNORABLE = 10,
      XIDMOD_DEPRECATED        = 11,
      XIDMOD_NOT_CHARS         = 12
  };
  // Default presentation style of a character, as computed by
  // GetEmojiPresentation.
  enum EmojiPresentation {
      TextOnly     = 0,  // character does not have the Emoji property
      TextDefault  = 1,  // Emoji, but without the Emoji_Presentation property
      EmojiDefault = 2   // Emoji with the Emoji_Presentation property
  };
  // Variation selectors that request a specific presentation for the
  // preceding character.
  const uint32_t kVariationSelector15 = 0xFE0E; // text presentation
  const uint32_t kVariationSelector16 = 0xFE0F; // emoji presentation
  53. // ICU is available, so simply forward to its API
  54. extern const hb_unicode_general_category_t sICUtoHBcategory[];
  55. inline uint32_t
  56. GetMirroredChar(uint32_t aCh)
  57. {
  58. return u_charMirror(aCh);
  59. }
  60. inline bool
  61. HasMirroredChar(uint32_t aCh)
  62. {
  63. return u_isMirrored(aCh);
  64. }
  65. inline uint8_t
  66. GetCombiningClass(uint32_t aCh)
  67. {
  68. return u_getCombiningClass(aCh);
  69. }
  70. inline uint8_t
  71. GetGeneralCategory(uint32_t aCh)
  72. {
  73. return sICUtoHBcategory[u_charType(aCh)];
  74. }
  75. inline nsCharType
  76. GetBidiCat(uint32_t aCh)
  77. {
  78. return nsCharType(u_charDirection(aCh));
  79. }
  80. inline int8_t
  81. GetNumericValue(uint32_t aCh)
  82. {
  83. UNumericType type =
  84. UNumericType(u_getIntPropertyValue(aCh, UCHAR_NUMERIC_TYPE));
  85. return type == U_NT_DECIMAL || type == U_NT_DIGIT
  86. ? int8_t(u_getNumericValue(aCh)) : -1;
  87. }
  88. inline uint8_t
  89. GetLineBreakClass(uint32_t aCh)
  90. {
  91. return u_getIntPropertyValue(aCh, UCHAR_LINE_BREAK);
  92. }
  93. inline Script
  94. GetScriptCode(uint32_t aCh)
  95. {
  96. UErrorCode err = U_ZERO_ERROR;
  97. return Script(uscript_getScript(aCh, &err));
  98. }
  99. inline uint32_t
  100. GetScriptTagForCode(Script aScriptCode)
  101. {
  102. const char* tag = uscript_getShortName(UScriptCode(aScriptCode));
  103. return HB_TAG(tag[0], tag[1], tag[2], tag[3]);
  104. }
  105. inline PairedBracketType
  106. GetPairedBracketType(uint32_t aCh)
  107. {
  108. return PairedBracketType
  109. (u_getIntPropertyValue(aCh, UCHAR_BIDI_PAIRED_BRACKET_TYPE));
  110. }
  111. inline uint32_t
  112. GetPairedBracket(uint32_t aCh)
  113. {
  114. return u_getBidiPairedBracket(aCh);
  115. }
  116. inline uint32_t
  117. GetUppercase(uint32_t aCh)
  118. {
  119. return u_toupper(aCh);
  120. }
  121. inline uint32_t
  122. GetLowercase(uint32_t aCh)
  123. {
  124. return u_tolower(aCh);
  125. }
  126. inline uint32_t
  127. GetTitlecaseForLower(uint32_t aCh) // maps LC to titlecase, UC unchanged
  128. {
  129. return u_isULowercase(aCh) ? u_totitle(aCh) : aCh;
  130. }
  131. inline uint32_t
  132. GetTitlecaseForAll(uint32_t aCh) // maps both UC and LC to titlecase
  133. {
  134. return u_totitle(aCh);
  135. }
  136. inline bool
  137. IsEastAsianWidthFWH(uint32_t aCh)
  138. {
  139. switch (u_getIntPropertyValue(aCh, UCHAR_EAST_ASIAN_WIDTH)) {
  140. case U_EA_FULLWIDTH:
  141. case U_EA_WIDE:
  142. case U_EA_HALFWIDTH:
  143. return true;
  144. case U_EA_AMBIGUOUS:
  145. case U_EA_NARROW:
  146. case U_EA_NEUTRAL:
  147. return false;
  148. }
  149. return false;
  150. }
  151. inline EmojiPresentation
  152. GetEmojiPresentation(uint32_t aCh)
  153. {
  154. if (!u_hasBinaryProperty(aCh, UCHAR_EMOJI)) {
  155. return TextOnly;
  156. }
  157. if (u_hasBinaryProperty(aCh, UCHAR_EMOJI_PRESENTATION)) {
  158. return EmojiDefault;
  159. }
  160. return TextDefault;
  161. }
  162. // returns the simplified Gen Category as defined in nsIUGenCategory
  163. inline nsIUGenCategory::nsUGenCategory GetGenCategory(uint32_t aCh) {
  164. return sDetailedToGeneralCategory[GetGeneralCategory(aCh)];
  165. }
  166. inline VerticalOrientation GetVerticalOrientation(uint32_t aCh) {
  167. return VerticalOrientation(GetCharProps2(aCh).mVertOrient);
  168. }
  169. inline XidmodType GetIdentifierModification(uint32_t aCh) {
  170. return XidmodType(GetCharProps2(aCh).mXidmod);
  171. }
  // Returns the full-width form of aCh. Defined outside this header; see
  // GetFullWidthInverse for the reverse mapping.
  uint32_t GetFullWidth(uint32_t aCh);
  // This is the reverse function of GetFullWidth. It guarantees that, for
  // every codepoint c, GetFullWidthInverse(GetFullWidth(c)) == c.
  // Note that this function is not guaranteed to convert every wide-form
  // character to its possible narrow form.
  uint32_t GetFullWidthInverse(uint32_t aCh);
  178. bool IsClusterExtender(uint32_t aCh, uint8_t aCategory);
  179. inline bool IsClusterExtender(uint32_t aCh) {
  180. return IsClusterExtender(aCh, GetGeneralCategory(aCh));
  181. }
  // A simple iterator for a string of char16_t codepoints that advances
  // by Unicode grapheme clusters.
  //
  // The iterator does not own or copy the text: it only keeps pointers into
  // the buffer passed to the constructor, so that buffer must outlive it.
  class ClusterIterator
  {
  public:
      // Starts at aText and stops at aText + aLength (aLength is in char16_t
      // units).
      ClusterIterator(const char16_t* aText, uint32_t aLength)
          : mPos(aText), mLimit(aText + aLength)
  #ifdef DEBUG
          , mText(aText)
  #endif
      { }

      // Implicit conversion yielding the current position in the text.
      operator const char16_t* () const {
          return mPos;
      }

      // True once the current position has reached (or passed) the limit.
      bool AtEnd() const {
          return mPos >= mLimit;
      }

      // Advances to the next cluster; defined outside this header.
      void Next();

  private:
      const char16_t* mPos;    // current position
      const char16_t* mLimit;  // one past the last char16_t of the text
  #ifdef DEBUG
      const char16_t* mText;   // start of the text, kept in DEBUG builds only
  #endif
  };
  // Counts the number of grapheme clusters in the given string of aLength
  // char16_t units. Defined outside this header.
  uint32_t CountGraphemeClusters(const char16_t* aText, uint32_t aLength);
  // A simple reverse iterator for a string of char16_t codepoints that
  // advances by Unicode grapheme clusters.
  //
  // The iterator does not own or copy the text: it only keeps pointers into
  // the buffer passed to the constructor, so that buffer must outlive it.
  class ClusterReverseIterator
  {
  public:
      // Starts one past the end of the text (aText + aLength) and stops at
      // aText.
      ClusterReverseIterator(const char16_t* aText, uint32_t aLength)
          : mPos(aText + aLength), mLimit(aText)
      { }

      // Implicit conversion yielding the current position in the text.
      operator const char16_t* () const {
          return mPos;
      }

      // True once the current position has reached (or passed) the start of
      // the text.
      bool AtEnd() const {
          return mPos <= mLimit;
      }

      // Steps back to the previous cluster; defined outside this header.
      void Next();

  private:
      const char16_t* mPos;    // current position
      const char16_t* mLimit;  // start of the text
  };
  228. } // end namespace unicode
  229. } // end namespace mozilla
  230. #endif /* NS_UNICODEPROPERTIES_H */