nsUnicharUtils.h 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
  /* This Source Code Form is subject to the terms of the Mozilla Public
   * License, v. 2.0. If a copy of the MPL was not distributed with this
   * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  5. #ifndef nsUnicharUtils_h__
  6. #define nsUnicharUtils_h__
  7. #include "nsStringGlue.h"
  /*
   * IS_CJ_CHAR(u) is true when |u| lies in any of these inclusive ranges:
   *   0x2e80..0x312f, 0x3190..0xabff, 0xf900..0xfaff, 0xff00..0xffef
   *
   * The following Hangul ranges fall outside every range tested above:
   *   (0x3131u <= (u) && (u) <= 0x318eu) => Hangul Compatibility Jamo
   *   (0xac00u <= (u) && (u) <= 0xd7a3u) => Hangul Syllables
   *
   * NOTE: |u| is expanded up to eight times, so never pass an expression
   * that has side effects (e.g. *p++).
   */
  #define IS_CJ_CHAR(u) \
    ((0x2e80u <= (u) && (u) <= 0x312fu) || \
     (0x3190u <= (u) && (u) <= 0xabffu) || \
     (0xf900u <= (u) && (u) <= 0xfaffu) || \
     (0xff00u <= (u) && (u) <= 0xffefu))
  /* True exactly when |u| compares equal to 0x200B, the value this macro
   * names "zero width space". */
  #define IS_ZERO_WIDTH_SPACE(u) ((u) == 0x200B)
  16. void ToLowerCase(nsAString&);
  17. void ToUpperCase(nsAString&);
  18. void ToLowerCase(const nsAString& aSource, nsAString& aDest);
  19. void ToUpperCase(const nsAString& aSource, nsAString& aDest);
  20. uint32_t ToLowerCase(uint32_t);
  21. uint32_t ToUpperCase(uint32_t);
  22. uint32_t ToTitleCase(uint32_t);
  23. void ToLowerCase(const char16_t*, char16_t*, uint32_t);
  24. void ToUpperCase(const char16_t*, char16_t*, uint32_t);
  25. inline bool IsUpperCase(uint32_t c) {
  26. return ToLowerCase(c) != c;
  27. }
  28. inline bool IsLowerCase(uint32_t c) {
  29. return ToUpperCase(c) != c;
  30. }
  31. #ifdef MOZILLA_INTERNAL_API
  32. class nsCaseInsensitiveStringComparator : public nsStringComparator
  33. {
  34. public:
  35. virtual int32_t operator() (const char16_t*,
  36. const char16_t*,
  37. uint32_t,
  38. uint32_t) const override;
  39. };
  40. class nsCaseInsensitiveUTF8StringComparator : public nsCStringComparator
  41. {
  42. public:
  43. virtual int32_t operator() (const char*,
  44. const char*,
  45. uint32_t,
  46. uint32_t) const override;
  47. };
  48. class nsCaseInsensitiveStringArrayComparator
  49. {
  50. public:
  51. template<class A, class B>
  52. bool Equals(const A& a, const B& b) const {
  53. return a.Equals(b, nsCaseInsensitiveStringComparator());
  54. }
  55. };
  56. class nsASCIICaseInsensitiveStringComparator : public nsStringComparator
  57. {
  58. public:
  59. nsASCIICaseInsensitiveStringComparator() {}
  60. virtual int operator() (const char16_t*,
  61. const char16_t*,
  62. uint32_t,
  63. uint32_t) const override;
  64. };
  65. inline bool
  66. CaseInsensitiveFindInReadable(const nsAString& aPattern,
  67. nsAString::const_iterator& aSearchStart,
  68. nsAString::const_iterator& aSearchEnd)
  69. {
  70. return FindInReadable(aPattern, aSearchStart, aSearchEnd,
  71. nsCaseInsensitiveStringComparator());
  72. }
  73. inline bool
  74. CaseInsensitiveFindInReadable(const nsAString& aPattern,
  75. const nsAString& aHay)
  76. {
  77. nsAString::const_iterator searchBegin, searchEnd;
  78. return FindInReadable(aPattern, aHay.BeginReading(searchBegin),
  79. aHay.EndReading(searchEnd),
  80. nsCaseInsensitiveStringComparator());
  81. }
  82. #endif // MOZILLA_INTERNAL_API
  /*
   * Compares two char16_t buffers; a single |len| is supplied for both.
   * The meaning of the int32_t result (presumably negative / zero / positive
   * ordering) is defined by the implementation -- confirm there.
   */
  int32_t
  CaseInsensitiveCompare(const char16_t *a, const char16_t *b, uint32_t len);

  /*
   * Compares two char buffers whose sizes are given separately; the
   * parameter names indicate the sizes are byte counts. The meaning of the
   * int32_t result is defined by the implementation -- confirm there.
   */
  int32_t
  CaseInsensitiveCompare(const char* aLeft, const char* aRight,
                         uint32_t aLeftBytes, uint32_t aRightBytes);
  /**
   * This function determines whether the UTF-8 sequence pointed to by aLeft is
   * case-insensitively-equal to the UTF-8 sequence pointed to by aRight.
   *
   * aLeftEnd marks the first memory location past aLeft that is not part of
   * aLeft; aRightEnd similarly marks the end of aRight.
   *
   * The function assumes that aLeft < aLeftEnd and aRight < aRightEnd.
   *
   * The function stores the addresses of the next characters in the sequence
   * into aLeftNext and aRightNext.  It's up to the caller to make sure that the
   * returned pointers are valid -- i.e. the function may return aLeftNext >=
   * aLeftEnd or aRightNext >= aRightEnd.
   *
   * If the function encounters invalid text, it sets aErr to true and returns
   * false, possibly leaving aLeftNext and aRightNext uninitialized.  If the
   * function returns true, aErr is guaranteed to be false and both aLeftNext and
   * aRightNext are guaranteed to be initialized.
   */
  bool
  CaseInsensitiveUTF8CharsEqual(const char* aLeft, const char* aRight,
                                const char* aLeftEnd, const char* aRightEnd,
                                const char** aLeftNext, const char** aRightNext,
                                bool* aErr);
  namespace mozilla {

  /**
   * Hash a UTF8 string as though it were a UTF16 string.
   *
   * The value returned is the same as if we converted the string to UTF16 and
   * then ran HashString() on the result.
   *
   * The given |length| is in bytes.
   *
   * aErr is an out-parameter; presumably it is set when aUTF8 is not valid
   * UTF-8 -- confirm against the definition.
   */
  uint32_t
  HashUTF8AsUTF16(const char* aUTF8, uint32_t aLength, bool* aErr);

  /**
   * Predicate over a single uint32_t value |u|. Only the declaration is
   * visible here; see the definition for which values count as "segment
   * break skip" characters.
   */
  bool
  IsSegmentBreakSkipChar(uint32_t u);

  } // namespace mozilla
  126. #endif /* nsUnicharUtils_h__ */