/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* vim:set ts=4 sw=4 sts=4 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
- #ifndef NS_UNICODEPROPERTIES_H
- #define NS_UNICODEPROPERTIES_H
- #include "nsBidiUtils.h"
- #include "nsIUGenCategory.h"
- #include "nsUnicodeScriptCodes.h"
- #include "harfbuzz/hb.h"
- #include "unicode/uchar.h"
- #include "unicode/uscript.h"
- const nsCharProps2& GetCharProps2(uint32_t aCh);
- namespace mozilla {
- namespace unicode {
- extern const nsIUGenCategory::nsUGenCategory sDetailedToGeneralCategory[];
// Vertical-orientation classes as stored in nsCharProps2::mVertOrient.
// The numeric values MUST stay in sync with the ones emitted by
// genUnicodePropertyData.pl.
enum VerticalOrientation {
    VERTICAL_ORIENTATION_U  = 0,
    VERTICAL_ORIENTATION_R  = 1,
    VERTICAL_ORIENTATION_Tu = 2,
    VERTICAL_ORIENTATION_Tr = 3
};
// Bidi paired-bracket classification of a character.
// The numeric values MUST stay in sync with the ones emitted by
// genUnicodePropertyData.pl.
enum PairedBracketType {
    PAIRED_BRACKET_TYPE_NONE  = 0,
    PAIRED_BRACKET_TYPE_OPEN  = 1,
    PAIRED_BRACKET_TYPE_CLOSE = 2
};
// Identifier-modification categories. The enumerators are numbered
// consecutively from zero; the values are spelled out here so the
// numbering is visible at a glance.
enum XidmodType {
    XIDMOD_RECOMMENDED       = 0,
    XIDMOD_INCLUSION         = 1,
    XIDMOD_UNCOMMON_USE      = 2,
    XIDMOD_TECHNICAL         = 3,
    XIDMOD_OBSOLETE          = 4,
    XIDMOD_ASPIRATIONAL      = 5,
    XIDMOD_LIMITED_USE       = 6,
    XIDMOD_EXCLUSION         = 7,
    XIDMOD_NOT_XID           = 8,
    XIDMOD_NOT_NFKC          = 9,
    XIDMOD_DEFAULT_IGNORABLE = 10,
    XIDMOD_DEPRECATED        = 11,
    XIDMOD_NOT_CHARS         = 12
};
// How a character is presented with respect to emoji rendering.
enum EmojiPresentation {
    TextOnly     = 0,  // not an emoji character at all
    TextDefault  = 1,  // emoji character without default emoji presentation
    EmojiDefault = 2   // emoji character with default emoji presentation
};
// Variation selectors that request a specific presentation style.
// U+FE0E selects text presentation.
const uint32_t kVariationSelector15 = 0xFE0E;
// U+FE0F selects emoji presentation.
const uint32_t kVariationSelector16 = 0xFE0F;
- // ICU is available, so simply forward to its API
- extern const hb_unicode_general_category_t sICUtoHBcategory[];
- inline uint32_t
- GetMirroredChar(uint32_t aCh)
- {
- return u_charMirror(aCh);
- }
- inline bool
- HasMirroredChar(uint32_t aCh)
- {
- return u_isMirrored(aCh);
- }
- inline uint8_t
- GetCombiningClass(uint32_t aCh)
- {
- return u_getCombiningClass(aCh);
- }
- inline uint8_t
- GetGeneralCategory(uint32_t aCh)
- {
- return sICUtoHBcategory[u_charType(aCh)];
- }
- inline nsCharType
- GetBidiCat(uint32_t aCh)
- {
- return nsCharType(u_charDirection(aCh));
- }
- inline int8_t
- GetNumericValue(uint32_t aCh)
- {
- UNumericType type =
- UNumericType(u_getIntPropertyValue(aCh, UCHAR_NUMERIC_TYPE));
- return type == U_NT_DECIMAL || type == U_NT_DIGIT
- ? int8_t(u_getNumericValue(aCh)) : -1;
- }
- inline uint8_t
- GetLineBreakClass(uint32_t aCh)
- {
- return u_getIntPropertyValue(aCh, UCHAR_LINE_BREAK);
- }
- inline Script
- GetScriptCode(uint32_t aCh)
- {
- UErrorCode err = U_ZERO_ERROR;
- return Script(uscript_getScript(aCh, &err));
- }
- inline uint32_t
- GetScriptTagForCode(Script aScriptCode)
- {
- const char* tag = uscript_getShortName(UScriptCode(aScriptCode));
- return HB_TAG(tag[0], tag[1], tag[2], tag[3]);
- }
- inline PairedBracketType
- GetPairedBracketType(uint32_t aCh)
- {
- return PairedBracketType
- (u_getIntPropertyValue(aCh, UCHAR_BIDI_PAIRED_BRACKET_TYPE));
- }
- inline uint32_t
- GetPairedBracket(uint32_t aCh)
- {
- return u_getBidiPairedBracket(aCh);
- }
- inline uint32_t
- GetUppercase(uint32_t aCh)
- {
- return u_toupper(aCh);
- }
- inline uint32_t
- GetLowercase(uint32_t aCh)
- {
- return u_tolower(aCh);
- }
- inline uint32_t
- GetTitlecaseForLower(uint32_t aCh) // maps LC to titlecase, UC unchanged
- {
- return u_isULowercase(aCh) ? u_totitle(aCh) : aCh;
- }
- inline uint32_t
- GetTitlecaseForAll(uint32_t aCh) // maps both UC and LC to titlecase
- {
- return u_totitle(aCh);
- }
- inline bool
- IsEastAsianWidthFWH(uint32_t aCh)
- {
- switch (u_getIntPropertyValue(aCh, UCHAR_EAST_ASIAN_WIDTH)) {
- case U_EA_FULLWIDTH:
- case U_EA_WIDE:
- case U_EA_HALFWIDTH:
- return true;
- case U_EA_AMBIGUOUS:
- case U_EA_NARROW:
- case U_EA_NEUTRAL:
- return false;
- }
- return false;
- }
- inline EmojiPresentation
- GetEmojiPresentation(uint32_t aCh)
- {
- if (!u_hasBinaryProperty(aCh, UCHAR_EMOJI)) {
- return TextOnly;
- }
- if (u_hasBinaryProperty(aCh, UCHAR_EMOJI_PRESENTATION)) {
- return EmojiDefault;
- }
- return TextDefault;
- }
- // returns the simplified Gen Category as defined in nsIUGenCategory
- inline nsIUGenCategory::nsUGenCategory GetGenCategory(uint32_t aCh) {
- return sDetailedToGeneralCategory[GetGeneralCategory(aCh)];
- }
- inline VerticalOrientation GetVerticalOrientation(uint32_t aCh) {
- return VerticalOrientation(GetCharProps2(aCh).mVertOrient);
- }
- inline XidmodType GetIdentifierModification(uint32_t aCh) {
- return XidmodType(GetCharProps2(aCh).mXidmod);
- }
// Full-width mapping for aCh. Defined outside this header.
uint32_t GetFullWidth(uint32_t aCh);

// The reverse of GetFullWidth. It guarantees that, for every codepoint c,
// GetFullWidthInverse(GetFullWidth(c)) == c.
// Note that this function is not guaranteed to convert every wide-form
// character to a possible narrow form.
uint32_t GetFullWidthInverse(uint32_t aCh);
- bool IsClusterExtender(uint32_t aCh, uint8_t aCategory);
- inline bool IsClusterExtender(uint32_t aCh) {
- return IsClusterExtender(aCh, GetGeneralCategory(aCh));
- }
// Forward iterator over a buffer of char16_t code units that advances
// one Unicode grapheme cluster at a time.
//
// Typical use: construct over [aText, aText + aLength), then call Next()
// until AtEnd() returns true. The iterator converts implicitly to a
// pointer to its current position.
class ClusterIterator
{
public:
    ClusterIterator(const char16_t* aText, uint32_t aLength)
        : mPos(aText)
        , mLimit(aText + aLength)
#ifdef DEBUG
        , mText(aText)
#endif
    {
    }

    // Current position within the text.
    operator const char16_t* () const { return mPos; }

    // True once the position has reached (or passed) the end of the text.
    bool AtEnd() const { return !(mPos < mLimit); }

    // Advances the position; defined outside this header.
    void Next();

private:
    const char16_t* mPos;    // current position
    const char16_t* mLimit;  // one past the last code unit
#ifdef DEBUG
    const char16_t* mText;   // start of the text; DEBUG builds only
#endif
};
// Returns how many grapheme clusters the aLength code units starting at
// aText contain. Defined outside this header.
uint32_t CountGraphemeClusters(const char16_t* aText, uint32_t aLength);
// Reverse iterator over a buffer of char16_t code units that advances
// one Unicode grapheme cluster at a time.
//
// It starts at the end of the text (aText + aLength), and AtEnd() becomes
// true once the position reaches the beginning. The iterator converts
// implicitly to a pointer to its current position.
class ClusterReverseIterator
{
public:
    ClusterReverseIterator(const char16_t* aText, uint32_t aLength)
        : mPos(aText + aLength)
        , mLimit(aText)
    {
    }

    // Current position within the text.
    operator const char16_t* () const { return mPos; }

    // True once the position has reached (or passed) the start of the text.
    bool AtEnd() const { return !(mPos > mLimit); }

    // Advances the position; defined outside this header.
    void Next();

private:
    const char16_t* mPos;    // current position
    const char16_t* mLimit;  // start of the text
};
- } // end namespace unicode
- } // end namespace mozilla
- #endif /* NS_UNICODEPROPERTIES_H */
|