123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406 |
- // © 2016 and later: Unicode, Inc. and others.
- // License & terms of use: http://www.unicode.org/copyright.html
- /*
- *******************************************************************************
- * Copyright (C) 2014, International Business Machines
- * Corporation and others. All Rights Reserved.
- *******************************************************************************
- * norm2allmodes.h
- *
- * created on: 2014sep07
- * created by: Markus W. Scherer
- */
- #ifndef __NORM2ALLMODES_H__
- #define __NORM2ALLMODES_H__
- #include "unicode/utypes.h"
- #if !UCONFIG_NO_NORMALIZATION
- #include "unicode/edits.h"
- #include "unicode/normalizer2.h"
- #include "unicode/stringoptions.h"
- #include "unicode/unistr.h"
- #include "cpputils.h"
- #include "normalizer2impl.h"
- U_NAMESPACE_BEGIN
- // Intermediate class:
- // Has Normalizer2Impl and does boilerplate argument checking and setup.
- class Normalizer2WithImpl : public Normalizer2 {
- public:
- Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
- virtual ~Normalizer2WithImpl();
- // normalize
- virtual UnicodeString &
- normalize(const UnicodeString &src,
- UnicodeString &dest,
- UErrorCode &errorCode) const override {
- if(U_FAILURE(errorCode)) {
- dest.setToBogus();
- return dest;
- }
- const char16_t *sArray=src.getBuffer();
- if(&dest==&src || sArray==nullptr) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- dest.setToBogus();
- return dest;
- }
- dest.remove();
- ReorderingBuffer buffer(impl, dest);
- if(buffer.init(src.length(), errorCode)) {
- normalize(sArray, sArray+src.length(), buffer, errorCode);
- }
- return dest;
- }
- virtual void
- normalize(const char16_t *src, const char16_t *limit,
- ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
- // normalize and append
- virtual UnicodeString &
- normalizeSecondAndAppend(UnicodeString &first,
- const UnicodeString &second,
- UErrorCode &errorCode) const override {
- return normalizeSecondAndAppend(first, second, true, errorCode);
- }
- virtual UnicodeString &
- append(UnicodeString &first,
- const UnicodeString &second,
- UErrorCode &errorCode) const override {
- return normalizeSecondAndAppend(first, second, false, errorCode);
- }
- UnicodeString &
- normalizeSecondAndAppend(UnicodeString &first,
- const UnicodeString &second,
- UBool doNormalize,
- UErrorCode &errorCode) const {
- uprv_checkCanGetBuffer(first, errorCode);
- if(U_FAILURE(errorCode)) {
- return first;
- }
- const char16_t *secondArray=second.getBuffer();
- if(&first==&second || secondArray==nullptr) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return first;
- }
- int32_t firstLength=first.length();
- UnicodeString safeMiddle;
- {
- ReorderingBuffer buffer(impl, first);
- if(buffer.init(firstLength+second.length(), errorCode)) {
- normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
- safeMiddle, buffer, errorCode);
- }
- } // The ReorderingBuffer destructor finalizes the first string.
- if(U_FAILURE(errorCode)) {
- // Restore the modified suffix of the first string.
- first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
- }
- return first;
- }
- virtual void
- normalizeAndAppend(const char16_t *src, const char16_t *limit, UBool doNormalize,
- UnicodeString &safeMiddle,
- ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
- virtual UBool
- getDecomposition(UChar32 c, UnicodeString &decomposition) const override {
- char16_t buffer[4];
- int32_t length;
- const char16_t *d=impl.getDecomposition(c, buffer, length);
- if(d==nullptr) {
- return false;
- }
- if(d==buffer) {
- decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c)
- } else {
- decomposition.setTo(false, d, length); // read-only alias
- }
- return true;
- }
- virtual UBool
- getRawDecomposition(UChar32 c, UnicodeString &decomposition) const override {
- char16_t buffer[30];
- int32_t length;
- const char16_t *d=impl.getRawDecomposition(c, buffer, length);
- if(d==nullptr) {
- return false;
- }
- if(d==buffer) {
- decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition)
- } else {
- decomposition.setTo(false, d, length); // read-only alias
- }
- return true;
- }
- virtual UChar32
- composePair(UChar32 a, UChar32 b) const override {
- return impl.composePair(a, b);
- }
- virtual uint8_t
- getCombiningClass(UChar32 c) const override {
- return impl.getCC(impl.getNorm16(c));
- }
- // quick checks
- virtual UBool
- isNormalized(const UnicodeString &s, UErrorCode &errorCode) const override {
- if(U_FAILURE(errorCode)) {
- return false;
- }
- const char16_t *sArray=s.getBuffer();
- if(sArray==nullptr) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return false;
- }
- const char16_t *sLimit=sArray+s.length();
- return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
- }
- virtual UNormalizationCheckResult
- quickCheck(const UnicodeString &s, UErrorCode &errorCode) const override {
- return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
- }
- virtual int32_t
- spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const override {
- if(U_FAILURE(errorCode)) {
- return 0;
- }
- const char16_t *sArray=s.getBuffer();
- if(sArray==nullptr) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
- }
- virtual const char16_t *
- spanQuickCheckYes(const char16_t *src, const char16_t *limit, UErrorCode &errorCode) const = 0;
- virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
- return UNORM_YES;
- }
- const Normalizer2Impl &impl;
- };
- class DecomposeNormalizer2 : public Normalizer2WithImpl {
- public:
- DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
- virtual ~DecomposeNormalizer2();
- private:
- virtual void
- normalize(const char16_t *src, const char16_t *limit,
- ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
- impl.decompose(src, limit, &buffer, errorCode);
- }
- using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
- virtual void
- normalizeAndAppend(const char16_t *src, const char16_t *limit, UBool doNormalize,
- UnicodeString &safeMiddle,
- ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
- impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
- }
- void
- normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
- Edits *edits, UErrorCode &errorCode) const override {
- if (U_FAILURE(errorCode)) {
- return;
- }
- if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
- edits->reset();
- }
- const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data());
- impl.decomposeUTF8(options, s, s + src.length(), &sink, edits, errorCode);
- sink.Flush();
- }
- virtual UBool
- isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const override {
- if(U_FAILURE(errorCode)) {
- return false;
- }
- const uint8_t *s = reinterpret_cast<const uint8_t *>(sp.data());
- const uint8_t *sLimit = s + sp.length();
- return sLimit == impl.decomposeUTF8(0, s, sLimit, nullptr, nullptr, errorCode);
- }
- virtual const char16_t *
- spanQuickCheckYes(const char16_t *src, const char16_t *limit, UErrorCode &errorCode) const override {
- return impl.decompose(src, limit, nullptr, errorCode);
- }
- using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
- virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const override {
- return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
- }
- virtual UBool hasBoundaryBefore(UChar32 c) const override {
- return impl.hasDecompBoundaryBefore(c);
- }
- virtual UBool hasBoundaryAfter(UChar32 c) const override {
- return impl.hasDecompBoundaryAfter(c);
- }
- virtual UBool isInert(UChar32 c) const override {
- return impl.isDecompInert(c);
- }
- };
- class ComposeNormalizer2 : public Normalizer2WithImpl {
- public:
- ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
- Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
- virtual ~ComposeNormalizer2();
- private:
- virtual void
- normalize(const char16_t *src, const char16_t *limit,
- ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
- impl.compose(src, limit, onlyContiguous, true, buffer, errorCode);
- }
- using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
- void
- normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
- Edits *edits, UErrorCode &errorCode) const override {
- if (U_FAILURE(errorCode)) {
- return;
- }
- if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
- edits->reset();
- }
- const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data());
- impl.composeUTF8(options, onlyContiguous, s, s + src.length(),
- &sink, edits, errorCode);
- sink.Flush();
- }
- virtual void
- normalizeAndAppend(const char16_t *src, const char16_t *limit, UBool doNormalize,
- UnicodeString &safeMiddle,
- ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
- impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
- }
- virtual UBool
- isNormalized(const UnicodeString &s, UErrorCode &errorCode) const override {
- if(U_FAILURE(errorCode)) {
- return false;
- }
- const char16_t *sArray=s.getBuffer();
- if(sArray==nullptr) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return false;
- }
- UnicodeString temp;
- ReorderingBuffer buffer(impl, temp);
- if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization
- return false;
- }
- return impl.compose(sArray, sArray+s.length(), onlyContiguous, false, buffer, errorCode);
- }
- virtual UBool
- isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const override {
- if(U_FAILURE(errorCode)) {
- return false;
- }
- const uint8_t *s = reinterpret_cast<const uint8_t *>(sp.data());
- return impl.composeUTF8(0, onlyContiguous, s, s + sp.length(), nullptr, nullptr, errorCode);
- }
- virtual UNormalizationCheckResult
- quickCheck(const UnicodeString &s, UErrorCode &errorCode) const override {
- if(U_FAILURE(errorCode)) {
- return UNORM_MAYBE;
- }
- const char16_t *sArray=s.getBuffer();
- if(sArray==nullptr) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return UNORM_MAYBE;
- }
- UNormalizationCheckResult qcResult=UNORM_YES;
- impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
- return qcResult;
- }
- virtual const char16_t *
- spanQuickCheckYes(const char16_t *src, const char16_t *limit, UErrorCode &) const override {
- return impl.composeQuickCheck(src, limit, onlyContiguous, nullptr);
- }
- using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
- virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const override {
- return impl.getCompQuickCheck(impl.getNorm16(c));
- }
- virtual UBool hasBoundaryBefore(UChar32 c) const override {
- return impl.hasCompBoundaryBefore(c);
- }
- virtual UBool hasBoundaryAfter(UChar32 c) const override {
- return impl.hasCompBoundaryAfter(c, onlyContiguous);
- }
- virtual UBool isInert(UChar32 c) const override {
- return impl.isCompInert(c, onlyContiguous);
- }
- const UBool onlyContiguous;
- };
- class FCDNormalizer2 : public Normalizer2WithImpl {
- public:
- FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
- virtual ~FCDNormalizer2();
- private:
- virtual void
- normalize(const char16_t *src, const char16_t *limit,
- ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
- impl.makeFCD(src, limit, &buffer, errorCode);
- }
- using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
- virtual void
- normalizeAndAppend(const char16_t *src, const char16_t *limit, UBool doNormalize,
- UnicodeString &safeMiddle,
- ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
- impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
- }
- virtual const char16_t *
- spanQuickCheckYes(const char16_t *src, const char16_t *limit, UErrorCode &errorCode) const override {
- return impl.makeFCD(src, limit, nullptr, errorCode);
- }
- using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
- virtual UBool hasBoundaryBefore(UChar32 c) const override {
- return impl.hasFCDBoundaryBefore(c);
- }
- virtual UBool hasBoundaryAfter(UChar32 c) const override {
- return impl.hasFCDBoundaryAfter(c);
- }
- virtual UBool isInert(UChar32 c) const override {
- return impl.isFCDInert(c);
- }
- };
- struct Norm2AllModes : public UMemory {
- Norm2AllModes(Normalizer2Impl *i)
- : impl(i), comp(*i, false), decomp(*i), fcd(*i), fcc(*i, true) {}
- ~Norm2AllModes();
- static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode);
- static Norm2AllModes *createNFCInstance(UErrorCode &errorCode);
- static Norm2AllModes *createInstance(const char *packageName,
- const char *name,
- UErrorCode &errorCode);
- static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode);
- static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode);
- static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode);
- Normalizer2Impl *impl;
- ComposeNormalizer2 comp;
- DecomposeNormalizer2 decomp;
- FCDNormalizer2 fcd;
- ComposeNormalizer2 fcc;
- };
- U_NAMESPACE_END
- #endif // !UCONFIG_NO_NORMALIZATION
- #endif // __NORM2ALLMODES_H__
|