norm2allmodes.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. * Copyright (C) 2014, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. *******************************************************************************
  8. * norm2allmodes.h
  9. *
  10. * created on: 2014sep07
  11. * created by: Markus W. Scherer
  12. */
  13. #ifndef __NORM2ALLMODES_H__
  14. #define __NORM2ALLMODES_H__
  15. #include "unicode/utypes.h"
  16. #if !UCONFIG_NO_NORMALIZATION
  17. #include "unicode/edits.h"
  18. #include "unicode/normalizer2.h"
  19. #include "unicode/stringoptions.h"
  20. #include "unicode/unistr.h"
  21. #include "cpputils.h"
  22. #include "normalizer2impl.h"
  23. U_NAMESPACE_BEGIN
  24. // Intermediate class:
  25. // Has Normalizer2Impl and does boilerplate argument checking and setup.
  26. class Normalizer2WithImpl : public Normalizer2 {
  27. public:
  28. Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
  29. virtual ~Normalizer2WithImpl();
  30. // normalize
  31. virtual UnicodeString &
  32. normalize(const UnicodeString &src,
  33. UnicodeString &dest,
  34. UErrorCode &errorCode) const override {
  35. if(U_FAILURE(errorCode)) {
  36. dest.setToBogus();
  37. return dest;
  38. }
  39. const char16_t *sArray=src.getBuffer();
  40. if(&dest==&src || sArray==nullptr) {
  41. errorCode=U_ILLEGAL_ARGUMENT_ERROR;
  42. dest.setToBogus();
  43. return dest;
  44. }
  45. dest.remove();
  46. ReorderingBuffer buffer(impl, dest);
  47. if(buffer.init(src.length(), errorCode)) {
  48. normalize(sArray, sArray+src.length(), buffer, errorCode);
  49. }
  50. return dest;
  51. }
  52. virtual void
  53. normalize(const char16_t *src, const char16_t *limit,
  54. ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
  55. // normalize and append
  56. virtual UnicodeString &
  57. normalizeSecondAndAppend(UnicodeString &first,
  58. const UnicodeString &second,
  59. UErrorCode &errorCode) const override {
  60. return normalizeSecondAndAppend(first, second, true, errorCode);
  61. }
  62. virtual UnicodeString &
  63. append(UnicodeString &first,
  64. const UnicodeString &second,
  65. UErrorCode &errorCode) const override {
  66. return normalizeSecondAndAppend(first, second, false, errorCode);
  67. }
  68. UnicodeString &
  69. normalizeSecondAndAppend(UnicodeString &first,
  70. const UnicodeString &second,
  71. UBool doNormalize,
  72. UErrorCode &errorCode) const {
  73. uprv_checkCanGetBuffer(first, errorCode);
  74. if(U_FAILURE(errorCode)) {
  75. return first;
  76. }
  77. const char16_t *secondArray=second.getBuffer();
  78. if(&first==&second || secondArray==nullptr) {
  79. errorCode=U_ILLEGAL_ARGUMENT_ERROR;
  80. return first;
  81. }
  82. int32_t firstLength=first.length();
  83. UnicodeString safeMiddle;
  84. {
  85. ReorderingBuffer buffer(impl, first);
  86. if(buffer.init(firstLength+second.length(), errorCode)) {
  87. normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
  88. safeMiddle, buffer, errorCode);
  89. }
  90. } // The ReorderingBuffer destructor finalizes the first string.
  91. if(U_FAILURE(errorCode)) {
  92. // Restore the modified suffix of the first string.
  93. first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
  94. }
  95. return first;
  96. }
  97. virtual void
  98. normalizeAndAppend(const char16_t *src, const char16_t *limit, UBool doNormalize,
  99. UnicodeString &safeMiddle,
  100. ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
  101. virtual UBool
  102. getDecomposition(UChar32 c, UnicodeString &decomposition) const override {
  103. char16_t buffer[4];
  104. int32_t length;
  105. const char16_t *d=impl.getDecomposition(c, buffer, length);
  106. if(d==nullptr) {
  107. return false;
  108. }
  109. if(d==buffer) {
  110. decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c)
  111. } else {
  112. decomposition.setTo(false, d, length); // read-only alias
  113. }
  114. return true;
  115. }
  116. virtual UBool
  117. getRawDecomposition(UChar32 c, UnicodeString &decomposition) const override {
  118. char16_t buffer[30];
  119. int32_t length;
  120. const char16_t *d=impl.getRawDecomposition(c, buffer, length);
  121. if(d==nullptr) {
  122. return false;
  123. }
  124. if(d==buffer) {
  125. decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition)
  126. } else {
  127. decomposition.setTo(false, d, length); // read-only alias
  128. }
  129. return true;
  130. }
  131. virtual UChar32
  132. composePair(UChar32 a, UChar32 b) const override {
  133. return impl.composePair(a, b);
  134. }
  135. virtual uint8_t
  136. getCombiningClass(UChar32 c) const override {
  137. return impl.getCC(impl.getNorm16(c));
  138. }
  139. // quick checks
  140. virtual UBool
  141. isNormalized(const UnicodeString &s, UErrorCode &errorCode) const override {
  142. if(U_FAILURE(errorCode)) {
  143. return false;
  144. }
  145. const char16_t *sArray=s.getBuffer();
  146. if(sArray==nullptr) {
  147. errorCode=U_ILLEGAL_ARGUMENT_ERROR;
  148. return false;
  149. }
  150. const char16_t *sLimit=sArray+s.length();
  151. return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
  152. }
  153. virtual UNormalizationCheckResult
  154. quickCheck(const UnicodeString &s, UErrorCode &errorCode) const override {
  155. return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
  156. }
  157. virtual int32_t
  158. spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const override {
  159. if(U_FAILURE(errorCode)) {
  160. return 0;
  161. }
  162. const char16_t *sArray=s.getBuffer();
  163. if(sArray==nullptr) {
  164. errorCode=U_ILLEGAL_ARGUMENT_ERROR;
  165. return 0;
  166. }
  167. return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
  168. }
  169. virtual const char16_t *
  170. spanQuickCheckYes(const char16_t *src, const char16_t *limit, UErrorCode &errorCode) const = 0;
  171. virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
  172. return UNORM_YES;
  173. }
  174. const Normalizer2Impl &impl;
  175. };
  176. class DecomposeNormalizer2 : public Normalizer2WithImpl {
  177. public:
  178. DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
  179. virtual ~DecomposeNormalizer2();
  180. private:
  181. virtual void
  182. normalize(const char16_t *src, const char16_t *limit,
  183. ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
  184. impl.decompose(src, limit, &buffer, errorCode);
  185. }
  186. using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
  187. virtual void
  188. normalizeAndAppend(const char16_t *src, const char16_t *limit, UBool doNormalize,
  189. UnicodeString &safeMiddle,
  190. ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
  191. impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
  192. }
  193. void
  194. normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
  195. Edits *edits, UErrorCode &errorCode) const override {
  196. if (U_FAILURE(errorCode)) {
  197. return;
  198. }
  199. if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
  200. edits->reset();
  201. }
  202. const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data());
  203. impl.decomposeUTF8(options, s, s + src.length(), &sink, edits, errorCode);
  204. sink.Flush();
  205. }
  206. virtual UBool
  207. isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const override {
  208. if(U_FAILURE(errorCode)) {
  209. return false;
  210. }
  211. const uint8_t *s = reinterpret_cast<const uint8_t *>(sp.data());
  212. const uint8_t *sLimit = s + sp.length();
  213. return sLimit == impl.decomposeUTF8(0, s, sLimit, nullptr, nullptr, errorCode);
  214. }
  215. virtual const char16_t *
  216. spanQuickCheckYes(const char16_t *src, const char16_t *limit, UErrorCode &errorCode) const override {
  217. return impl.decompose(src, limit, nullptr, errorCode);
  218. }
  219. using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
  220. virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const override {
  221. return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
  222. }
  223. virtual UBool hasBoundaryBefore(UChar32 c) const override {
  224. return impl.hasDecompBoundaryBefore(c);
  225. }
  226. virtual UBool hasBoundaryAfter(UChar32 c) const override {
  227. return impl.hasDecompBoundaryAfter(c);
  228. }
  229. virtual UBool isInert(UChar32 c) const override {
  230. return impl.isDecompInert(c);
  231. }
  232. };
  233. class ComposeNormalizer2 : public Normalizer2WithImpl {
  234. public:
  235. ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
  236. Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
  237. virtual ~ComposeNormalizer2();
  238. private:
  239. virtual void
  240. normalize(const char16_t *src, const char16_t *limit,
  241. ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
  242. impl.compose(src, limit, onlyContiguous, true, buffer, errorCode);
  243. }
  244. using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
  245. void
  246. normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
  247. Edits *edits, UErrorCode &errorCode) const override {
  248. if (U_FAILURE(errorCode)) {
  249. return;
  250. }
  251. if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
  252. edits->reset();
  253. }
  254. const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data());
  255. impl.composeUTF8(options, onlyContiguous, s, s + src.length(),
  256. &sink, edits, errorCode);
  257. sink.Flush();
  258. }
  259. virtual void
  260. normalizeAndAppend(const char16_t *src, const char16_t *limit, UBool doNormalize,
  261. UnicodeString &safeMiddle,
  262. ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
  263. impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
  264. }
  265. virtual UBool
  266. isNormalized(const UnicodeString &s, UErrorCode &errorCode) const override {
  267. if(U_FAILURE(errorCode)) {
  268. return false;
  269. }
  270. const char16_t *sArray=s.getBuffer();
  271. if(sArray==nullptr) {
  272. errorCode=U_ILLEGAL_ARGUMENT_ERROR;
  273. return false;
  274. }
  275. UnicodeString temp;
  276. ReorderingBuffer buffer(impl, temp);
  277. if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization
  278. return false;
  279. }
  280. return impl.compose(sArray, sArray+s.length(), onlyContiguous, false, buffer, errorCode);
  281. }
  282. virtual UBool
  283. isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const override {
  284. if(U_FAILURE(errorCode)) {
  285. return false;
  286. }
  287. const uint8_t *s = reinterpret_cast<const uint8_t *>(sp.data());
  288. return impl.composeUTF8(0, onlyContiguous, s, s + sp.length(), nullptr, nullptr, errorCode);
  289. }
  290. virtual UNormalizationCheckResult
  291. quickCheck(const UnicodeString &s, UErrorCode &errorCode) const override {
  292. if(U_FAILURE(errorCode)) {
  293. return UNORM_MAYBE;
  294. }
  295. const char16_t *sArray=s.getBuffer();
  296. if(sArray==nullptr) {
  297. errorCode=U_ILLEGAL_ARGUMENT_ERROR;
  298. return UNORM_MAYBE;
  299. }
  300. UNormalizationCheckResult qcResult=UNORM_YES;
  301. impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
  302. return qcResult;
  303. }
  304. virtual const char16_t *
  305. spanQuickCheckYes(const char16_t *src, const char16_t *limit, UErrorCode &) const override {
  306. return impl.composeQuickCheck(src, limit, onlyContiguous, nullptr);
  307. }
  308. using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
  309. virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const override {
  310. return impl.getCompQuickCheck(impl.getNorm16(c));
  311. }
  312. virtual UBool hasBoundaryBefore(UChar32 c) const override {
  313. return impl.hasCompBoundaryBefore(c);
  314. }
  315. virtual UBool hasBoundaryAfter(UChar32 c) const override {
  316. return impl.hasCompBoundaryAfter(c, onlyContiguous);
  317. }
  318. virtual UBool isInert(UChar32 c) const override {
  319. return impl.isCompInert(c, onlyContiguous);
  320. }
  321. const UBool onlyContiguous;
  322. };
  323. class FCDNormalizer2 : public Normalizer2WithImpl {
  324. public:
  325. FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
  326. virtual ~FCDNormalizer2();
  327. private:
  328. virtual void
  329. normalize(const char16_t *src, const char16_t *limit,
  330. ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
  331. impl.makeFCD(src, limit, &buffer, errorCode);
  332. }
  333. using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
  334. virtual void
  335. normalizeAndAppend(const char16_t *src, const char16_t *limit, UBool doNormalize,
  336. UnicodeString &safeMiddle,
  337. ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
  338. impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
  339. }
  340. virtual const char16_t *
  341. spanQuickCheckYes(const char16_t *src, const char16_t *limit, UErrorCode &errorCode) const override {
  342. return impl.makeFCD(src, limit, nullptr, errorCode);
  343. }
  344. using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
  345. virtual UBool hasBoundaryBefore(UChar32 c) const override {
  346. return impl.hasFCDBoundaryBefore(c);
  347. }
  348. virtual UBool hasBoundaryAfter(UChar32 c) const override {
  349. return impl.hasFCDBoundaryAfter(c);
  350. }
  351. virtual UBool isInert(UChar32 c) const override {
  352. return impl.isFCDInert(c);
  353. }
  354. };
  355. struct Norm2AllModes : public UMemory {
  356. Norm2AllModes(Normalizer2Impl *i)
  357. : impl(i), comp(*i, false), decomp(*i), fcd(*i), fcc(*i, true) {}
  358. ~Norm2AllModes();
  359. static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode);
  360. static Norm2AllModes *createNFCInstance(UErrorCode &errorCode);
  361. static Norm2AllModes *createInstance(const char *packageName,
  362. const char *name,
  363. UErrorCode &errorCode);
  364. static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode);
  365. static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode);
  366. static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode);
  367. Normalizer2Impl *impl;
  368. ComposeNormalizer2 comp;
  369. DecomposeNormalizer2 decomp;
  370. FCDNormalizer2 fcd;
  371. ComposeNormalizer2 fcc;
  372. };
  373. U_NAMESPACE_END
  374. #endif // !UCONFIG_NO_NORMALIZATION
  375. #endif // __NORM2ALLMODES_H__