ucharstrieiterator.cpp 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. * Copyright (C) 2010-2011, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. *******************************************************************************
  8. * file name: ucharstrieiterator.h
  9. * encoding: UTF-8
  10. * tab size: 8 (not used)
  11. * indentation:4
  12. *
  13. * created on: 2010nov15
  14. * created by: Markus W. Scherer
  15. */
  16. #include "unicode/utypes.h"
  17. #include "unicode/ucharstrie.h"
  18. #include "unicode/unistr.h"
  19. #include "uvectr32.h"
  20. U_NAMESPACE_BEGIN
  21. UCharsTrie::Iterator::Iterator(ConstChar16Ptr trieUChars, int32_t maxStringLength,
  22. UErrorCode &errorCode)
  23. : uchars_(trieUChars),
  24. pos_(uchars_), initialPos_(uchars_),
  25. remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
  26. skipValue_(false),
  27. maxLength_(maxStringLength), value_(0), stack_(nullptr) {
  28. if(U_FAILURE(errorCode)) {
  29. return;
  30. }
  31. // stack_ is a pointer so that it's easy to turn ucharstrie.h into
  32. // a public API header for which we would want it to depend only on
  33. // other public headers.
  34. // Unlike UCharsTrie itself, its Iterator performs memory allocations anyway
  35. // via the UnicodeString and UVector32 implementations, so this additional
  36. // cost is minimal.
  37. stack_=new UVector32(errorCode);
  38. if(stack_==nullptr) {
  39. errorCode=U_MEMORY_ALLOCATION_ERROR;
  40. }
  41. }
  42. UCharsTrie::Iterator::Iterator(const UCharsTrie &trie, int32_t maxStringLength,
  43. UErrorCode &errorCode)
  44. : uchars_(trie.uchars_), pos_(trie.pos_), initialPos_(trie.pos_),
  45. remainingMatchLength_(trie.remainingMatchLength_),
  46. initialRemainingMatchLength_(trie.remainingMatchLength_),
  47. skipValue_(false),
  48. maxLength_(maxStringLength), value_(0), stack_(nullptr) {
  49. if(U_FAILURE(errorCode)) {
  50. return;
  51. }
  52. stack_=new UVector32(errorCode);
  53. if(U_FAILURE(errorCode)) {
  54. return;
  55. }
  56. if(stack_==nullptr) {
  57. errorCode=U_MEMORY_ALLOCATION_ERROR;
  58. return;
  59. }
  60. int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
  61. if(length>=0) {
  62. // Pending linear-match node, append remaining UChars to str_.
  63. ++length;
  64. if(maxLength_>0 && length>maxLength_) {
  65. length=maxLength_; // This will leave remainingMatchLength>=0 as a signal.
  66. }
  67. str_.append(pos_, length);
  68. pos_+=length;
  69. remainingMatchLength_-=length;
  70. }
  71. }
  72. UCharsTrie::Iterator::~Iterator() {
  73. delete stack_;
  74. }
  75. UCharsTrie::Iterator &
  76. UCharsTrie::Iterator::reset() {
  77. pos_=initialPos_;
  78. remainingMatchLength_=initialRemainingMatchLength_;
  79. skipValue_=false;
  80. int32_t length=remainingMatchLength_+1; // Remaining match length.
  81. if(maxLength_>0 && length>maxLength_) {
  82. length=maxLength_;
  83. }
  84. str_.truncate(length);
  85. pos_+=length;
  86. remainingMatchLength_-=length;
  87. stack_->setSize(0);
  88. return *this;
  89. }
  90. UBool
  91. UCharsTrie::Iterator::hasNext() const { return pos_!=nullptr || !stack_->isEmpty(); }
  92. UBool
  93. UCharsTrie::Iterator::next(UErrorCode &errorCode) {
  94. if(U_FAILURE(errorCode)) {
  95. return false;
  96. }
  97. const char16_t *pos=pos_;
  98. if(pos==nullptr) {
  99. if(stack_->isEmpty()) {
  100. return false;
  101. }
  102. // Pop the state off the stack and continue with the next outbound edge of
  103. // the branch node.
  104. int32_t stackSize=stack_->size();
  105. int32_t length=stack_->elementAti(stackSize-1);
  106. pos=uchars_+stack_->elementAti(stackSize-2);
  107. stack_->setSize(stackSize-2);
  108. str_.truncate(length&0xffff);
  109. length=(int32_t)((uint32_t)length>>16);
  110. if(length>1) {
  111. pos=branchNext(pos, length, errorCode);
  112. if(pos==nullptr) {
  113. return true; // Reached a final value.
  114. }
  115. } else {
  116. str_.append(*pos++);
  117. }
  118. }
  119. if(remainingMatchLength_>=0) {
  120. // We only get here if we started in a pending linear-match node
  121. // with more than maxLength remaining units.
  122. return truncateAndStop();
  123. }
  124. for(;;) {
  125. int32_t node=*pos++;
  126. if(node>=kMinValueLead) {
  127. if(skipValue_) {
  128. pos=skipNodeValue(pos, node);
  129. node&=kNodeTypeMask;
  130. skipValue_=false;
  131. } else {
  132. // Deliver value for the string so far.
  133. UBool isFinal=(UBool)(node>>15);
  134. if(isFinal) {
  135. value_=readValue(pos, node&0x7fff);
  136. } else {
  137. value_=readNodeValue(pos, node);
  138. }
  139. if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) {
  140. pos_=nullptr;
  141. } else {
  142. // We cannot skip the value right here because it shares its
  143. // lead unit with a match node which we have to evaluate
  144. // next time.
  145. // Instead, keep pos_ on the node lead unit itself.
  146. pos_=pos-1;
  147. skipValue_=true;
  148. }
  149. return true;
  150. }
  151. }
  152. if(maxLength_>0 && str_.length()==maxLength_) {
  153. return truncateAndStop();
  154. }
  155. if(node<kMinLinearMatch) {
  156. if(node==0) {
  157. node=*pos++;
  158. }
  159. pos=branchNext(pos, node+1, errorCode);
  160. if(pos==nullptr) {
  161. return true; // Reached a final value.
  162. }
  163. } else {
  164. // Linear-match node, append length units to str_.
  165. int32_t length=node-kMinLinearMatch+1;
  166. if(maxLength_>0 && str_.length()+length>maxLength_) {
  167. str_.append(pos, maxLength_-str_.length());
  168. return truncateAndStop();
  169. }
  170. str_.append(pos, length);
  171. pos+=length;
  172. }
  173. }
  174. }
  175. // Branch node, needs to take the first outbound edge and push state for the rest.
  176. const char16_t *
  177. UCharsTrie::Iterator::branchNext(const char16_t *pos, int32_t length, UErrorCode &errorCode) {
  178. while(length>kMaxBranchLinearSubNodeLength) {
  179. ++pos; // ignore the comparison unit
  180. // Push state for the greater-or-equal edge.
  181. stack_->addElement((int32_t)(skipDelta(pos)-uchars_), errorCode);
  182. stack_->addElement(((length-(length>>1))<<16)|str_.length(), errorCode);
  183. // Follow the less-than edge.
  184. length>>=1;
  185. pos=jumpByDelta(pos);
  186. }
  187. // List of key-value pairs where values are either final values or jump deltas.
  188. // Read the first (key, value) pair.
  189. char16_t trieUnit=*pos++;
  190. int32_t node=*pos++;
  191. UBool isFinal=(UBool)(node>>15);
  192. int32_t value=readValue(pos, node&=0x7fff);
  193. pos=skipValue(pos, node);
  194. stack_->addElement((int32_t)(pos-uchars_), errorCode);
  195. stack_->addElement(((length-1)<<16)|str_.length(), errorCode);
  196. str_.append(trieUnit);
  197. if(isFinal) {
  198. pos_=nullptr;
  199. value_=value;
  200. return nullptr;
  201. } else {
  202. return pos+value;
  203. }
  204. }
  205. U_NAMESPACE_END