uiter.h 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708
  1. /*
  2. *******************************************************************************
  3. *
  4. * Copyright (C) 2002-2006,2009 International Business Machines
  5. * Corporation and others. All Rights Reserved.
  6. *
  7. *******************************************************************************
  8. * file name: uiter.h
  9. * encoding: US-ASCII
  10. * tab size: 8 (not used)
  11. * indentation:4
  12. *
  13. * created on: 2002jan18
  14. * created by: Markus W. Scherer
  15. */
  16. #ifndef __UITER_H__
  17. #define __UITER_H__
  18. /**
  19. * \file
  20. * \brief C API: Unicode Character Iteration
  21. *
  22. * @see UCharIterator
  23. */
  24. #include "unicode/utypes.h"
  25. #if U_SHOW_CPLUSPLUS_API
  26. U_NAMESPACE_BEGIN
  27. class CharacterIterator; /* forward declaration; used by uiter_setCharacterIterator() */
  28. class Replaceable; /* forward declaration; used by uiter_setReplaceable() */
  29. U_NAMESPACE_END
  30. #endif
  31. U_CDECL_BEGIN
  32. struct UCharIterator; /* forward declaration so that the function types can take UCharIterator pointers */
  33. typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
  34. /**
  35. * Origin constants for UCharIterator.getIndex() and UCharIterator.move(). They select the start, current, or limit index of the iteration range, index 0, or the length as the reference point.
  36. * @see UCharIteratorMove
  37. * @see UCharIterator
  38. * @stable ICU 2.1
  39. */
  40. typedef enum UCharIteratorOrigin {
  41. UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH
  42. } UCharIteratorOrigin;
  43. /** Constants for UCharIterator. @stable ICU 2.6 */
  44. enum {
  45. /**
  46. * Constant value (-2) that may be returned by UCharIteratorMove (UCharIterator.move())
  47. * indicating that the final UTF-16 index is not known, but that the move succeeded.
  48. * This can occur when moving relative to limit or length, or
  49. * when moving relative to the current index after a setState()
  50. * when the current UTF-16 index is not known.
  51. *
  52. * It would be very inefficient to have to count from the beginning of the text
  53. * just to get the current/limit/length index after moving relative to it.
  54. * The actual index can be determined with getIndex(UITER_CURRENT)
  55. * which will count the UChars if necessary.
  56. *
  57. * @stable ICU 2.6
  58. */
  59. UITER_UNKNOWN_INDEX=-2
  60. };
  61. /**
  62. * Constant for UCharIterator getState() indicating an error or
  63. * an unknown state.
  64. * Returned by uiter_getState()/UCharIteratorGetState
  65. * when an error occurs; uiter_getState() also returns it when iter->getState is NULL.
  66. * Also, some UCharIterator implementations may not be able to return
  67. * a valid state for each position. This will be clearly documented
  68. * for each such iterator (none of the public ones here).
  69. *
  70. * @stable ICU 2.6
  71. */
  72. #define UITER_NO_STATE ((uint32_t)0xffffffff)
  73. /**
  74. * Function type declaration for UCharIterator.getIndex().
  75. *
  76. * Gets the current position, or the start or limit of the
  77. * iteration range, or index 0 or the length, as selected by origin.
  78. *
  79. * This function may perform slowly for UITER_CURRENT after setState() was called,
  80. * or for UITER_LENGTH, because an iterator implementation may have to count
  81. * UChars if the underlying storage is not UTF-16.
  82. *
  83. * @param iter the UCharIterator structure ("this pointer")
  84. * @param origin selects the index to return: UITER_ZERO, UITER_START, UITER_LIMIT, UITER_LENGTH, or UITER_CURRENT
  85. * @return the requested index, or U_SENTINEL in an error condition
  86. *
  87. * @see UCharIteratorOrigin
  88. * @see UCharIterator
  89. * @stable ICU 2.1
  90. */
  91. typedef int32_t U_CALLCONV
  92. UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin);
  93. /**
  94. * Function type declaration for UCharIterator.move().
  95. *
  96. * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index).
  97. *
  98. * Moves the current position relative to the start or limit of the
  99. * iteration range, or relative to the current position itself.
  100. * The movement is expressed in numbers of code units forward
  101. * or backward by specifying a positive or negative delta.
  102. * Out of bounds movement will be pinned to the start or limit.
  103. *
  104. * This function may perform slowly for moving relative to UITER_LENGTH
  105. * because an iterator implementation may have to count the rest of the
  106. * UChars if the native storage is not UTF-16.
  107. *
  108. * When moving relative to the limit or length, or
  109. * relative to the current position after setState() was called,
  110. * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient
  111. * determination of the actual UTF-16 index.
  112. * The actual index can be determined with getIndex(UITER_CURRENT)
  113. * which will count the UChars if necessary.
  114. * See UITER_UNKNOWN_INDEX for details.
  115. *
  116. * @param iter the UCharIterator structure ("this pointer")
  117. * @param delta number of code units to move: positive (forward), zero, or negative (backward)
  118. * @param origin reference point for the move: UITER_ZERO, UITER_START, UITER_LIMIT, UITER_LENGTH, or UITER_CURRENT
  119. * @return the new index, or U_SENTINEL on an error condition,
  120. * or UITER_UNKNOWN_INDEX when the index is not known.
  121. *
  122. * @see UCharIteratorOrigin
  123. * @see UCharIterator
  124. * @see UITER_UNKNOWN_INDEX
  125. * @stable ICU 2.1
  126. */
  127. typedef int32_t U_CALLCONV
  128. UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin);
  129. /**
  130. * Function type declaration for UCharIterator.hasNext().
  131. *
  132. * Check if current() and next() can still
  133. * return another code unit.
  134. *
  135. * @param iter the UCharIterator structure ("this pointer")
  136. * @return whether current() and next() can still return another code unit (rather than U_SENTINEL)
  137. *
  138. * @see UCharIterator
  139. * @stable ICU 2.1
  140. */
  141. typedef UBool U_CALLCONV
  142. UCharIteratorHasNext(UCharIterator *iter);
  143. /**
  144. * Function type declaration for UCharIterator.hasPrevious().
  145. *
  146. * Check if previous() can still return another code unit.
  147. *
  148. * @param iter the UCharIterator structure ("this pointer")
  149. * @return whether previous() can still return another code unit (rather than U_SENTINEL)
  150. *
  151. * @see UCharIterator
  152. * @stable ICU 2.1
  153. */
  154. typedef UBool U_CALLCONV
  155. UCharIteratorHasPrevious(UCharIterator *iter);
  156. /**
  157. * Function type declaration for UCharIterator.current().
  158. *
  159. * Return the code unit at the current position,
  160. * or U_SENTINEL if there is none (index is at the limit).
  161. *
  162. * @param iter the UCharIterator structure ("this pointer")
  163. * @return the current code unit, or U_SENTINEL if the index is at the limit
  164. *
  165. * @see UCharIterator
  166. * @stable ICU 2.1
  167. */
  168. typedef UChar32 U_CALLCONV
  169. UCharIteratorCurrent(UCharIterator *iter);
  170. /**
  171. * Function type declaration for UCharIterator.next().
  172. *
  173. * Return the code unit at the current index and increment
  174. * the index (post-increment, like s[i++]),
  175. * or return U_SENTINEL if there is none (index is at the limit).
  176. *
  177. * @param iter the UCharIterator structure ("this pointer")
  178. * @return the current code unit (and post-increment the current index), or U_SENTINEL if the index is at the limit
  179. *
  180. * @see UCharIterator
  181. * @stable ICU 2.1
  182. */
  183. typedef UChar32 U_CALLCONV
  184. UCharIteratorNext(UCharIterator *iter);
  185. /**
  186. * Function type declaration for UCharIterator.previous().
  187. *
  188. * Decrement the index and return the code unit from there
  189. * (pre-decrement, like s[--i]),
  190. * or return U_SENTINEL if there is none (index is at the start).
  191. *
  192. * @param iter the UCharIterator structure ("this pointer")
  193. * @return the previous code unit (after pre-decrementing the current index), or U_SENTINEL if the index is at the start
  194. *
  195. * @see UCharIterator
  196. * @stable ICU 2.1
  197. */
  198. typedef UChar32 U_CALLCONV
  199. UCharIteratorPrevious(UCharIterator *iter);
  200. /**
  201. * Function type declaration for UCharIterator.reservedFn().
  202. * Reserved for future use; no meaning is defined yet for the argument or the result.
  203. *
  204. * @param iter the UCharIterator structure ("this pointer")
  205. * @param something some integer argument (reserved)
  206. * @return some integer (reserved)
  207. *
  208. * @see UCharIterator
  209. * @stable ICU 2.1
  210. */
  211. typedef int32_t U_CALLCONV
  212. UCharIteratorReserved(UCharIterator *iter, int32_t something);
  213. /**
  214. * Function type declaration for UCharIterator.getState().
  215. *
  216. * Get the "state" of the iterator in the form of a single 32-bit word.
  217. * It is recommended that the state value be calculated to be as small as
  218. * is feasible. For strings with limited lengths, fewer than 32 bits may
  219. * be sufficient.
  220. *
  221. * This is used together with setState()/UCharIteratorSetState
  222. * to save and restore the iterator position more efficiently than with
  223. * getIndex()/move().
  224. *
  225. * The iterator state is defined as a uint32_t value because it is designed
  226. * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state
  227. * of the character iterator.
  228. *
  229. * With some UCharIterator implementations (e.g., UTF-8),
  230. * getting and setting the UTF-16 index with existing functions
  231. * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but
  232. * relatively slow because the iterator has to "walk" from a known index
  233. * to the requested one.
  234. * This takes more time the farther it needs to go.
  235. *
  236. * An opaque state value allows an iterator implementation to provide
  237. * an internal index (UTF-8: the source byte array index) for
  238. * fast, constant-time restoration.
  239. *
  240. * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
  241. * the UTF-16 index may not be restored as well, but the iterator can deliver
  242. * the correct text contents and move relative to the current position
  243. * without performance degradation.
  244. *
  245. * Some UCharIterator implementations may not be able to return
  246. * a valid state for each position, in which case they return UITER_NO_STATE instead.
  247. * This will be clearly documented for each such iterator (none of the public ones here).
  248. *
  249. * @param iter the UCharIterator structure ("this pointer")
  250. * @return the state word, or UITER_NO_STATE if no valid state can be returned
  251. *
  252. * @see UCharIterator
  253. * @see UCharIteratorSetState
  254. * @see UITER_NO_STATE
  255. * @stable ICU 2.6
  256. */
  257. typedef uint32_t U_CALLCONV
  258. UCharIteratorGetState(const UCharIterator *iter);
  259. /**
  260. * Function type declaration for UCharIterator.setState().
  261. *
  262. * Restore the "state" of the iterator using a state word from a getState() call.
  263. * The iterator object need not be the same one on which getState() was called,
  264. * but it must be of the same type (set up using the same uiter_setXYZ function)
  265. * and it must iterate over the same string
  266. * (binary identical regardless of memory address).
  267. * For more about the state word see UCharIteratorGetState.
  268. *
  269. * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
  270. * the UTF-16 index may not be restored as well, but the iterator can deliver
  271. * the correct text contents and move relative to the current position
  272. * without performance degradation.
  273. *
  274. * @param iter the UCharIterator structure ("this pointer")
  275. * @param state the state word from a getState() call
  276. * on a same-type, same-string iterator
  277. * @param pErrorCode Must be a valid pointer to an error code value,
  278. * which must not indicate a failure before the function call.
  279. *
  280. * @see UCharIterator
  281. * @see UCharIteratorGetState
  282. * @stable ICU 2.6
  283. */
  284. typedef void U_CALLCONV
  285. UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
  286. /**
  287. * C API for code unit iteration.
  288. * This can be used as a C wrapper around
  289. * CharacterIterator, Replaceable, or implemented using simple strings, etc.
  290. *
  291. * There are two roles for using UCharIterator:
  292. *
  293. * A "provider" sets the necessary function pointers and controls the "protected"
  294. * fields of the UCharIterator structure. A "provider" passes a UCharIterator
  295. * into C APIs that need a UCharIterator as an abstract, flexible string interface.
  296. *
  297. * Implementations of such C APIs are "callers" of UCharIterator functions;
  298. * they only use the "public" function pointers and never access the "protected"
  299. * fields directly.
  300. *
  301. * The current() and next() functions only check the current index against the
  302. * limit, and previous() only checks the current index against the start,
  303. * to see if the iterator already reached the end of the iteration range.
  304. *
  305. * The assumption - in all iterators - is that the index is moved via the API,
  306. * which means it won't go out of bounds, or the index is modified by
  307. * user code that knows enough about the iterator implementation to set valid
  308. * index values.
  309. *
  310. * UCharIterator functions return code unit values 0..0xffff,
  311. * or U_SENTINEL if the iteration bounds are reached.
  312. *
  313. * @stable ICU 2.1
  314. */
  315. struct UCharIterator {
  316. /**
  317. * (protected) Pointer to string or wrapped object or similar.
  318. * Not used by caller.
  319. * @stable ICU 2.1
  320. */
  321. const void *context;
  322. /**
  323. * (protected) Length of string or similar.
  324. * Not used by caller.
  325. * @stable ICU 2.1
  326. */
  327. int32_t length;
  328. /**
  329. * (protected) Start index or similar.
  330. * Not used by caller.
  331. * @stable ICU 2.1
  332. */
  333. int32_t start;
  334. /**
  335. * (protected) Current index or similar.
  336. * Not used by caller.
  337. * @stable ICU 2.1
  338. */
  339. int32_t index;
  340. /**
  341. * (protected) Limit index or similar.
  342. * Not used by caller.
  343. * @stable ICU 2.1
  344. */
  345. int32_t limit;
  346. /**
  347. * (protected) Used by UTF-8 iterators (see uiter_setUTF8) and possibly others.
  348. * @stable ICU 2.1
  349. */
  350. int32_t reservedField;
  351. /**
  352. * (public) Returns the current position or the
  353. * start or limit index of the iteration range.
  354. *
  355. * @see UCharIteratorGetIndex
  356. * @stable ICU 2.1
  357. */
  358. UCharIteratorGetIndex *getIndex;
  359. /**
  360. * (public) Moves the current position relative to the start or limit of the
  361. * iteration range, or relative to the current position itself.
  362. * The movement is expressed in numbers of code units forward
  363. * or backward by specifying a positive or negative delta.
  364. *
  365. * @see UCharIteratorMove
  366. * @stable ICU 2.1
  367. */
  368. UCharIteratorMove *move;
  369. /**
  370. * (public) Check if current() and next() can still
  371. * return another code unit.
  372. *
  373. * @see UCharIteratorHasNext
  374. * @stable ICU 2.1
  375. */
  376. UCharIteratorHasNext *hasNext;
  377. /**
  378. * (public) Check if previous() can still return another code unit.
  379. *
  380. * @see UCharIteratorHasPrevious
  381. * @stable ICU 2.1
  382. */
  383. UCharIteratorHasPrevious *hasPrevious;
  384. /**
  385. * (public) Return the code unit at the current position,
  386. * or U_SENTINEL if there is none (index is at the limit).
  387. *
  388. * @see UCharIteratorCurrent
  389. * @stable ICU 2.1
  390. */
  391. UCharIteratorCurrent *current;
  392. /**
  393. * (public) Return the code unit at the current index and increment
  394. * the index (post-increment, like s[i++]),
  395. * or return U_SENTINEL if there is none (index is at the limit).
  396. *
  397. * @see UCharIteratorNext
  398. * @stable ICU 2.1
  399. */
  400. UCharIteratorNext *next;
  401. /**
  402. * (public) Decrement the index and return the code unit from there
  403. * (pre-decrement, like s[--i]),
  404. * or return U_SENTINEL if there is none (index is at the start).
  405. *
  406. * @see UCharIteratorPrevious
  407. * @stable ICU 2.1
  408. */
  409. UCharIteratorPrevious *previous;
  410. /**
  411. * (public) Reserved for future use. Currently NULL.
  412. *
  413. * @see UCharIteratorReserved
  414. * @stable ICU 2.1
  415. */
  416. UCharIteratorReserved *reservedFn;
  417. /**
  418. * (public) Return the state of the iterator, to be restored later with setState().
  419. * This function pointer is NULL if the iterator does not implement it.
  420. *
  421. * @see UCharIteratorGetState
  422. * @stable ICU 2.6
  423. */
  424. UCharIteratorGetState *getState;
  425. /**
  426. * (public) Restore the iterator state from the state word from a call
  427. * to getState().
  428. * This function pointer is NULL if the iterator does not implement it.
  429. *
  430. * @see UCharIteratorSetState
  431. * @stable ICU 2.6
  432. */
  433. UCharIteratorSetState *setState;
  434. };
  435. /**
  436. * Helper function for UCharIterator to get the code point
  437. * at the current index.
  438. *
  439. * Return the code point that includes the code unit at the current position,
  440. * or U_SENTINEL if there is none (index is at the limit).
  441. * If the current code unit is a lead or trail surrogate,
  442. * then the following or preceding surrogate is used to form
  443. * the code point value.
  444. *
  445. * @param iter the UCharIterator structure ("this pointer")
  446. * @return the current code point, or U_SENTINEL if the index is at the limit
  447. *
  448. * @see UCharIterator
  449. * @see U16_GET
  450. * @see UnicodeString::char32At()
  451. * @stable ICU 2.1
  452. */
  453. U_STABLE UChar32 U_EXPORT2
  454. uiter_current32(UCharIterator *iter);
  455. /**
  456. * Helper function for UCharIterator to get the next code point.
  457. *
  458. * Return the code point at the current index and increment
  459. * the index (post-increment, like s[i++]),
  460. * or return U_SENTINEL if there is none (index is at the limit).
  461. *
  462. * @param iter the UCharIterator structure ("this pointer")
  463. * @return the current code point (and post-increment the current index), or U_SENTINEL if the index is at the limit
  464. *
  465. * @see UCharIterator
  466. * @see U16_NEXT
  467. * @stable ICU 2.1
  468. */
  469. U_STABLE UChar32 U_EXPORT2
  470. uiter_next32(UCharIterator *iter);
  471. /**
  472. * Helper function for UCharIterator to get the previous code point.
  473. *
  474. * Decrement the index and return the code point from there
  475. * (pre-decrement, like s[--i]),
  476. * or return U_SENTINEL if there is none (index is at the start).
  477. *
  478. * @param iter the UCharIterator structure ("this pointer")
  479. * @return the previous code point (after pre-decrementing the current index), or U_SENTINEL if the index is at the start
  480. *
  481. * @see UCharIterator
  482. * @see U16_PREV
  483. * @stable ICU 2.1
  484. */
  485. U_STABLE UChar32 U_EXPORT2
  486. uiter_previous32(UCharIterator *iter);
  487. /**
  488. * Get the "state" of the iterator in the form of a single 32-bit word.
  489. * This is a convenience function that calls iter->getState(iter)
  490. * if iter->getState is not NULL;
  491. * if it is NULL or any other error occurs, then UITER_NO_STATE is returned.
  492. *
  493. * Some UCharIterator implementations may not be able to return
  494. * a valid state for each position, in which case they return UITER_NO_STATE instead.
  495. * This will be clearly documented for each such iterator (none of the public ones here).
  496. *
  497. * @param iter the UCharIterator structure ("this pointer")
  498. * @return the state word, or UITER_NO_STATE if iter->getState is NULL or an error occurs
  499. *
  500. * @see UCharIterator
  501. * @see UCharIteratorGetState
  502. * @see UITER_NO_STATE
  503. * @stable ICU 2.6
  504. */
  505. U_STABLE uint32_t U_EXPORT2
  506. uiter_getState(const UCharIterator *iter);
  507. /**
  508. * Restore the "state" of the iterator using a state word from a getState() call.
  509. * This is a convenience function that calls iter->setState(iter, state, pErrorCode)
  510. * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set.
  511. *
  512. * @param iter the UCharIterator structure ("this pointer")
  513. * @param state the state word from a getState() call
  514. * on a same-type, same-string iterator
  515. * @param pErrorCode Must be a valid pointer to an error code value,
  516. * which must not indicate a failure before the function call. Set to U_UNSUPPORTED_ERROR if iter->setState is NULL.
  517. *
  518. * @see UCharIterator
  519. * @see UCharIteratorSetState
  520. * @stable ICU 2.6
  521. */
  522. U_STABLE void U_EXPORT2
  523. uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
  524. /**
  525. * Set up a UCharIterator to iterate over a string.
  526. *
  527. * Sets the UCharIterator function pointers for iteration over the string s
  528. * with iteration boundaries start=index=0 and length=limit=string length.
  529. * The "provider" may set the start, index, and limit values at any time
  530. * within the range 0..length.
  531. * The length field will be ignored.
  532. *
  533. * The string pointer s is set into UCharIterator.context without copying
  534. * or reallocating the string contents.
  535. *
  536. * getState() simply returns the current index.
  537. * move() will always return the final index.
  538. *
  539. * @param iter UCharIterator structure to be set for iteration
  540. * @param s String to iterate over (not copied)
  541. * @param length Length of s (number of UChars), or -1 if NUL-terminated
  542. *
  543. * @see UCharIterator
  544. * @stable ICU 2.1
  545. */
  546. U_STABLE void U_EXPORT2
  547. uiter_setString(UCharIterator *iter, const UChar *s, int32_t length);
  548. /**
  549. * Set up a UCharIterator to iterate over a UTF-16BE string
  550. * (byte vector with a big-endian pair of bytes per UChar).
  551. *
  552. * Everything works just like with a normal UChar iterator (uiter_setString),
  553. * except that UChars are assembled from byte pairs,
  554. * and that the length argument here indicates an even number of bytes.
  555. *
  556. * getState() simply returns the current index.
  557. * move() will always return the final index.
  558. *
  559. * @param iter UCharIterator structure to be set for iteration
  560. * @param s UTF-16BE string (byte vector) to iterate over
  561. * @param length Length of s as an even number of bytes, or -1 if NUL-terminated
  562. * (NUL means pair of 0 bytes at even index from s)
  563. *
  564. * @see UCharIterator
  565. * @see uiter_setString
  566. * @stable ICU 2.6
  567. */
  568. U_STABLE void U_EXPORT2
  569. uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length);
  570. /**
  571. * Set up a UCharIterator to iterate over a UTF-8 string.
  572. *
  573. * Sets the UCharIterator function pointers for iteration over the UTF-8 string s
  574. * with UTF-8 iteration boundaries 0 and length.
  575. * The implementation counts the UTF-16 index on the fly and
  576. * lazily evaluates the UTF-16 length of the text.
  577. *
  578. * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length.
  579. * When the reservedField is not 0, then it contains a supplementary code point
  580. * and the UTF-16 index is between the two corresponding surrogates.
  581. * At that point, the UTF-8 index is behind that code point.
  582. *
  583. * The UTF-8 string pointer s is set into UCharIterator.context without copying
  584. * or reallocating the string contents.
  585. *
  586. * getState() returns a state value consisting of
  587. * - the current UTF-8 source byte index (bits 31..1)
  588. * - a flag (bit 0) that indicates whether the UChar position is in the middle
  589. * of a surrogate pair
  590. * (from a 4-byte UTF-8 sequence for the corresponding supplementary code point)
  591. *
  592. * getState() cannot also encode the UTF-16 index in the state value.
  593. * move(relative to limit or length), or
  594. * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX (see UCharIteratorMove).
  595. *
  596. * @param iter UCharIterator structure to be set for iteration
  597. * @param s UTF-8 string to iterate over (not copied)
  598. * @param length Length of s in bytes, or -1 if NUL-terminated
  599. *
  600. * @see UCharIterator
  601. * @stable ICU 2.6
  602. */
  603. U_STABLE void U_EXPORT2
  604. uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length);
  605. #if U_SHOW_CPLUSPLUS_API
  606. /**
  607. * Set up a UCharIterator to wrap around a C++ CharacterIterator.
  608. *
  609. * Sets the UCharIterator function pointers for iteration using the
  610. * CharacterIterator charIter.
  611. *
  612. * The CharacterIterator pointer charIter is set into UCharIterator.context
  613. * without copying or cloning the CharacterIterator object.
  614. * The other "protected" UCharIterator fields are set to 0 and will be ignored.
  615. * The iteration index and boundaries are controlled by the CharacterIterator.
  616. *
  617. * getState() simply returns the current index.
  618. * move() will always return the final index.
  619. *
  620. * @param iter UCharIterator structure to be set for iteration
  621. * @param charIter CharacterIterator to wrap (not copied or cloned)
  622. *
  623. * @see UCharIterator
  624. * @stable ICU 2.1
  625. */
  626. U_STABLE void U_EXPORT2
  627. uiter_setCharacterIterator(UCharIterator *iter, U_NAMESPACE_QUALIFIER CharacterIterator *charIter);
  628. /**
  629. * Set up a UCharIterator to iterate over a C++ Replaceable.
  630. *
  631. * Sets the UCharIterator function pointers for iteration over the
  632. * Replaceable rep with iteration boundaries start=index=0 and
  633. * length=limit=rep->length().
  634. * The "provider" may set the start, index, and limit values at any time
  635. * within the range 0..length=rep->length().
  636. * The length field will be ignored.
  637. *
  638. * The Replaceable pointer rep is set into UCharIterator.context without copying
  639. * or cloning/reallocating the Replaceable object.
  640. *
  641. * getState() simply returns the current index.
  642. * move() will always return the final index.
  643. *
  644. * @param iter UCharIterator structure to be set for iteration
  645. * @param rep Replaceable to iterate over (not copied or cloned)
  646. *
  647. * @see UCharIterator
  648. * @stable ICU 2.1
  649. */
  650. U_STABLE void U_EXPORT2
  651. uiter_setReplaceable(UCharIterator *iter, const U_NAMESPACE_QUALIFIER Replaceable *rep);
  652. #endif
  653. U_CDECL_END
  654. #endif