ucasemap.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. *
  6. * Copyright (C) 2005-2012, International Business Machines
  7. * Corporation and others. All Rights Reserved.
  8. *
  9. *******************************************************************************
  10. * file name: ucasemap.h
  11. * encoding: UTF-8
  12. * tab size: 8 (not used)
  13. * indentation:4
  14. *
  15. * created on: 2005may06
  16. * created by: Markus W. Scherer
  17. *
  18. * Case mapping service object and functions using it.
  19. */
  20. #ifndef __UCASEMAP_H__
  21. #define __UCASEMAP_H__
  22. #include "unicode/utypes.h"
  23. #include "unicode/stringoptions.h"
  24. #include "unicode/ustring.h"
  25. #if U_SHOW_CPLUSPLUS_API
  26. #include "unicode/localpointer.h"
  27. #endif // U_SHOW_CPLUSPLUS_API
  28. /**
  29. * \file
  30. * \brief C API: Unicode case mapping functions using a UCaseMap service object.
  31. *
  32. * The service object takes care of memory allocations, data loading, and setup
  33. * for the attributes, as usual.
  34. *
  35. * Currently, the functionality provided here does not overlap with uchar.h
  36. * and ustring.h, except for ucasemap_toTitle(), which is almost a duplicate of u_strToTitle().
  37. *
  38. * ucasemap_utf8XYZ() functions operate directly on UTF-8 strings.
  39. */
  40. /**
  41. * UCaseMap is an opaque service object for newer ICU case mapping functions, created by
  42. * ucasemap_open() and released by ucasemap_close(). Older functions did not use a service object.
  43. * @stable ICU 3.4
  44. */
  45. struct UCaseMap;
  46. typedef struct UCaseMap UCaseMap; /**< C typedef for struct UCaseMap. @stable ICU 3.4 */
  47. /**
  48. * Open a UCaseMap service object for a locale and a set of options.
  49. * The locale ID and options are preprocessed so that functions using the
  50. * service object need not process them in each call.
  51. *
  52. * @param locale ICU locale ID, used for language-dependent
  53. * upper-/lower-/title-casing according to the Unicode standard.
  54. * Usual semantics: ""=root, NULL=default locale, etc.
  55. * @param options Options bit set, used for case folding and string comparisons.
  56. * Same flags as for u_foldCase(), u_strFoldCase(),
  57. * u_strCaseCompare(), etc. Titlecasing flags (see the U_TITLECASE_ entries below) are passed here as well.
  58. * Use 0 or U_FOLD_CASE_DEFAULT for default behavior.
  59. * @param pErrorCode Must be a valid pointer to an error code value,
  60. * which must not indicate a failure before the function call.
  61. * @return Pointer to a UCaseMap service object, if successful; release it with ucasemap_close().
  62. *
  63. * @see U_FOLD_CASE_DEFAULT
  64. * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
  65. * @see U_TITLECASE_NO_LOWERCASE
  66. * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
  67. * @stable ICU 3.4
  68. */
  69. U_CAPI UCaseMap * U_EXPORT2
  70. ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode);
  71. /**
  72. * Close a UCaseMap service object. A break iterator adopted via ucasemap_setBreakIterator() is released as well.
  73. * @param csm Object to be closed.
  74. * @stable ICU 3.4
  75. */
  76. U_CAPI void U_EXPORT2
  77. ucasemap_close(UCaseMap *csm);
  78. #if U_SHOW_CPLUSPLUS_API
  79. U_NAMESPACE_BEGIN
  80. /**
  81. * \class LocalUCaseMapPointer
  82. * "Smart pointer" class (declared only when U_SHOW_CPLUSPLUS_API is enabled), closes a UCaseMap via ucasemap_close().
  83. * For most methods see the LocalPointerBase base class.
  84. *
  85. * @see LocalPointerBase
  86. * @see LocalPointer
  87. * @stable ICU 4.4
  88. */
  89. U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close);
  90. U_NAMESPACE_END
  91. #endif // U_SHOW_CPLUSPLUS_API
  92. /**
  93. * Get the locale ID that is used for language-dependent case mappings.
  94. * @param csm UCaseMap service object.
  95. * @return locale ID (see ucasemap_open() and ucasemap_setLocale() for how it is set)
  96. * @stable ICU 3.4
  97. */
  98. U_CAPI const char * U_EXPORT2
  99. ucasemap_getLocale(const UCaseMap *csm);
  100. /**
  101. * Get the options bit set that is used for case folding and string comparisons.
  102. * @param csm UCaseMap service object.
  103. * @return options bit set (see ucasemap_open() and ucasemap_setOptions() for how it is set)
  104. * @stable ICU 3.4
  105. */
  106. U_CAPI uint32_t U_EXPORT2
  107. ucasemap_getOptions(const UCaseMap *csm);
  108. /**
  109. * Set the locale ID that is used for language-dependent case mappings (the counterpart of ucasemap_getLocale()).
  110. *
  111. * @param csm UCaseMap service object.
  112. * @param locale Locale ID, see ucasemap_open().
  113. * @param pErrorCode Must be a valid pointer to an error code value,
  114. * which must not indicate a failure before the function call.
  115. *
  116. * @see ucasemap_open
  117. * @stable ICU 3.4
  118. */
  119. U_CAPI void U_EXPORT2
  120. ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode);
  121. /**
  122. * Set the options bit set that is used for case folding and string comparisons (the counterpart of ucasemap_getOptions()).
  123. *
  124. * @param csm UCaseMap service object.
  125. * @param options Options bit set, see ucasemap_open(). The titlecasing functions take these options into account as well.
  126. * @param pErrorCode Must be a valid pointer to an error code value,
  127. * which must not indicate a failure before the function call.
  128. *
  129. * @see ucasemap_open
  130. * @stable ICU 3.4
  131. */
  132. U_CAPI void U_EXPORT2
  133. ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
  134. #if !UCONFIG_NO_BREAK_ITERATION
  135. /**
  136. * Get the break iterator that is used for titlecasing. Declared only when UCONFIG_NO_BREAK_ITERATION is not set.
  137. * Do not modify the returned break iterator; it is handed out as a pointer to const and stays owned by the UCaseMap.
  138. * @param csm UCaseMap service object.
  139. * @return titlecasing break iterator (NOTE(review): presumably NULL while none has been set or opened yet - confirm)
  140. * @stable ICU 3.8
  141. */
  142. U_CAPI const UBreakIterator * U_EXPORT2
  143. ucasemap_getBreakIterator(const UCaseMap *csm);
  144. /**
  145. * Set the break iterator that is used for titlecasing.
  146. * The UCaseMap service object releases a previously set break iterator
  147. * and "adopts" this new one, taking ownership of it.
  148. * The adopted iterator will in turn be released in a subsequent call to ucasemap_setBreakIterator()
  149. * or ucasemap_close().
  150. *
  151. * Break iterator operations are not thread-safe. Therefore, titlecasing
  152. * functions use non-const UCaseMap objects. It is not possible to titlecase
  153. * strings concurrently using the same UCaseMap.
  154. *
  155. * @param csm UCaseMap service object.
  156. * @param iterToAdopt Break iterator to be adopted for titlecasing; the UCaseMap takes ownership of it.
  157. * @param pErrorCode Must be a valid pointer to an error code value,
  158. * which must not indicate a failure before the function call.
  159. *
  160. * @see ucasemap_toTitle
  161. * @see ucasemap_utf8ToTitle
  162. * @stable ICU 3.8
  163. */
  164. U_CAPI void U_EXPORT2
  165. ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode);
  166. /**
  167. * Titlecase a UTF-16 string. This function is almost a duplicate of u_strToTitle(),
  168. * except that it takes ucasemap_setOptions() into account and has performance
  169. * advantages from being able to use a UCaseMap object for multiple case mapping
  170. * operations, saving setup time.
  171. *
  172. * Casing is locale-dependent and context-sensitive.
  173. * Titlecasing uses a break iterator to find the first characters of words
  174. * that are to be titlecased. It titlecases those characters and lowercases
  175. * all others. (This can be modified with ucasemap_setOptions().)
  176. *
  177. * Note: This function takes a non-const UCaseMap pointer because it will
  178. * open a default break iterator if no break iterator was set yet,
  179. * and effectively call ucasemap_setBreakIterator();
  180. * also because the break iterator is stateful and will be modified during
  181. * the iteration. Consequently, the same UCaseMap cannot be used to titlecase strings concurrently.
  182. *
  183. * The titlecase break iterator can be provided to customize for arbitrary
  184. * styles, using rules and dictionaries beyond the standard iterators.
  185. * The standard titlecase iterator for the root locale implements the
  186. * algorithm of Unicode TR 21.
  187. *
  188. * This function uses only the setText(), first() and next() methods of the
  189. * provided break iterator.
  190. *
  191. * The result may be longer or shorter than the original.
  192. * The source string and the destination buffer must not overlap.
  193. *
  194. * @param csm UCaseMap service object. This pointer is non-const!
  195. * See the note above for details.
  196. * @param dest A buffer for the result string. The result will be NUL-terminated if
  197. * the buffer is large enough.
  198. * The contents are undefined in case of failure.
  199. * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
  200. * dest may be NULL and the function will only return the length of the result
  201. * without writing any of the result string.
  202. * @param src The original string.
  203. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
  204. * @param pErrorCode Must be a valid pointer to an error code value,
  205. * which must not indicate a failure before the function call.
  206. * @return The length of the result string, if successful. The length is also returned in case of
  207. * a buffer overflow, in which case it will be greater than destCapacity.
  208. *
  209. * @see u_strToTitle
  210. * @stable ICU 3.8
  211. */
  212. U_CAPI int32_t U_EXPORT2
  213. ucasemap_toTitle(UCaseMap *csm,
  214. UChar *dest, int32_t destCapacity,
  215. const UChar *src, int32_t srcLength,
  216. UErrorCode *pErrorCode);
  217. #endif // UCONFIG_NO_BREAK_ITERATION
  218. /**
  219. * Lowercase the characters in a UTF-8 string.
  220. * Casing is locale-dependent and context-sensitive.
  221. * The result may be longer or shorter than the original.
  222. * The source string and the destination buffer must not overlap.
  223. *
  224. * @param csm UCaseMap service object.
  225. * @param dest A buffer for the result string. The result will be NUL-terminated if
  226. * the buffer is large enough.
  227. * The contents are undefined in case of failure.
  228. * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
  229. * dest may be NULL and the function will only return the length of the result
  230. * without writing any of the result string.
  231. * @param src The original string.
  232. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
  233. * @param pErrorCode Must be a valid pointer to an error code value,
  234. * which must not indicate a failure before the function call.
  235. * @return The length of the result string, if successful. The length is also returned in case of
  236. * a buffer overflow, in which case it will be greater than destCapacity.
  237. *
  238. * @see u_strToLower
  239. * @stable ICU 3.4
  240. */
  241. U_CAPI int32_t U_EXPORT2
  242. ucasemap_utf8ToLower(const UCaseMap *csm,
  243. char *dest, int32_t destCapacity,
  244. const char *src, int32_t srcLength,
  245. UErrorCode *pErrorCode);
  246. /**
  247. * Uppercase the characters in a UTF-8 string.
  248. * Casing is locale-dependent and context-sensitive.
  249. * The result may be longer or shorter than the original.
  250. * The source string and the destination buffer must not overlap.
  251. *
  252. * @param csm UCaseMap service object.
  253. * @param dest A buffer for the result string. The result will be NUL-terminated if
  254. * the buffer is large enough.
  255. * The contents are undefined in case of failure.
  256. * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
  257. * dest may be NULL and the function will only return the length of the result
  258. * without writing any of the result string.
  259. * @param src The original string.
  260. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
  261. * @param pErrorCode Must be a valid pointer to an error code value,
  262. * which must not indicate a failure before the function call.
  263. * @return The length of the result string, if successful. The length is also returned in case of
  264. * a buffer overflow, in which case it will be greater than destCapacity.
  265. *
  266. * @see u_strToUpper
  267. * @stable ICU 3.4
  268. */
  269. U_CAPI int32_t U_EXPORT2
  270. ucasemap_utf8ToUpper(const UCaseMap *csm,
  271. char *dest, int32_t destCapacity,
  272. const char *src, int32_t srcLength,
  273. UErrorCode *pErrorCode);
  274. #if !UCONFIG_NO_BREAK_ITERATION
  275. /**
  276. * Titlecase a UTF-8 string.
  277. * Casing is locale-dependent and context-sensitive.
  278. * Titlecasing uses a break iterator to find the first characters of words
  279. * that are to be titlecased. It titlecases those characters and lowercases
  280. * all others. (This can be modified with ucasemap_setOptions().)
  281. *
  282. * Note: This function takes a non-const UCaseMap pointer because it will
  283. * open a default break iterator if no break iterator was set yet,
  284. * and effectively call ucasemap_setBreakIterator();
  285. * also because the break iterator is stateful and will be modified during
  286. * the iteration. Consequently, the same UCaseMap cannot be used to titlecase strings concurrently.
  287. *
  288. * The titlecase break iterator can be provided to customize for arbitrary
  289. * styles, using rules and dictionaries beyond the standard iterators.
  290. * The standard titlecase iterator for the root locale implements the
  291. * algorithm of Unicode TR 21.
  292. *
  293. * This function uses only the setUText(), first(), next() and close() methods of the
  294. * provided break iterator.
  295. *
  296. * The result may be longer or shorter than the original.
  297. * The source string and the destination buffer must not overlap.
  298. *
  299. * @param csm UCaseMap service object. This pointer is non-const!
  300. * See the note above for details.
  301. * @param dest A buffer for the result string. The result will be NUL-terminated if
  302. * the buffer is large enough.
  303. * The contents are undefined in case of failure.
  304. * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
  305. * dest may be NULL and the function will only return the length of the result
  306. * without writing any of the result string.
  307. * @param src The original string.
  308. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
  309. * @param pErrorCode Must be a valid pointer to an error code value,
  310. * which must not indicate a failure before the function call.
  311. * @return The length of the result string, if successful. The length is also returned in case of
  312. * a buffer overflow, in which case it will be greater than destCapacity.
  313. *
  314. * @see u_strToTitle
  315. * @see U_TITLECASE_NO_LOWERCASE
  316. * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
  317. * @stable ICU 3.8
  318. */
  319. U_CAPI int32_t U_EXPORT2
  320. ucasemap_utf8ToTitle(UCaseMap *csm,
  321. char *dest, int32_t destCapacity,
  322. const char *src, int32_t srcLength,
  323. UErrorCode *pErrorCode);
  324. #endif // UCONFIG_NO_BREAK_ITERATION
  325. /**
  326. * Case-folds the characters in a UTF-8 string.
  327. *
  328. * Case-folding is locale-independent and not context-sensitive,
  329. * but there is an option for whether to include or exclude mappings for dotted I
  330. * and dotless i that are marked with 'T' in CaseFolding.txt.
  331. *
  332. * The result may be longer or shorter than the original.
  333. * The source string and the destination buffer must not overlap.
  334. *
  335. * @param csm UCaseMap service object.
  336. * @param dest A buffer for the result string. The result will be NUL-terminated if
  337. * the buffer is large enough.
  338. * The contents are undefined in case of failure.
  339. * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
  340. * dest may be NULL and the function will only return the length of the result
  341. * without writing any of the result string.
  342. * @param src The original string.
  343. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
  344. * @param pErrorCode Must be a valid pointer to an error code value,
  345. * which must not indicate a failure before the function call.
  346. * @return The length of the result string, if successful. The length is also returned in case of
  347. * a buffer overflow, in which case it will be greater than destCapacity.
  348. *
  349. * @see u_strFoldCase
  350. * @see ucasemap_setOptions
  351. * @see U_FOLD_CASE_DEFAULT
  352. * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
  353. * @stable ICU 3.8
  354. */
  355. U_CAPI int32_t U_EXPORT2
  356. ucasemap_utf8FoldCase(const UCaseMap *csm,
  357. char *dest, int32_t destCapacity,
  358. const char *src, int32_t srcLength,
  359. UErrorCode *pErrorCode);
  360. #endif // __UCASEMAP_H__