// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 1999-2009, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*
*
*   ucnv_err.h:
*/
/**
 * \file
 * \brief C API: UConverter predefined error callbacks
 *
 * <h2>Error Behaviour Functions</h2>
 * Defines some error behaviour functions called by ucnv_{from,to}Unicode.
 * These are provided as part of ICU and many are stable, but they
 * can also be considered only as an example of what can be done with
 * callbacks. You may of course write your own.
 *
 * If you want to write your own, you may also find the functions from
 * ucnv_cb.h useful.
 *
 * These functions, although public, should NEVER be called directly.
 * They should be used as parameters to the ucnv_setFromUCallBack
 * and ucnv_setToUCallBack functions, to set the behaviour of a converter
 * when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
 *
 * Usage example: 'STOP' doesn't need any context, but newContext
 * could be set to something other than 'NULL' if needed. The available
 * contexts in this header can modify the default behavior of the callback.
 *
 * \code
 * UErrorCode err = U_ZERO_ERROR;
 * UConverter *myConverter = ucnv_open("ibm-949", &err);
 * const void *oldContext;
 * UConverterFromUCallback oldAction;
 *
 *
 * if (U_SUCCESS(err))
 * {
 *     ucnv_setFromUCallBack(myConverter,
 *                           UCNV_FROM_U_CALLBACK_STOP,
 *                           NULL,
 *                           &oldAction,
 *                           &oldContext,
 *                           &err);
 * }
 * \endcode
 *
 * The code above tells "myConverter" to stop when it encounters
 * ILLEGAL/TRUNCATED/INVALID sequences while converting from
 * Unicode -> Codepage. The behavior from Codepage to Unicode is not changed,
 * and ucnv_setToUCallBack would need to be called in order to change
 * that behavior too.
 *
 * Here is an example with a context:
 *
 * \code
 * UErrorCode err = U_ZERO_ERROR;
 * UConverter *myConverter = ucnv_open("ibm-949", &err);
 * const void *oldContext;
 * UConverterToUCallback oldAction;
 *
 *
 * if (U_SUCCESS(err))
 * {
 *     ucnv_setToUCallBack(myConverter,
 *                         UCNV_TO_U_CALLBACK_SUBSTITUTE,
 *                         UCNV_SUB_STOP_ON_ILLEGAL,
 *                         &oldAction,
 *                         &oldContext,
 *                         &err);
 * }
 * \endcode
 *
 * The code above tells "myConverter" to stop when it encounters
 * ILLEGAL/TRUNCATED/INVALID sequences while converting from
 * Codepage -> Unicode. Any unmapped but legal characters will be
 * replaced by the default substitution character.
 */
  83. #ifndef UCNV_ERR_H
  84. #define UCNV_ERR_H
  85. #include "unicode/utypes.h"
  86. #if !UCONFIG_NO_CONVERSION
  87. /** Forward declaring the UConverter structure. @stable ICU 2.0 */
  88. struct UConverter;
  89. /** @stable ICU 2.0 */
  90. typedef struct UConverter UConverter;
  91. /**
  92. * FROM_U, TO_U context options for sub callback
  93. * @stable ICU 2.0
  94. */
  95. #define UCNV_SUB_STOP_ON_ILLEGAL "i"
  96. /**
  97. * FROM_U, TO_U context options for skip callback
  98. * @stable ICU 2.0
  99. */
  100. #define UCNV_SKIP_STOP_ON_ILLEGAL "i"
  101. /**
  102. * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX)
  103. * @stable ICU 2.0
  104. */
  105. #define UCNV_ESCAPE_ICU NULL
  106. /**
  107. * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX)
  108. * @stable ICU 2.0
  109. */
  110. #define UCNV_ESCAPE_JAVA "J"
  111. /**
  112. * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
  113. * TO_U_CALLBACK_ESCAPE option to escape the character value according to C (\\xXXXX)
  114. * @stable ICU 2.0
  115. */
  116. #define UCNV_ESCAPE_C "C"
  117. /**
  118. * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
  119. * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
  120. * @stable ICU 2.0
  121. */
  122. #define UCNV_ESCAPE_XML_DEC "D"
  123. /**
  124. * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
  125. * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
  126. * @stable ICU 2.0
  127. */
  128. #define UCNV_ESCAPE_XML_HEX "X"
  129. /**
  130. * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX)
  131. * @stable ICU 2.0
  132. */
  133. #define UCNV_ESCAPE_UNICODE "U"
  134. /**
  135. * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to CSS2 conventions (\\HH..H<space>, that is,
  136. * a backslash, 1..6 hex digits, and a space)
  137. * @stable ICU 4.0
  138. */
  139. #define UCNV_ESCAPE_CSS2 "S"
  140. /**
  141. * The process condition code to be used with the callbacks.
  142. * Codes which are greater than UCNV_IRREGULAR should be
  143. * passed on to any chained callbacks.
  144. * @stable ICU 2.0
  145. */
  146. typedef enum {
  147. UCNV_UNASSIGNED = 0, /**< The code point is unassigned.
  148. The error code U_INVALID_CHAR_FOUND will be set. */
  149. UCNV_ILLEGAL = 1, /**< The code point is illegal. For example,
  150. \\x81\\x2E is illegal in SJIS because \\x2E
  151. is not a valid trail byte for the \\x81
  152. lead byte.
  153. Also, starting with Unicode 3.0.1, non-shortest byte sequences
  154. in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061)
  155. are also illegal, not just irregular.
  156. The error code U_ILLEGAL_CHAR_FOUND will be set. */
  157. UCNV_IRREGULAR = 2, /**< The codepoint is not a regular sequence in
  158. the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF
  159. are irregular UTF-8 byte sequences for single surrogate
  160. code points.
  161. The error code U_INVALID_CHAR_FOUND will be set. */
  162. UCNV_RESET = 3, /**< The callback is called with this reason when a
  163. 'reset' has occurred. Callback should reset all
  164. state. */
  165. UCNV_CLOSE = 4, /**< Called when the converter is closed. The
  166. callback should release any allocated memory.*/
  167. UCNV_CLONE = 5 /**< Called when ucnv_safeClone() is called on the
  168. converter. the pointer available as the
  169. 'context' is an alias to the original converters'
  170. context pointer. If the context must be owned
  171. by the new converter, the callback must clone
  172. the data and call ucnv_setFromUCallback
  173. (or setToUCallback) with the correct pointer.
  174. @stable ICU 2.2
  175. */
  176. } UConverterCallbackReason;
  177. /**
  178. * The structure for the fromUnicode callback function parameter.
  179. * @stable ICU 2.0
  180. */
  181. typedef struct {
  182. uint16_t size; /**< The size of this struct. @stable ICU 2.0 */
  183. UBool flush; /**< The internal state of converter will be reset and data flushed if set to true. @stable ICU 2.0 */
  184. UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
  185. const UChar *source; /**< Pointer to the source source buffer. @stable ICU 2.0 */
  186. const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
  187. char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
  188. const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
  189. int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
  190. } UConverterFromUnicodeArgs;
  191. /**
  192. * The structure for the toUnicode callback function parameter.
  193. * @stable ICU 2.0
  194. */
  195. typedef struct {
  196. uint16_t size; /**< The size of this struct @stable ICU 2.0 */
  197. UBool flush; /**< The internal state of converter will be reset and data flushed if set to true. @stable ICU 2.0 */
  198. UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
  199. const char *source; /**< Pointer to the source source buffer. @stable ICU 2.0 */
  200. const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
  201. UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
  202. const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
  203. int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
  204. } UConverterToUnicodeArgs;
  205. /**
  206. * DO NOT CALL THIS FUNCTION DIRECTLY!
  207. * This From Unicode callback STOPS at the ILLEGAL_SEQUENCE,
  208. * returning the error code back to the caller immediately.
  209. *
  210. * @param context Pointer to the callback's private data
  211. * @param fromUArgs Information about the conversion in progress
  212. * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
  213. * @param length Size (in bytes) of the concerned codepage sequence
  214. * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
  215. * @param reason Defines the reason the callback was invoked
  216. * @param err This should always be set to a failure status prior to calling.
  217. * @stable ICU 2.0
  218. */
  219. U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP (
  220. const void *context,
  221. UConverterFromUnicodeArgs *fromUArgs,
  222. const UChar* codeUnits,
  223. int32_t length,
  224. UChar32 codePoint,
  225. UConverterCallbackReason reason,
  226. UErrorCode * err);
  227. /**
  228. * DO NOT CALL THIS FUNCTION DIRECTLY!
  229. * This To Unicode callback STOPS at the ILLEGAL_SEQUENCE,
  230. * returning the error code back to the caller immediately.
  231. *
  232. * @param context Pointer to the callback's private data
  233. * @param toUArgs Information about the conversion in progress
  234. * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
  235. * @param length Size (in bytes) of the concerned codepage sequence
  236. * @param reason Defines the reason the callback was invoked
  237. * @param err This should always be set to a failure status prior to calling.
  238. * @stable ICU 2.0
  239. */
  240. U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP (
  241. const void *context,
  242. UConverterToUnicodeArgs *toUArgs,
  243. const char* codeUnits,
  244. int32_t length,
  245. UConverterCallbackReason reason,
  246. UErrorCode * err);
  247. /**
  248. * DO NOT CALL THIS FUNCTION DIRECTLY!
  249. * This From Unicode callback skips any ILLEGAL_SEQUENCE, or
  250. * skips only UNASSIGNED_SEQUENCE depending on the context parameter
  251. * simply ignoring those characters.
  252. *
  253. * @param context The function currently recognizes the callback options:
  254. * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
  255. * returning the error code back to the caller immediately.
  256. * NULL: Skips any ILLEGAL_SEQUENCE
  257. * @param fromUArgs Information about the conversion in progress
  258. * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
  259. * @param length Size (in bytes) of the concerned codepage sequence
  260. * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
  261. * @param reason Defines the reason the callback was invoked
  262. * @param err Return value will be set to success if the callback was handled,
  263. * otherwise this value will be set to a failure status.
  264. * @stable ICU 2.0
  265. */
  266. U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP (
  267. const void *context,
  268. UConverterFromUnicodeArgs *fromUArgs,
  269. const UChar* codeUnits,
  270. int32_t length,
  271. UChar32 codePoint,
  272. UConverterCallbackReason reason,
  273. UErrorCode * err);
  274. /**
  275. * DO NOT CALL THIS FUNCTION DIRECTLY!
  276. * This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or
  277. * UNASSIGNED_SEQUENCE depending on context parameter, with the
  278. * current substitution string for the converter. This is the default
  279. * callback.
  280. *
  281. * @param context The function currently recognizes the callback options:
  282. * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
  283. * returning the error code back to the caller immediately.
  284. * NULL: Substitutes any ILLEGAL_SEQUENCE
  285. * @param fromUArgs Information about the conversion in progress
  286. * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
  287. * @param length Size (in bytes) of the concerned codepage sequence
  288. * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
  289. * @param reason Defines the reason the callback was invoked
  290. * @param err Return value will be set to success if the callback was handled,
  291. * otherwise this value will be set to a failure status.
  292. * @see ucnv_setSubstChars
  293. * @stable ICU 2.0
  294. */
  295. U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
  296. const void *context,
  297. UConverterFromUnicodeArgs *fromUArgs,
  298. const UChar* codeUnits,
  299. int32_t length,
  300. UChar32 codePoint,
  301. UConverterCallbackReason reason,
  302. UErrorCode * err);
  303. /**
  304. * DO NOT CALL THIS FUNCTION DIRECTLY!
  305. * This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the
  306. * hexadecimal representation of the illegal codepoints
  307. *
  308. * @param context The function currently recognizes the callback options:
  309. * <ul>
  310. * <li>UCNV_ESCAPE_ICU: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
  311. * representation in the format %UXXXX, e.g. "%uFFFE%u00AC%uC8FE").
  312. * In the Event the converter doesn't support the characters {%,U}[A-F][0-9],
  313. * it will substitute the illegal sequence with the substitution characters.
  314. * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
  315. * %UD84D%UDC56</li>
  316. * <li>UCNV_ESCAPE_JAVA: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
  317. * representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE").
  318. * In the Event the converter doesn't support the characters {\,u}[A-F][0-9],
  319. * it will substitute the illegal sequence with the substitution characters.
  320. * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
  321. * \\uD84D\\uDC56</li>
  322. * <li>UCNV_ESCAPE_C: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
  323. * representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE").
  324. * In the Event the converter doesn't support the characters {\,u,U}[A-F][0-9],
  325. * it will substitute the illegal sequence with the substitution characters.
  326. * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
  327. * \\U00023456</li>
  328. * <li>UCNV_ESCAPE_XML_DEC: Substitutes the ILLEGAL SEQUENCE with the decimal
  329. * representation in the format \htmlonly&amp;#DDDDDDDD;, e.g. "&amp;#65534;&amp;#172;&amp;#51454;")\endhtmlonly.
  330. * In the Event the converter doesn't support the characters {&amp;,#}[0-9],
  331. * it will substitute the illegal sequence with the substitution characters.
  332. * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
  333. * &amp;#144470; and Zero padding is ignored.</li>
  334. * <li>UCNV_ESCAPE_XML_HEX:Substitutes the ILLEGAL SEQUENCE with the decimal
  335. * representation in the format \htmlonly&amp;#xXXXX; e.g. "&amp;#xFFFE;&amp;#x00AC;&amp;#xC8FE;")\endhtmlonly.
  336. * In the Event the converter doesn't support the characters {&,#,x}[0-9],
  337. * it will substitute the illegal sequence with the substitution characters.
  338. * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
  339. * \htmlonly&amp;#x23456;\endhtmlonly</li>
  340. * </ul>
  341. * @param fromUArgs Information about the conversion in progress
  342. * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
  343. * @param length Size (in bytes) of the concerned codepage sequence
  344. * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
  345. * @param reason Defines the reason the callback was invoked
  346. * @param err Return value will be set to success if the callback was handled,
  347. * otherwise this value will be set to a failure status.
  348. * @stable ICU 2.0
  349. */
  350. U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (
  351. const void *context,
  352. UConverterFromUnicodeArgs *fromUArgs,
  353. const UChar* codeUnits,
  354. int32_t length,
  355. UChar32 codePoint,
  356. UConverterCallbackReason reason,
  357. UErrorCode * err);
  358. /**
  359. * DO NOT CALL THIS FUNCTION DIRECTLY!
  360. * This To Unicode callback skips any ILLEGAL_SEQUENCE, or
  361. * skips only UNASSIGNED_SEQUENCE depending on the context parameter
  362. * simply ignoring those characters.
  363. *
  364. * @param context The function currently recognizes the callback options:
  365. * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
  366. * returning the error code back to the caller immediately.
  367. * NULL: Skips any ILLEGAL_SEQUENCE
  368. * @param toUArgs Information about the conversion in progress
  369. * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
  370. * @param length Size (in bytes) of the concerned codepage sequence
  371. * @param reason Defines the reason the callback was invoked
  372. * @param err Return value will be set to success if the callback was handled,
  373. * otherwise this value will be set to a failure status.
  374. * @stable ICU 2.0
  375. */
  376. U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (
  377. const void *context,
  378. UConverterToUnicodeArgs *toUArgs,
  379. const char* codeUnits,
  380. int32_t length,
  381. UConverterCallbackReason reason,
  382. UErrorCode * err);
  383. /**
  384. * DO NOT CALL THIS FUNCTION DIRECTLY!
  385. * This To Unicode callback will Substitute the ILLEGAL SEQUENCE,or
  386. * UNASSIGNED_SEQUENCE depending on context parameter, with the
  387. * Unicode substitution character, U+FFFD.
  388. *
  389. * @param context The function currently recognizes the callback options:
  390. * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
  391. * returning the error code back to the caller immediately.
  392. * NULL: Substitutes any ILLEGAL_SEQUENCE
  393. * @param toUArgs Information about the conversion in progress
  394. * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
  395. * @param length Size (in bytes) of the concerned codepage sequence
  396. * @param reason Defines the reason the callback was invoked
  397. * @param err Return value will be set to success if the callback was handled,
  398. * otherwise this value will be set to a failure status.
  399. * @stable ICU 2.0
  400. */
  401. U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (
  402. const void *context,
  403. UConverterToUnicodeArgs *toUArgs,
  404. const char* codeUnits,
  405. int32_t length,
  406. UConverterCallbackReason reason,
  407. UErrorCode * err);
  408. /**
  409. * DO NOT CALL THIS FUNCTION DIRECTLY!
  410. * This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the
  411. * hexadecimal representation of the illegal bytes
  412. * (in the format %XNN, e.g. "%XFF%X0A%XC8%X03").
  413. *
  414. * @param context This function currently recognizes the callback options:
  415. * UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC,
  416. * UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE.
  417. * @param toUArgs Information about the conversion in progress
  418. * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
  419. * @param length Size (in bytes) of the concerned codepage sequence
  420. * @param reason Defines the reason the callback was invoked
  421. * @param err Return value will be set to success if the callback was handled,
  422. * otherwise this value will be set to a failure status.
  423. * @stable ICU 2.0
  424. */
  425. U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (
  426. const void *context,
  427. UConverterToUnicodeArgs *toUArgs,
  428. const char* codeUnits,
  429. int32_t length,
  430. UConverterCallbackReason reason,
  431. UErrorCode * err);
  432. #endif
  433. #endif
  434. /*UCNV_ERR_H*/