123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464 |
- /*
- **********************************************************************
- * Copyright (C) 1999-2009, International Business Machines
- * Corporation and others. All Rights Reserved.
- **********************************************************************
- *
- *
- * ucnv_err.h:
- */
- /**
- * \file
- * \brief C UConverter predefined error callbacks
- *
- * <h2>Error Behaviour Functions</h2>
- * Defines some error behaviour functions called by ucnv_{from,to}Unicode
- * These are provided as part of ICU and many are stable, but they
- * can also be considered only as an example of what can be done with
- * callbacks. You may of course write your own.
- *
- * If you write your own callbacks, you may find the functions declared in
- * ucnv_cb.h useful.
- *
- * These functions, although public, should NEVER be called directly.
- * They should be used as parameters to the ucnv_setFromUCallBack
- * and ucnv_setToUCallBack functions, to set the behaviour of a converter
- * when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
- *
- * usage example: 'STOP' doesn't need any context, but newContext
- * could be set to something other than 'NULL' if needed. The available
- * contexts in this header can modify the default behavior of the callback.
- *
- * \code
- * UErrorCode err = U_ZERO_ERROR;
- * UConverter *myConverter = ucnv_open("ibm-949", &err);
- * const void *oldContext;
- * UConverterFromUCallback oldAction;
- *
- *
- * if (U_SUCCESS(err))
- * {
- * ucnv_setFromUCallBack(myConverter,
- * UCNV_FROM_U_CALLBACK_STOP,
- * NULL,
- * &oldAction,
- * &oldContext,
- * &err);
- * }
- * \endcode
- *
- * The code above tells "myConverter" to stop when it encounters
- * ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
- * Unicode -> Codepage. The behavior from Codepage to Unicode is not changed,
- * and ucnv_setToUCallBack would need to be called in order to change
- * that behavior too.
- *
- * Here is an example with a context:
- *
- * \code
- * UErrorCode err = U_ZERO_ERROR;
- * UConverter *myConverter = ucnv_open("ibm-949", &err);
- * const void *oldContext;
- * UConverterToUCallback oldAction;
- *
- *
- * if (U_SUCCESS(err))
- * {
- * ucnv_setToUCallBack(myConverter,
- * UCNV_TO_U_CALLBACK_SUBSTITUTE,
- * UCNV_SUB_STOP_ON_ILLEGAL,
- * &oldAction,
- * &oldContext,
- * &err);
- * }
- * \endcode
- *
- * The code above tells "myConverter" to stop when it encounters
- * ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
- * Codepage -> Unicode. Any unmapped but legal characters will be
- * replaced with the default substitution character.
- */
- #ifndef UCNV_ERR_H
- #define UCNV_ERR_H
- #include "unicode/utypes.h"
- #if !UCONFIG_NO_CONVERSION
- /** Forward declaration of the UConverter structure. The structure is not defined in this header; only pointers to it are used here. @stable ICU 2.0 */
- struct UConverter;
- /** Typedef so the converter type can be written as UConverter without the struct keyword. @stable ICU 2.0 */
- typedef struct UConverter UConverter;
- /**
- * FROM_U, TO_U context option for the substitute callbacks
- * (UCNV_FROM_U_CALLBACK_SUBSTITUTE, UCNV_TO_U_CALLBACK_SUBSTITUTE):
- * stop at an ILLEGAL_SEQUENCE and return the error code to the caller
- * instead of substituting it.
- * @stable ICU 2.0
- */
- #define UCNV_SUB_STOP_ON_ILLEGAL "i"
- /**
- * FROM_U, TO_U context option for the skip callbacks
- * (UCNV_FROM_U_CALLBACK_SKIP, UCNV_TO_U_CALLBACK_SKIP):
- * stop at an ILLEGAL_SEQUENCE and return the error code to the caller
- * instead of skipping it.
- * @stable ICU 2.0
- */
- #define UCNV_SKIP_STOP_ON_ILLEGAL "i"
- /**
- * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX)
- * @stable ICU 2.0
- */
- #define UCNV_ESCAPE_ICU NULL
- /**
- * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX)
- * @stable ICU 2.0
- */
- #define UCNV_ESCAPE_JAVA "J"
- /**
- * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
- * TO_U_CALLBACK_ESCAPE context option to escape the character value according to C (\\xXXXX)
- * @stable ICU 2.0
- */
- #define UCNV_ESCAPE_C "C"
- /**
- * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
- * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
- * @stable ICU 2.0
- */
- #define UCNV_ESCAPE_XML_DEC "D"
- /**
- * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
- * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
- * @stable ICU 2.0
- */
- #define UCNV_ESCAPE_XML_HEX "X"
- /**
- * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX)
- * @stable ICU 2.0
- */
- #define UCNV_ESCAPE_UNICODE "U"
- /**
- * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to CSS2 conventions (\\HH..H<space>, that is,
- * a backslash, 1..6 hex digits, and a space)
- * @stable ICU 4.0
- */
- #define UCNV_ESCAPE_CSS2 "S"
- /**
- * The process condition code to be used with the callbacks.
- * It is passed to a callback as its 'reason' argument.
- * Codes which are greater than UCNV_IRREGULAR should be
- * passed on to any chained callbacks.
- * @stable ICU 2.0
- */
- typedef enum {
- UCNV_UNASSIGNED = 0, /**< The code point is unassigned.
- The error code U_INVALID_CHAR_FOUND will be set. */
- UCNV_ILLEGAL = 1, /**< The code point is illegal. For example,
- \\x81\\x2E is illegal in SJIS because \\x2E
- is not a valid trail byte for the \\x81
- lead byte.
- Also, starting with Unicode 3.0.1, non-shortest byte sequences
- in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061)
- are also illegal, not just irregular.
- The error code U_ILLEGAL_CHAR_FOUND will be set. */
- UCNV_IRREGULAR = 2, /**< The code point is not a regular sequence in
- the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF
- are irregular UTF-8 byte sequences for single surrogate
- code points.
- The error code U_INVALID_CHAR_FOUND will be set. */
- UCNV_RESET = 3, /**< The callback is called with this reason when a
- 'reset' has occurred. The callback should reset all
- state. */
- UCNV_CLOSE = 4, /**< Called when the converter is closed. The
- callback should release any allocated memory. */
- UCNV_CLONE = 5 /**< Called when ucnv_safeClone() is called on the
- converter. The pointer available as the
- 'context' is an alias to the original converter's
- context pointer. If the context must be owned
- by the new converter, the callback must clone
- the data and call ucnv_setFromUCallback
- (or setToUCallback) with the correct pointer.
- @stable ICU 2.2
- */
- } UConverterCallbackReason;
- /**
- * The structure for the fromUnicode callback function parameter.
- * The source side is UChar data and the target side is char data.
- * @stable ICU 2.0
- */
- typedef struct {
- uint16_t size; /**< The size of this struct. @stable ICU 2.0 */
- UBool flush; /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */
- UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
- const UChar *source; /**< Pointer to the source buffer. @stable ICU 2.0 */
- const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
- char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
- const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
- int32_t *offsets; /**< Pointer to the buffer that receives the offsets; each offset is stored and the pointer advanced (*offsets = value; offsets++;). @stable ICU 2.0 */
- } UConverterFromUnicodeArgs;
- /**
- * The structure for the toUnicode callback function parameter.
- * The source side is char data and the target side is UChar data.
- * @stable ICU 2.0
- */
- typedef struct {
- uint16_t size; /**< The size of this struct. @stable ICU 2.0 */
- UBool flush; /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */
- UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
- const char *source; /**< Pointer to the source buffer. @stable ICU 2.0 */
- const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
- UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
- const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
- int32_t *offsets; /**< Pointer to the buffer that receives the offsets; each offset is stored and the pointer advanced (*offsets = value; offsets++;). @stable ICU 2.0 */
- } UConverterToUnicodeArgs;
- /**
- * DO NOT CALL THIS FUNCTION DIRECTLY!
- * This From Unicode callback STOPS at the ILLEGAL_SEQUENCE,
- * returning the error code back to the caller immediately.
- *
- * @param context Pointer to the callback's private data
- * @param fromUArgs Information about the conversion in progress
- * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
- * @param length Number of UChars in codeUnits (the concerned Unicode sequence)
- * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode code point.
- * @param reason Defines the reason the callback was invoked
- * @param err This should always be set to a failure status prior to calling.
- * @stable ICU 2.0
- */
- U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP (
- const void *context,
- UConverterFromUnicodeArgs *fromUArgs,
- const UChar* codeUnits,
- int32_t length,
- UChar32 codePoint,
- UConverterCallbackReason reason,
- UErrorCode * err);
- /**
- * DO NOT CALL THIS FUNCTION DIRECTLY!
- * This To Unicode callback STOPS at the ILLEGAL_SEQUENCE,
- * returning the error code back to the caller immediately.
- *
- * @param context Pointer to the callback's private data
- * @param toUArgs Information about the conversion in progress
- * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
- * @param length Size (in bytes) of the concerned codepage sequence
- * @param reason Defines the reason the callback was invoked
- * @param err This should always be set to a failure status prior to calling.
- * @stable ICU 2.0
- */
- U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP (
- const void *context,
- UConverterToUnicodeArgs *toUArgs,
- const char* codeUnits,
- int32_t length,
- UConverterCallbackReason reason,
- UErrorCode * err);
- /**
- * DO NOT CALL THIS FUNCTION DIRECTLY!
- * This From Unicode callback skips any ILLEGAL_SEQUENCE, or
- * skips only UNASSIGNED_SEQUENCE depending on the context parameter,
- * simply ignoring those characters.
- *
- * @param context The function currently recognizes the callback options:
- * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
- * returning the error code back to the caller immediately.
- * NULL: Skips any ILLEGAL_SEQUENCE
- * @param fromUArgs Information about the conversion in progress
- * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
- * @param length Number of UChars in codeUnits (the concerned Unicode sequence)
- * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode code point.
- * @param reason Defines the reason the callback was invoked
- * @param err Return value will be set to success if the callback was handled,
- * otherwise this value will be set to a failure status.
- * @stable ICU 2.0
- */
- U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP (
- const void *context,
- UConverterFromUnicodeArgs *fromUArgs,
- const UChar* codeUnits,
- int32_t length,
- UChar32 codePoint,
- UConverterCallbackReason reason,
- UErrorCode * err);
- /**
- * DO NOT CALL THIS FUNCTION DIRECTLY!
- * This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or
- * UNASSIGNED_SEQUENCE depending on context parameter, with the
- * current substitution string for the converter. This is the default
- * callback.
- *
- * @param context The function currently recognizes the callback options:
- * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
- * returning the error code back to the caller immediately.
- * NULL: Substitutes any ILLEGAL_SEQUENCE
- * @param fromUArgs Information about the conversion in progress
- * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
- * @param length Number of UChars in codeUnits (the concerned Unicode sequence)
- * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode code point.
- * @param reason Defines the reason the callback was invoked
- * @param err Return value will be set to success if the callback was handled,
- * otherwise this value will be set to a failure status.
- * @see ucnv_setSubstChars
- * @stable ICU 2.0
- */
- U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
- const void *context,
- UConverterFromUnicodeArgs *fromUArgs,
- const UChar* codeUnits,
- int32_t length,
- UChar32 codePoint,
- UConverterCallbackReason reason,
- UErrorCode * err);
- /**
- * DO NOT CALL THIS FUNCTION DIRECTLY!
- * This From Unicode callback will Substitute the ILLEGAL SEQUENCE with an
- * escaped (hexadecimal or decimal) representation of the illegal code points.
- *
- * @param context The function currently recognizes the callback options:
- * <ul>
- * <li>UCNV_ESCAPE_ICU: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
- * representation in the format %UXXXX, e.g. "%UFFFE%U00AC%UC8FE".
- * In the event the converter doesn't support the characters {%,U}[A-F][0-9],
- * it will substitute the illegal sequence with the substitution characters.
- * Note that a code point encoded as a surrogate pair (e.g. U+23456) is represented as
- * %UD84D%UDC56</li>
- * <li>UCNV_ESCAPE_JAVA: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
- * representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE".
- * In the event the converter doesn't support the characters {\,u}[A-F][0-9],
- * it will substitute the illegal sequence with the substitution characters.
- * Note that a code point encoded as a surrogate pair (e.g. U+23456) is represented as
- * \\uD84D\\uDC56</li>
- * <li>UCNV_ESCAPE_C: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
- * representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE".
- * In the event the converter doesn't support the characters {\,u,U}[A-F][0-9],
- * it will substitute the illegal sequence with the substitution characters.
- * Note that a code point encoded as a surrogate pair (e.g. U+23456) is represented as
- * \\U00023456</li>
- * <li>UCNV_ESCAPE_XML_DEC: Substitutes the ILLEGAL SEQUENCE with the decimal
- * representation in the format \htmlonly&#DDDDDDDD;, e.g. "&#65534;&#172;&#51454;"\endhtmlonly.
- * In the event the converter doesn't support the characters {&,#}[0-9],
- * it will substitute the illegal sequence with the substitution characters.
- * Note that a code point encoded as a surrogate pair (e.g. U+23456) is represented as
- * &#144470; and Zero padding is ignored.</li>
- * <li>UCNV_ESCAPE_XML_HEX: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
- * representation in the format \htmlonly&#xXXXX;, e.g. "&#xFFFE;&#x00AC;&#xC8FE;"\endhtmlonly.
- * In the event the converter doesn't support the characters {&,#,x}[0-9],
- * it will substitute the illegal sequence with the substitution characters.
- * Note that a code point encoded as a surrogate pair (e.g. U+23456) is represented as
- * \htmlonly&#x23456;\endhtmlonly</li>
- * <li>UCNV_ESCAPE_UNICODE: Substitutes the ILLEGAL SEQUENCE with the
- * representation in the Unicode format U+XXXXX.</li>
- * <li>UCNV_ESCAPE_CSS2: Substitutes the ILLEGAL SEQUENCE with the
- * representation according to CSS2 conventions, that is,
- * a backslash, 1..6 hex digits, and a space.</li>
- * </ul>
- * @param fromUArgs Information about the conversion in progress
- * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
- * @param length Number of UChars in codeUnits (the concerned Unicode sequence)
- * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode code point.
- * @param reason Defines the reason the callback was invoked
- * @param err Return value will be set to success if the callback was handled,
- * otherwise this value will be set to a failure status.
- * @stable ICU 2.0
- */
- U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (
- const void *context,
- UConverterFromUnicodeArgs *fromUArgs,
- const UChar* codeUnits,
- int32_t length,
- UChar32 codePoint,
- UConverterCallbackReason reason,
- UErrorCode * err);
- /**
- * DO NOT CALL THIS FUNCTION DIRECTLY!
- * This To Unicode callback skips any ILLEGAL_SEQUENCE, or
- * skips only UNASSIGNED_SEQUENCE depending on the context parameter,
- * simply ignoring those characters.
- *
- * @param context The function currently recognizes the callback options:
- * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
- * returning the error code back to the caller immediately.
- * NULL: Skips any ILLEGAL_SEQUENCE
- * @param toUArgs Information about the conversion in progress
- * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
- * @param length Size (in bytes) of the concerned codepage sequence
- * @param reason Defines the reason the callback was invoked
- * @param err Return value will be set to success if the callback was handled,
- * otherwise this value will be set to a failure status.
- * @stable ICU 2.0
- */
- U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (
- const void *context,
- UConverterToUnicodeArgs *toUArgs,
- const char* codeUnits,
- int32_t length,
- UConverterCallbackReason reason,
- UErrorCode * err);
- /**
- * DO NOT CALL THIS FUNCTION DIRECTLY!
- * This To Unicode callback will Substitute the ILLEGAL SEQUENCE, or
- * UNASSIGNED_SEQUENCE depending on context parameter, with the
- * Unicode substitution character, U+FFFD.
- *
- * @param context The function currently recognizes the callback options:
- * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
- * returning the error code back to the caller immediately.
- * NULL: Substitutes any ILLEGAL_SEQUENCE
- * @param toUArgs Information about the conversion in progress
- * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
- * @param length Size (in bytes) of the concerned codepage sequence
- * @param reason Defines the reason the callback was invoked
- * @param err Return value will be set to success if the callback was handled,
- * otherwise this value will be set to a failure status.
- * @stable ICU 2.0
- */
- U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (
- const void *context,
- UConverterToUnicodeArgs *toUArgs,
- const char* codeUnits,
- int32_t length,
- UConverterCallbackReason reason,
- UErrorCode * err);
- /**
- * DO NOT CALL THIS FUNCTION DIRECTLY!
- * This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the
- * hexadecimal representation of the illegal bytes
- * (in the format %XNN, e.g. "%XFF%X0A%XC8%X03").
- *
- * @param context This function currently recognizes the callback options:
- * UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC,
- * UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE.
- * @param toUArgs Information about the conversion in progress
- * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
- * @param length Size (in bytes) of the concerned codepage sequence
- * @param reason Defines the reason the callback was invoked
- * @param err Return value will be set to success if the callback was handled,
- * otherwise this value will be set to a failure status.
- * @stable ICU 2.0
- */
- U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (
- const void *context,
- UConverterToUnicodeArgs *toUArgs,
- const char* codeUnits,
- int32_t length,
- UConverterCallbackReason reason,
- UErrorCode * err);
- #endif
- #endif
- /*UCNV_ERR_H*/
|