ucnv_bld.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. **********************************************************************
  5. * Copyright (C) 1999-2015 International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. **********************************************************************
  8. *
  9. *
  10. * ucnv_bld.h:
  11. * Contains internal data structure definitions
  12. * Created by Bertrand A. Damiba
  13. *
  14. * Change history:
  15. *
  16. * 06/29/2000 helena Major rewrite of the callback APIs.
  17. */
  18. #ifndef UCNV_BLD_H
  19. #define UCNV_BLD_H
  20. #include "unicode/utypes.h"
  21. #if !UCONFIG_NO_CONVERSION
  22. #include "unicode/ucnv.h"
  23. #include "unicode/ucnv_err.h"
  24. #include "unicode/utf16.h"
  25. #include "ucnv_cnv.h"
  26. #include "ucnvmbcs.h"
  27. #include "ucnv_ext.h"
  28. #include "udataswp.h"
  /* Capacity of each overflow buffer in UConverter; enough for escaping callbacks. */
  #define UCNV_ERROR_BUFFER_LENGTH 32

  /*
   * At most 4 bytes per substitution character.
   * This value is part of the .cnv file format; see UConverterStaticData.
   */
  #define UCNV_MAX_SUBCHAR_LEN 4

  /* At most 8 bytes per character in toUBytes[] (UTF-8 uses up to 6). */
  #define UCNV_MAX_CHAR_LEN 8

  /* Bits of the converter "options" word. */
  #define UCNV_OPTION_VERSION 0xf
  #define UCNV_OPTION_SWAP_LFNL 0x10

  /*
   * Yields the bits of cnv->options selected by UCNV_OPTION_VERSION.
   * cnv must be a pointer to an object that has an "options" member.
   */
  #define UCNV_GET_VERSION(cnv) ((cnv)->options & UCNV_OPTION_VERSION)
  39. U_CDECL_BEGIN /* We must declare the following as 'extern "C"' so that if ucnv
  40. itself is compiled under C++, the linkage of the funcptrs will
  41. work.
  42. */
  43. union UConverterTable {
  44. UConverterMBCSTable mbcs;
  45. };
  46. typedef union UConverterTable UConverterTable;
  47. struct UConverterImpl;
  48. typedef struct UConverterImpl UConverterImpl;
  /**
   * Values for UConverterStaticData.unicodeMask.
   * Bit 0: has supplementary code points; bit 1: has single surrogates.
   */
  #define UCNV_HAS_SUPPLEMENTARY 1
  #define UCNV_HAS_SURROGATES 2
  52. typedef struct UConverterStaticData { /* +offset: size */
  53. uint32_t structSize; /* +0: 4 Size of this structure */
  54. char name
  55. [UCNV_MAX_CONVERTER_NAME_LENGTH]; /* +4: 60 internal name of the converter- invariant chars */
  56. int32_t codepage; /* +64: 4 codepage # (now IBM-$codepage) */
  57. int8_t platform; /* +68: 1 platform of the converter (only IBM now) */
  58. int8_t conversionType; /* +69: 1 conversion type */
  59. int8_t minBytesPerChar; /* +70: 1 Minimum # bytes per char in this codepage */
  60. int8_t maxBytesPerChar; /* +71: 1 Maximum # bytes output per UChar in this codepage */
  61. uint8_t subChar[UCNV_MAX_SUBCHAR_LEN]; /* +72: 4 [note: 4 and 8 byte boundary] */
  62. int8_t subCharLen; /* +76: 1 */
  63. uint8_t hasToUnicodeFallback; /* +77: 1 UBool needs to be changed to UBool to be consistent across platform */
  64. uint8_t hasFromUnicodeFallback; /* +78: 1 */
  65. uint8_t unicodeMask; /* +79: 1 bit 0: has supplementary bit 1: has single surrogates */
  66. uint8_t subChar1; /* +80: 1 single-byte substitution character for IBM MBCS (0 if none) */
  67. uint8_t reserved[19]; /* +81: 19 to round out the structure */
  68. /* total size: 100 */
  69. } UConverterStaticData;
  70. /*
  71. * Defines the UConverterSharedData struct,
  72. * the immutable, shared part of UConverter.
  73. */
  74. struct UConverterSharedData {
  75. uint32_t structSize; /* Size of this structure */
  76. uint32_t referenceCounter; /* used to count number of clients, unused for static/immutable SharedData */
  77. const void *dataMemory; /* from udata_openChoice() - for cleanup */
  78. const UConverterStaticData *staticData; /* pointer to the static (non changing) data. */
  79. UBool sharedDataCached; /* true: shared data is in cache, don't destroy on ucnv_close() if 0 ref. false: shared data isn't in the cache, do attempt to clean it up if the ref is 0 */
  80. /** If false, then referenceCounter is not used. Must not change after initialization. */
  81. UBool isReferenceCounted;
  82. const UConverterImpl *impl; /* vtable-style struct of mostly function pointers */
  83. /*initial values of some members of the mutable part of object */
  84. uint32_t toUnicodeStatus;
  85. /*
  86. * Shared data structures currently come in two flavors:
  87. * - readonly for built-in algorithmic converters
  88. * - allocated for MBCS, with a pointer to an allocated UConverterTable
  89. * which always has a UConverterMBCSTable
  90. *
  91. * To eliminate one allocation, I am making the UConverterMBCSTable
  92. * a member of the shared data.
  93. *
  94. * markus 2003-nov-07
  95. */
  96. UConverterMBCSTable mbcs;
  97. };
  /**
   * UConverterSharedData initializer for static, non-reference-counted converters.
   *
   * Expands to a positional brace initializer in struct member order:
   *   structSize         = sizeof(UConverterSharedData)
   *   referenceCounter   = all bits set
   *   dataMemory         = NULL
   *   staticData         = pStaticData
   *   sharedDataCached   = false
   *   isReferenceCounted = false
   *   impl               = pImpl
   *   toUnicodeStatus    = 0
   *   mbcs               = UCNV_MBCS_TABLE_INITIALIZER
   *
   * The arguments are parenthesized so that any expression can be passed safely.
   *
   * @param pStaticData value for the staticData member
   * @param pImpl       value for the impl member
   */
  #define UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(pStaticData, pImpl) \
  { \
      sizeof(UConverterSharedData), ~((uint32_t)0), \
      NULL, (pStaticData), false, false, (pImpl), \
      0, UCNV_MBCS_TABLE_INITIALIZER \
  }
  105. /* Defines a UConverter, the lightweight mutable part the user sees */
  106. struct UConverter {
  107. /*
  108. * Error function pointer called when conversion issues
  109. * occur during a ucnv_fromUnicode call
  110. */
  111. void (U_EXPORT2 *fromUCharErrorBehaviour) (const void *context,
  112. UConverterFromUnicodeArgs *args,
  113. const UChar *codeUnits,
  114. int32_t length,
  115. UChar32 codePoint,
  116. UConverterCallbackReason reason,
  117. UErrorCode *);
  118. /*
  119. * Error function pointer called when conversion issues
  120. * occur during a ucnv_toUnicode call
  121. */
  122. void (U_EXPORT2 *fromCharErrorBehaviour) (const void *context,
  123. UConverterToUnicodeArgs *args,
  124. const char *codeUnits,
  125. int32_t length,
  126. UConverterCallbackReason reason,
  127. UErrorCode *);
  128. /*
  129. * Pointer to additional data that depends on the converter type.
  130. * Used by ISO 2022, SCSU, GB 18030 converters, possibly more.
  131. */
  132. void *extraInfo;
  133. const void *fromUContext;
  134. const void *toUContext;
  135. /*
  136. * Pointer to charset bytes for substitution string if subCharLen>0,
  137. * or pointer to Unicode string (UChar *) if subCharLen<0.
  138. * subCharLen==0 is equivalent to using a skip callback.
  139. * If the pointer is !=subUChars then it is allocated with
  140. * UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR bytes.
  141. * The subUChars field is declared as UChar[] not uint8_t[] to
  142. * guarantee alignment for UChars.
  143. */
  144. uint8_t *subChars;
  145. UConverterSharedData *sharedData; /* Pointer to the shared immutable part of the converter object */
  146. uint32_t options; /* options flags from UConverterOpen, may contain additional bits */
  147. UBool sharedDataIsCached; /* true: shared data is in cache, don't destroy on ucnv_close() if 0 ref. false: shared data isn't in the cache, do attempt to clean it up if the ref is 0 */
  148. UBool isCopyLocal; /* true if UConverter is not owned and not released in ucnv_close() (stack-allocated, safeClone(), etc.) */
  149. UBool isExtraLocal; /* true if extraInfo is not owned and not released in ucnv_close() (stack-allocated, safeClone(), etc.) */
  150. UBool useFallback;
  151. int8_t toULength; /* number of bytes in toUBytes */
  152. uint8_t toUBytes[UCNV_MAX_CHAR_LEN-1];/* more "toU status"; keeps the bytes of the current character */
  153. uint32_t toUnicodeStatus; /* Used to internalize stream status information */
  154. int32_t mode;
  155. uint32_t fromUnicodeStatus;
  156. /*
  157. * More fromUnicode() status. Serves 3 purposes:
  158. * - keeps a lead surrogate between buffers (similar to toUBytes[])
  159. * - keeps a lead surrogate at the end of the stream,
  160. * which the framework handles as truncated input
  161. * - if the fromUnicode() implementation returns to the framework
  162. * (ucnv.c ucnv_fromUnicode()), then the framework calls the callback
  163. * for this code point
  164. */
  165. UChar32 fromUChar32;
  166. /*
  167. * value for ucnv_getMaxCharSize()
  168. *
  169. * usually simply copied from the static data, but ucnvmbcs.c modifies
  170. * the value depending on the converter type and options
  171. */
  172. int8_t maxBytesPerUChar;
  173. int8_t subCharLen; /* length of the codepage specific character sequence */
  174. int8_t invalidCharLength;
  175. int8_t charErrorBufferLength; /* number of valid bytes in charErrorBuffer */
  176. int8_t invalidUCharLength;
  177. int8_t UCharErrorBufferLength; /* number of valid UChars in charErrorBuffer */
  178. uint8_t subChar1; /* single-byte substitution character if different from subChar */
  179. UBool useSubChar1;
  180. char invalidCharBuffer[UCNV_MAX_CHAR_LEN]; /* bytes from last error/callback situation */
  181. uint8_t charErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* codepage output from Error functions */
  182. UChar subUChars[UCNV_MAX_SUBCHAR_LEN/U_SIZEOF_UCHAR]; /* see subChars documentation */
  183. UChar invalidUCharBuffer[U16_MAX_LENGTH]; /* UChars from last error/callback situation */
  184. UChar UCharErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* unicode output from Error functions */
  185. /* fields for conversion extension */
  186. /* store previous UChars/chars to continue partial matches */
  187. UChar32 preFromUFirstCP; /* >=0: partial match */
  188. UChar preFromU[UCNV_EXT_MAX_UCHARS];
  189. char preToU[UCNV_EXT_MAX_BYTES];
  190. int8_t preFromULength, preToULength; /* negative: replay */
  191. int8_t preToUFirstLength; /* length of first character */
  192. /* new fields for ICU 4.0 */
  193. UConverterCallbackReason toUCallbackReason; /* (*fromCharErrorBehaviour) reason, set when error is detected */
  194. };
  195. U_CDECL_END /* end of UConverter */
  /* File name extension of converter data files. */
  #define CONVERTER_FILE_EXTENSION ".cnv"
  197. /**
  198. * Return the number of all converter names.
  199. * @param pErrorCode The error code
  200. * @return the number of all converter names
  201. */
  202. U_CFUNC uint16_t
  203. ucnv_bld_countAvailableConverters(UErrorCode *pErrorCode);
  204. /**
  205. * Return the (n)th converter name in mixed case, or NULL
  206. * if there is none (typically, if the data cannot be loaded).
  207. * 0<=index<ucnv_io_countAvailableConverters().
  208. * @param n The number specifies which converter name to get
  209. * @param pErrorCode The error code
  210. * @return the (n)th converter name in mixed case, or NULL if there is none.
  211. */
  212. U_CFUNC const char *
  213. ucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode);
  214. /**
  215. * Load a non-algorithmic converter.
  216. * If pkg==NULL, then this function must be called inside umtx_lock(&cnvCacheMutex).
  217. */
  218. U_CAPI UConverterSharedData *
  219. ucnv_load(UConverterLoadArgs *pArgs, UErrorCode *err);
  220. /**
  221. * Unload a non-algorithmic converter.
  222. * It must be sharedData->isReferenceCounted
  223. * and this function must be called inside umtx_lock(&cnvCacheMutex).
  224. */
  225. U_CAPI void
  226. ucnv_unload(UConverterSharedData *sharedData);
  227. /**
  228. * Swap ICU .cnv conversion tables. See udataswp.h.
  229. * @internal
  230. */
  231. U_CAPI int32_t U_EXPORT2
  232. ucnv_swap(const UDataSwapper *ds,
  233. const void *inData, int32_t length, void *outData,
  234. UErrorCode *pErrorCode);
  235. U_CAPI void U_EXPORT2
  236. ucnv_enableCleanup(void);
  #endif /* !UCONFIG_NO_CONVERSION */
  #endif /* UCNV_BLD_H */