ubiditransform.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327
  1. /*
  2. ******************************************************************************
  3. *
  4. * © 2016 and later: Unicode, Inc. and others.
  5. * License & terms of use: http://www.unicode.org/copyright.html
  6. *
  7. ******************************************************************************
  8. * file name: ubiditransform.h
  9. * encoding: UTF-8
  10. * tab size: 8 (not used)
  11. * indentation:4
  12. *
  13. * created on: 2016jul24
  14. * created by: Lina Kemmel
  15. *
  16. */
  17. #ifndef UBIDITRANSFORM_H
  18. #define UBIDITRANSFORM_H
  19. #include "unicode/utypes.h"
  20. #include "unicode/ubidi.h"
  21. #include "unicode/uchar.h"
  22. #if U_SHOW_CPLUSPLUS_API
  23. #include "unicode/localpointer.h"
  24. #endif // U_SHOW_CPLUSPLUS_API
  25. /**
  26. * \file
  27. * \brief C API: Bidi Transformations
  28. */
  29. /**
  30. * `UBiDiOrder` indicates the order of text.
  31. *
  32. * This bidi transformation engine supports all possible combinations (4 in
  33. * total) of input and output text order:
  34. *
  35. * - <logical input, visual output>: unless the output direction is RTL, this
  36. * corresponds to a normal operation of the Bidi algorithm as described in the
  37. * Unicode Technical Report and implemented by `UBiDi` when the
  38. * reordering mode is set to `UBIDI_REORDER_DEFAULT`. Visual RTL
  39. * mode is not supported by `UBiDi` and is accomplished through
  40. * reversing a visual LTR string,
  41. *
  42. * - <visual input, logical output>: unless the input direction is RTL, this
  43. * corresponds to an "inverse bidi algorithm" in `UBiDi` with the
  44. * reordering mode set to `UBIDI_REORDER_INVERSE_LIKE_DIRECT`.
  45. * Visual RTL mode is not supported by `UBiDi` and is
  46. * accomplished through reversing a visual LTR string,
  47. *
  48. * - <logical input, logical output>: if the input and output base directions
  49. * mismatch, this corresponds to the `UBiDi` implementation with the
  50. * reordering mode set to `UBIDI_REORDER_RUNS_ONLY`; and if the
  51. * input and output base directions are identical, the transformation engine
  52. * will only handle character mirroring and Arabic shaping operations without
  53. * reordering,
  54. *
  55. * - <visual input, visual output>: this reordering mode is not supported by
  56. * the `UBiDi` engine; it implies character mirroring, Arabic
  57. * shaping, and - if the input/output base directions mismatch - string
  58. * reverse operations.
  59. * @see ubidi_setInverse
  60. * @see ubidi_setReorderingMode
  61. * @see UBIDI_REORDER_DEFAULT
  62. * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
  63. * @see UBIDI_REORDER_RUNS_ONLY
  64. * @stable ICU 58
  65. */
  66. typedef enum {
  67. /** 0: Constant indicating a logical order.
  68. * This is the default for input text.
  69. * @stable ICU 58
  70. */
  71. UBIDI_LOGICAL = 0,
  72. /** 1: Constant indicating a visual order.
  73. * This is the default for output text.
  74. * @stable ICU 58
  75. */
  76. UBIDI_VISUAL
  77. } UBiDiOrder;
  78. /**
  79. * <code>UBiDiMirroring</code> indicates whether or not characters with the
  80. * "mirrored" property in RTL runs should be replaced with their mirror-image
  81. * counterparts.
  82. * @see UBIDI_DO_MIRRORING
  83. * @see ubidi_setReorderingOptions
  84. * @see ubidi_writeReordered
  85. * @see ubidi_writeReverse
  86. * @stable ICU 58
  87. */
  88. typedef enum {
  89. /** 0: Constant indicating that character mirroring should not be
  90. * performed, i.e. without the <code>UBIDI_DO_MIRRORING</code> option bit.
  91. * This is the default.
  92. * @stable ICU 58
  93. */
  94. UBIDI_MIRRORING_OFF = 0,
  95. /** 1: Constant indicating that character mirroring should be performed.
  96. * This corresponds to calling <code>ubidi_writeReordered</code> or
  97. * <code>ubidi_writeReverse</code> with the
  98. * <code>UBIDI_DO_MIRRORING</code> option bit set.
  99. * @stable ICU 58
  100. */
  101. UBIDI_MIRRORING_ON
  102. } UBiDiMirroring;
  103. /**
  104. * Forward declaration of the <code>UBiDiTransform</code> structure that stores
  105. * information used by the layout transformation engine (opaque in this header).
  106. * @stable ICU 58
  107. */
  108. typedef struct UBiDiTransform UBiDiTransform;
  109. /**
  110. * Performs transformation of text from the bidi layout defined by the input
  111. * ordering scheme to the bidi layout defined by the output ordering scheme,
  112. * and applies character mirroring and Arabic shaping operations.<p>
  113. * In terms of <code>UBiDi</code>, such a transformation implies:
  114. * <ul>
  115. * <li>calling <code>ubidi_setReorderingMode</code> as needed (when the
  116. * reordering mode is other than normal),</li>
  117. * <li>calling <code>ubidi_setInverse</code> as needed (when text should be
  118. * transformed from a visual to a logical form),</li>
  119. * <li>resolving embedding levels of each character in the input text by
  120. * calling <code>ubidi_setPara</code>,</li>
  121. * <li>reordering the characters based on the computed embedding levels, also
  122. * performing character mirroring as needed, and streaming the result to the
  123. * output, by calling <code>ubidi_writeReordered</code>,</li>
  124. * <li>performing Arabic digit and letter shaping on the output text by calling
  125. * <code>u_shapeArabic</code>.</li>
  126. * </ul>
  127. * An "ordering scheme" encompasses the base direction and the order of text,
  128. * and these characteristics must be defined by the caller for both input and
  129. * output explicitly.<p>
  130. * There are 36 possible combinations of <input, output> ordering schemes,
  131. * which are partially supported by <code>UBiDi</code> already. Examples of the
  132. * currently supported combinations:
  133. * <ul>
  134. * <li><Logical LTR, Visual LTR>: this is equivalent to calling
  135. * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li>
  136. * <li><Logical RTL, Visual LTR>: this is equivalent to calling
  137. * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>,</li>
  138. * <li><Logical Default ("Auto") LTR, Visual LTR>: this is equivalent to
  139. * calling <code>ubidi_setPara</code> with
  140. * <code>paraLevel == UBIDI_DEFAULT_LTR</code>,</li>
  141. * <li><Logical Default ("Auto") RTL, Visual LTR>: this is equivalent to
  142. * calling <code>ubidi_setPara</code> with
  143. * <code>paraLevel == UBIDI_DEFAULT_RTL</code>,</li>
  144. * <li><Visual LTR, Logical LTR>: this is equivalent to
  145. * calling <code>ubidi_setInverse(UBiDi*, true)</code> and then
  146. * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li>
  147. * <li><Visual LTR, Logical RTL>: this is equivalent to
  148. * calling <code>ubidi_setInverse(UBiDi*, true)</code> and then
  149. * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>.</li>
  150. * </ul>
  151. * All combinations that involve the Visual RTL scheme are unsupported by
  152. * <code>UBiDi</code>, for instance:
  153. * <ul>
  154. * <li><Logical LTR, Visual RTL>,</li>
  155. * <li><Visual RTL, Logical RTL>.</li>
  156. * </ul>
  157. * <p>Example of usage of the transformation engine:<br>
  158. * <pre>
  159. * \code
  160. * UChar text1[] = {'a', 'b', 'c', 0x0625, '1', 0};
  161. * UChar text2[] = {'a', 'b', 'c', 0x0625, '1', 0};
  162. * UErrorCode errorCode = U_ZERO_ERROR;
  163. * // Run a transformation.
  164. * ubiditransform_transform(pBidiTransform,
  165. * text1, -1, text2, -1,
  166. * UBIDI_LTR, UBIDI_VISUAL,
  167. * UBIDI_RTL, UBIDI_LOGICAL,
  168. * UBIDI_MIRRORING_OFF,
  169. * U_SHAPE_DIGITS_AN2EN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED,
  170. * &errorCode);
  171. * // Do something with text2.
  172. * text2[4] = '2';
  173. * // Run a reverse transformation.
  174. * ubiditransform_transform(pBidiTransform,
  175. * text2, -1, text1, -1,
  176. * UBIDI_RTL, UBIDI_LOGICAL,
  177. * UBIDI_LTR, UBIDI_VISUAL,
  178. * UBIDI_MIRRORING_OFF,
  179. * U_SHAPE_DIGITS_EN2AN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED,
  180. * &errorCode);
  181. *\endcode
  182. * </pre>
  183. * </p>
  184. *
  185. * @param pBiDiTransform A pointer to a <code>UBiDiTransform</code> object
  186. * allocated with <code>ubiditransform_open()</code> or
  187. * <code>NULL</code>.<p>
  188. * This object serves for one-time setup to amortize initialization
  189. * overheads. Use of this object is not thread-safe. All other threads
  190. * should allocate a new <code>UBiDiTransform</code> object by calling
  191. * <code>ubiditransform_open()</code> before using it. Alternatively,
  192. * a caller can set this parameter to <code>NULL</code>, in which case
  193. * the object will be allocated by the engine on the fly.</p>
  194. * @param src A pointer to the text that the Bidi layout transformations will
  195. * be performed on.
  196. * <p><strong>Note:</strong> the text must be (at least)
  197. * <code>srcLength</code> long.</p>
  198. * @param srcLength The length of the text, in number of UChars. If
  199. * <code>srcLength == -1</code> then the text must be zero-terminated.
  200. * @param dest A pointer to where the processed text is to be copied.
  201. * @param destSize The size of the <code>dest</code> buffer, in number of
  202. * UChars. If the <code>U_SHAPE_LETTERS_UNSHAPE</code> option is set,
  203. * then the destination length could be as large as
  204. * <code>srcLength * 2</code>. Otherwise, the destination length will
  205. * not exceed <code>srcLength</code>. If the caller reserves the last
  206. * position for zero-termination, it should be excluded from
  207. * <code>destSize</code>.
  208. * <p><code>destSize == -1</code> is allowed and makes sense when
  209. * <code>dest</code> already holds some meaningful value, e.g. that of
  210. * <code>src</code>. In this case <code>dest</code> must be
  211. * zero-terminated.</p>
  212. * @param inParaLevel A base embedding level of the input as defined in
  213. * <code>ubidi_setPara</code> documentation for the
  214. * <code>paraLevel</code> parameter.
  215. * @param inOrder An order of the input, which can be one of the
  216. * <code>UBiDiOrder</code> values.
  217. * @param outParaLevel A base embedding level of the output as defined in
  218. * <code>ubidi_setPara</code> documentation for the
  219. * <code>paraLevel</code> parameter.
  220. * @param outOrder An order of the output, which can be one of the
  221. * <code>UBiDiOrder</code> values.
  222. * @param doMirroring Indicates whether or not to perform character mirroring,
  223. * and can accept one of the <code>UBiDiMirroring</code> values.
  224. * @param shapingOptions Arabic digit and letter shaping options defined in the
  225. * ushape.h documentation.
  226. * <p><strong>Note:</strong> Direction indicator options are computed by
  227. * the transformation engine based on the effective ordering schemes, so
  228. * user-defined direction indicators will be ignored.</p>
  229. * @param pErrorCode A pointer to an error code value.
  230. *
  231. * @return The destination length, i.e. the number of UChars written to
  232. * <code>dest</code>. If the transformation fails, the return value
  233. * will be 0 (and the error code will be written to
  234. * <code>pErrorCode</code>).
  235. *
  236. * @see UBiDiLevel
  237. * @see UBiDiOrder
  238. * @see UBiDiMirroring
  239. * @see ubidi_setPara
  240. * @see u_shapeArabic
  241. * @stable ICU 58
  242. */
  243. U_CAPI uint32_t U_EXPORT2
  244. ubiditransform_transform(UBiDiTransform *pBiDiTransform,
  245. const UChar *src, int32_t srcLength,
  246. UChar *dest, int32_t destSize,
  247. UBiDiLevel inParaLevel, UBiDiOrder inOrder,
  248. UBiDiLevel outParaLevel, UBiDiOrder outOrder,
  249. UBiDiMirroring doMirroring, uint32_t shapingOptions,
  250. UErrorCode *pErrorCode);
  251. /**
  252. * Allocates a <code>UBiDiTransform</code> object. This object can be reused,
  253. * e.g. with different ordering schemes, mirroring or shaping options.<p>
  254. * <strong>Note:</strong> The object can only be reused in the same thread.
  255. * All other threads should allocate a new <code>UBiDiTransform</code> object
  256. * before using it.<p>
  257. * Example of usage:<p>
  258. * <pre>
  259. * \code
  260. * UErrorCode errorCode = U_ZERO_ERROR;
  261. * // Open a new UBiDiTransform.
  262. * UBiDiTransform* transform = ubiditransform_open(&errorCode);
  263. * // Run a transformation.
  264. * ubiditransform_transform(transform,
  265. * text1, -1, text2, -1,
  266. * UBIDI_RTL, UBIDI_LOGICAL,
  267. * UBIDI_LTR, UBIDI_VISUAL,
  268. * UBIDI_MIRRORING_ON,
  269. * U_SHAPE_DIGITS_EN2AN,
  270. * &errorCode);
  271. * // Do something with the output text and invoke another transformation using
  272. * // that text as input.
  273. * ubiditransform_transform(transform,
  274. * text2, -1, text3, -1,
  275. * UBIDI_LTR, UBIDI_VISUAL,
  276. * UBIDI_RTL, UBIDI_VISUAL,
  277. * UBIDI_MIRRORING_ON,
  278. * 0, &errorCode);
  279. *\endcode
  280. * </pre>
  281. * <p>
  282. * The <code>UBiDiTransform</code> object must be deallocated by calling
  283. * <code>ubiditransform_close()</code>.
  284. * @param pErrorCode A pointer to an error code value.
  285. * @return An empty <code>UBiDiTransform</code> object.
  286. * @stable ICU 58
  287. */
  288. U_CAPI UBiDiTransform* U_EXPORT2
  289. ubiditransform_open(UErrorCode *pErrorCode);
  290. /**
  291. * Deallocates a <code>UBiDiTransform</code> created by <code>ubiditransform_open()</code>.
  292. * @stable ICU 58
  293. */
  294. U_CAPI void U_EXPORT2
  295. ubiditransform_close(UBiDiTransform *pBidiTransform);
  296. #if U_SHOW_CPLUSPLUS_API
  297. U_NAMESPACE_BEGIN
  298. /**
  299. * \class LocalUBiDiTransformPointer
  300. * "Smart pointer" class, closes a UBiDiTransform via ubiditransform_close().
  301. * For most methods see the LocalPointerBase base class.
  302. *
  303. * @see LocalPointerBase
  304. * @see LocalPointer
  305. * @stable ICU 58
  306. */
  307. U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiTransformPointer, UBiDiTransform, ubiditransform_close);
  308. U_NAMESPACE_END
  309. #endif // U_SHOW_CPLUSPLUS_API
  310. #endif