simpleformatter.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. ******************************************************************************
  5. * Copyright (C) 2014-2016, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. ******************************************************************************
  8. * simpleformatter.h
  9. */
  10. #ifndef __SIMPLEFORMATTER_H__
  11. #define __SIMPLEFORMATTER_H__
  12. /**
  13. * \file
  14. * \brief C++ API: Simple formatter, minimal subset of MessageFormat.
  15. */
  16. #include "unicode/utypes.h"
  17. #if U_SHOW_CPLUSPLUS_API
  18. #include "unicode/unistr.h"
  19. U_NAMESPACE_BEGIN
  20. // Forward declaration:
  21. namespace number {
  22. namespace impl {
  23. class SimpleModifier;
  24. }
  25. }
  26. /**
  27. * Formats simple patterns like "{1} was born in {0}".
  28. * Minimal subset of MessageFormat; fast, simple, minimal dependencies.
  29. * Supports only numbered arguments with no type nor style parameters,
  30. * and formats only string values.
  31. * Quoting via ASCII apostrophe compatible with ICU MessageFormat default behavior.
  32. *
  33. * Factory methods set error codes for syntax errors
  34. * and for too few or too many arguments/placeholders.
  35. *
  36. * SimpleFormatter objects are thread-safe except for assignment and applying new patterns.
  37. *
  38. * Example:
  39. * <pre>
  40. * UErrorCode errorCode = U_ZERO_ERROR;
  41. * SimpleFormatter fmt("{1} '{born}' in {0}", errorCode);
  42. * UnicodeString result;
  43. *
  44. * // Output: "paul {born} in england"
  45. * fmt.format("england", "paul", result, errorCode);
  46. * </pre>
  47. *
  48. * This class is not intended for public subclassing.
  49. *
  50. * @see MessageFormat
  51. * @see UMessagePatternApostropheMode
  52. * @stable ICU 57
  53. */
  54. class U_COMMON_API SimpleFormatter final : public UMemory {
  55. public:
  56. /**
  57. * Default constructor.
  58. * @stable ICU 57
  59. */
  60. SimpleFormatter() : compiledPattern((char16_t)0) {}
  61. /**
  62. * Constructs a formatter from the pattern string.
  63. *
  64. * @param pattern The pattern string.
  65. * @param errorCode ICU error code in/out parameter.
  66. * Must fulfill U_SUCCESS before the function call.
  67. * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax.
  68. * @stable ICU 57
  69. */
  70. SimpleFormatter(const UnicodeString& pattern, UErrorCode &errorCode) {
  71. applyPattern(pattern, errorCode);
  72. }
  73. /**
  74. * Constructs a formatter from the pattern string.
  75. * The number of arguments checked against the given limits is the
  76. * highest argument number plus one, not the number of occurrences of arguments.
  77. *
  78. * @param pattern The pattern string.
  79. * @param min The pattern must have at least this many arguments.
  80. * @param max The pattern must have at most this many arguments.
  81. * @param errorCode ICU error code in/out parameter.
  82. * Must fulfill U_SUCCESS before the function call.
  83. * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax and
  84. * too few or too many arguments.
  85. * @stable ICU 57
  86. */
  87. SimpleFormatter(const UnicodeString& pattern, int32_t min, int32_t max,
  88. UErrorCode &errorCode) {
  89. applyPatternMinMaxArguments(pattern, min, max, errorCode);
  90. }
  91. /**
  92. * Copy constructor.
  93. * @stable ICU 57
  94. */
  95. SimpleFormatter(const SimpleFormatter& other)
  96. : compiledPattern(other.compiledPattern) {}
  97. /**
  98. * Assignment operator.
  99. * @stable ICU 57
  100. */
  101. SimpleFormatter &operator=(const SimpleFormatter& other);
  102. /**
  103. * Destructor.
  104. * @stable ICU 57
  105. */
  106. ~SimpleFormatter();
  107. /**
  108. * Changes this object according to the new pattern.
  109. *
  110. * @param pattern The pattern string.
  111. * @param errorCode ICU error code in/out parameter.
  112. * Must fulfill U_SUCCESS before the function call.
  113. * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax.
  114. * @return true if U_SUCCESS(errorCode).
  115. * @stable ICU 57
  116. */
  117. UBool applyPattern(const UnicodeString &pattern, UErrorCode &errorCode) {
  118. return applyPatternMinMaxArguments(pattern, 0, INT32_MAX, errorCode);
  119. }
  120. /**
  121. * Changes this object according to the new pattern.
  122. * The number of arguments checked against the given limits is the
  123. * highest argument number plus one, not the number of occurrences of arguments.
  124. *
  125. * @param pattern The pattern string.
  126. * @param min The pattern must have at least this many arguments.
  127. * @param max The pattern must have at most this many arguments.
  128. * @param errorCode ICU error code in/out parameter.
  129. * Must fulfill U_SUCCESS before the function call.
  130. * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax and
  131. * too few or too many arguments.
  132. * @return true if U_SUCCESS(errorCode).
  133. * @stable ICU 57
  134. */
  135. UBool applyPatternMinMaxArguments(const UnicodeString &pattern,
  136. int32_t min, int32_t max, UErrorCode &errorCode);
  137. /**
  138. * @return The max argument number + 1.
  139. * @stable ICU 57
  140. */
  141. int32_t getArgumentLimit() const {
  142. return getArgumentLimit(compiledPattern.getBuffer(), compiledPattern.length());
  143. }
  144. /**
  145. * Formats the given value, appending to the appendTo builder.
  146. * The argument value must not be the same object as appendTo.
  147. * getArgumentLimit() must be at most 1.
  148. *
  149. * @param value0 Value for argument {0}.
  150. * @param appendTo Gets the formatted pattern and value appended.
  151. * @param errorCode ICU error code in/out parameter.
  152. * Must fulfill U_SUCCESS before the function call.
  153. * @return appendTo
  154. * @stable ICU 57
  155. */
  156. UnicodeString &format(
  157. const UnicodeString &value0,
  158. UnicodeString &appendTo, UErrorCode &errorCode) const;
  159. /**
  160. * Formats the given values, appending to the appendTo builder.
  161. * An argument value must not be the same object as appendTo.
  162. * getArgumentLimit() must be at most 2.
  163. *
  164. * @param value0 Value for argument {0}.
  165. * @param value1 Value for argument {1}.
  166. * @param appendTo Gets the formatted pattern and values appended.
  167. * @param errorCode ICU error code in/out parameter.
  168. * Must fulfill U_SUCCESS before the function call.
  169. * @return appendTo
  170. * @stable ICU 57
  171. */
  172. UnicodeString &format(
  173. const UnicodeString &value0,
  174. const UnicodeString &value1,
  175. UnicodeString &appendTo, UErrorCode &errorCode) const;
  176. /**
  177. * Formats the given values, appending to the appendTo builder.
  178. * An argument value must not be the same object as appendTo.
  179. * getArgumentLimit() must be at most 3.
  180. *
  181. * @param value0 Value for argument {0}.
  182. * @param value1 Value for argument {1}.
  183. * @param value2 Value for argument {2}.
  184. * @param appendTo Gets the formatted pattern and values appended.
  185. * @param errorCode ICU error code in/out parameter.
  186. * Must fulfill U_SUCCESS before the function call.
  187. * @return appendTo
  188. * @stable ICU 57
  189. */
  190. UnicodeString &format(
  191. const UnicodeString &value0,
  192. const UnicodeString &value1,
  193. const UnicodeString &value2,
  194. UnicodeString &appendTo, UErrorCode &errorCode) const;
  195. /**
  196. * Formats the given values, appending to the appendTo string.
  197. *
  198. * @param values The argument values.
  199. * An argument value must not be the same object as appendTo.
  200. * Can be nullptr if valuesLength==getArgumentLimit()==0.
  201. * @param valuesLength The length of the values array.
  202. * Must be at least getArgumentLimit().
  203. * @param appendTo Gets the formatted pattern and values appended.
  204. * @param offsets offsets[i] receives the offset of where
  205. * values[i] replaced pattern argument {i}.
  206. * Can be shorter or longer than values. Can be nullptr if offsetsLength==0.
  207. * If there is no {i} in the pattern, then offsets[i] is set to -1.
  208. * @param offsetsLength The length of the offsets array.
  209. * @param errorCode ICU error code in/out parameter.
  210. * Must fulfill U_SUCCESS before the function call.
  211. * @return appendTo
  212. * @stable ICU 57
  213. */
  214. UnicodeString &formatAndAppend(
  215. const UnicodeString *const *values, int32_t valuesLength,
  216. UnicodeString &appendTo,
  217. int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const;
  218. /**
  219. * Formats the given values, replacing the contents of the result string.
  220. * May optimize by actually appending to the result if it is the same object
  221. * as the value corresponding to the initial argument in the pattern.
  222. *
  223. * @param values The argument values.
  224. * An argument value may be the same object as result.
  225. * Can be nullptr if valuesLength==getArgumentLimit()==0.
  226. * @param valuesLength The length of the values array.
  227. * Must be at least getArgumentLimit().
  228. * @param result Gets its contents replaced by the formatted pattern and values.
  229. * @param offsets offsets[i] receives the offset of where
  230. * values[i] replaced pattern argument {i}.
  231. * Can be shorter or longer than values. Can be nullptr if offsetsLength==0.
  232. * If there is no {i} in the pattern, then offsets[i] is set to -1.
  233. * @param offsetsLength The length of the offsets array.
  234. * @param errorCode ICU error code in/out parameter.
  235. * Must fulfill U_SUCCESS before the function call.
  236. * @return result
  237. * @stable ICU 57
  238. */
  239. UnicodeString &formatAndReplace(
  240. const UnicodeString *const *values, int32_t valuesLength,
  241. UnicodeString &result,
  242. int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const;
  243. /**
  244. * Returns the pattern text with none of the arguments.
  245. * Like formatting with all-empty string values.
  246. * @stable ICU 57
  247. */
  248. UnicodeString getTextWithNoArguments() const {
  249. return getTextWithNoArguments(
  250. compiledPattern.getBuffer(),
  251. compiledPattern.length(),
  252. nullptr,
  253. 0);
  254. }
  255. #ifndef U_HIDE_INTERNAL_API
  256. /**
  257. * Returns the pattern text with none of the arguments.
  258. * Like formatting with all-empty string values.
  259. *
  260. * TODO(ICU-20406): Replace this with an Iterator interface.
  261. *
  262. * @param offsets offsets[i] receives the offset of where {i} was located
  263. * before it was replaced by an empty string.
  264. * For example, "a{0}b{1}" produces offset 1 for i=0 and 2 for i=1.
  265. * Can be nullptr if offsetsLength==0.
  266. * If there is no {i} in the pattern, then offsets[i] is set to -1.
  267. * @param offsetsLength The length of the offsets array.
  268. *
  269. * @internal
  270. */
  271. UnicodeString getTextWithNoArguments(int32_t *offsets, int32_t offsetsLength) const {
  272. return getTextWithNoArguments(
  273. compiledPattern.getBuffer(),
  274. compiledPattern.length(),
  275. offsets,
  276. offsetsLength);
  277. }
  278. #endif // U_HIDE_INTERNAL_API
  279. private:
  280. /**
  281. * Binary representation of the compiled pattern.
  282. * Index 0: One more than the highest argument number.
  283. * Followed by zero or more arguments or literal-text segments.
  284. *
  285. * An argument is stored as its number, less than ARG_NUM_LIMIT.
  286. * A literal-text segment is stored as its length (at least 1) offset by ARG_NUM_LIMIT,
  287. * followed by that many chars.
  288. */
  289. UnicodeString compiledPattern;
  290. static inline int32_t getArgumentLimit(const char16_t *compiledPattern,
  291. int32_t compiledPatternLength) {
  292. return compiledPatternLength == 0 ? 0 : compiledPattern[0];
  293. }
  294. static UnicodeString getTextWithNoArguments(
  295. const char16_t *compiledPattern,
  296. int32_t compiledPatternLength,
  297. int32_t *offsets,
  298. int32_t offsetsLength);
  299. static UnicodeString &format(
  300. const char16_t *compiledPattern, int32_t compiledPatternLength,
  301. const UnicodeString *const *values,
  302. UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue,
  303. int32_t *offsets, int32_t offsetsLength,
  304. UErrorCode &errorCode);
  305. // Give access to internals to SimpleModifier for number formatting
  306. friend class number::impl::SimpleModifier;
  307. };
  308. U_NAMESPACE_END
  309. #endif /* U_SHOW_CPLUSPLUS_API */
  310. #endif // __SIMPLEFORMATTER_H__