// messagepattern.h
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. * Copyright (C) 2011-2013, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. *******************************************************************************
  8. * file name: messagepattern.h
  9. * encoding: UTF-8
  10. * tab size: 8 (not used)
  11. * indentation:4
  12. *
  13. * created on: 2011mar14
  14. * created by: Markus W. Scherer
  15. */
  16. #ifndef __MESSAGEPATTERN_H__
  17. #define __MESSAGEPATTERN_H__
  18. /**
  19. * \file
  20. * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns.
  21. */
  22. #include "unicode/utypes.h"
  23. #if U_SHOW_CPLUSPLUS_API
  24. #if !UCONFIG_NO_FORMATTING
  25. #include "unicode/parseerr.h"
  26. #include "unicode/unistr.h"
  /**
   * Mode for when an apostrophe starts quoted literal text for MessageFormat output.
   * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h
   * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE).
   * <p>
   * A pair of adjacent apostrophes always results in a single apostrophe in the output,
   * even when the pair is between two single, text-quoting apostrophes.
   * <p>
   * The following table shows examples of desired MessageFormat.format() output
   * with the pattern strings that yield that output.
   * <p>
   * <table>
   *   <tr>
   *     <th>Desired output</th>
   *     <th>DOUBLE_OPTIONAL</th>
   *     <th>DOUBLE_REQUIRED</th>
   *   </tr>
   *   <tr>
   *     <td>I see {many}</td>
   *     <td>I see '{many}'</td>
   *     <td>(same)</td>
   *   </tr>
   *   <tr>
   *     <td>I said {'Wow!'}</td>
   *     <td>I said '{''Wow!''}'</td>
   *     <td>(same)</td>
   *   </tr>
   *   <tr>
   *     <td>I don't know</td>
   *     <td>I don't know OR<br> I don''t know</td>
   *     <td>I don''t know</td>
   *   </tr>
   * </table>
   * @stable ICU 4.8
   * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
   */
  enum UMessagePatternApostropheMode {
      /**
       * A literal apostrophe is represented by
       * either a single or a double apostrophe pattern character.
       * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
       * if it immediately precedes a curly brace {},
       * or a pipe symbol | if inside a choice format,
       * or a pound symbol # if inside a plural format.
       * <p>
       * This is the default behavior starting with ICU 4.8.
       * @stable ICU 4.8
       */
      UMSGPAT_APOS_DOUBLE_OPTIONAL,
      /**
       * A literal apostrophe must be represented by
       * a double apostrophe pattern character.
       * A single apostrophe always starts quoted literal text.
       * <p>
       * This is the behavior of ICU 4.6 and earlier, and of the JDK.
       * @stable ICU 4.8
       */
      UMSGPAT_APOS_DOUBLE_REQUIRED
  };
  /**
   * Lets the enum type be named without the <code>enum</code> keyword.
   * @stable ICU 4.8
   */
  typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;
  /**
   * MessagePattern::Part type constants.
   * @stable ICU 4.8
   */
  enum UMessagePatternPartType {
      /**
       * Start of a message pattern (main or nested).
       * The length is 0 for the top-level message
       * and for a choice argument sub-message, otherwise 1 for the '{'.
       * The value indicates the nesting level, starting with 0 for the main message.
       * <p>
       * There is always a later MSG_LIMIT part.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_MSG_START,
      /**
       * End of a message pattern (main or nested).
       * The length is 0 for the top-level message and
       * the last sub-message of a choice argument,
       * otherwise 1 for the '}' or (in a choice argument style) the '|'.
       * The value indicates the nesting level, starting with 0 for the main message.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_MSG_LIMIT,
      /**
       * Indicates a substring of the pattern string which is to be skipped when formatting.
       * For example, an apostrophe that begins or ends quoted text
       * would be indicated with such a part.
       * The value is undefined and currently always 0.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_SKIP_SYNTAX,
      /**
       * Indicates that a syntax character needs to be inserted for auto-quoting.
       * The length is 0.
       * The value is the character code of the insertion character. (U+0027=APOSTROPHE)
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_INSERT_CHAR,
      /**
       * Indicates a syntactic (non-escaped) # symbol in a plural variant.
       * When formatting, replace this part's substring with the
       * (value-offset) for the plural argument value.
       * The value is undefined and currently always 0.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_REPLACE_NUMBER,
      /**
       * Start of an argument.
       * The length is 1 for the '{'.
       * The value is the ordinal value of the ArgType. Use getArgType().
       * <p>
       * This part is followed by either an ARG_NUMBER or ARG_NAME,
       * followed by optional argument sub-parts (see UMessagePatternArgType constants)
       * and finally an ARG_LIMIT part.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_ARG_START,
      /**
       * End of an argument.
       * The length is 1 for the '}'.
       * The value is the ordinal value of the ArgType. Use getArgType().
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_ARG_LIMIT,
      /**
       * The argument number, provided by the value.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_ARG_NUMBER,
      /**
       * The argument name.
       * The value is undefined and currently always 0.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_ARG_NAME,
      /**
       * The argument type.
       * The value is undefined and currently always 0.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_ARG_TYPE,
      /**
       * The argument style text.
       * The value is undefined and currently always 0.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_ARG_STYLE,
      /**
       * A selector substring in a "complex" argument style.
       * The value is undefined and currently always 0.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_ARG_SELECTOR,
      /**
       * An integer value, for example the offset or an explicit selector value
       * in a PluralFormat style.
       * The part value is the integer value.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_ARG_INT,
      /**
       * A numeric value, for example the offset or an explicit selector value
       * in a PluralFormat style.
       * The part value is an index into an internal array of numeric values;
       * use getNumericValue().
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_ARG_DOUBLE
  };
  /**
   * Lets the enum type be named without the <code>enum</code> keyword.
   * @stable ICU 4.8
   */
  typedef enum UMessagePatternPartType UMessagePatternPartType;
  /**
   * Argument type constants.
   * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts.
   *
   * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
   * with a nesting level one greater than the surrounding message.
   * @stable ICU 4.8
   */
  enum UMessagePatternArgType {
      /**
       * The argument has no specified type.
       * @stable ICU 4.8
       */
      UMSGPAT_ARG_TYPE_NONE,
      /**
       * The argument has a "simple" type which is provided by the ARG_TYPE part.
       * An ARG_STYLE part might follow that.
       * @stable ICU 4.8
       */
      UMSGPAT_ARG_TYPE_SIMPLE,
      /**
       * The argument is a ChoiceFormat with one or more
       * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
       * @stable ICU 4.8
       */
      UMSGPAT_ARG_TYPE_CHOICE,
      /**
       * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset
       * (e.g., offset:1)
       * and one or more (ARG_SELECTOR [explicit-value] message) tuples.
       * If the selector has an explicit value (e.g., =2), then
       * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message.
       * Otherwise the message immediately follows the ARG_SELECTOR.
       * @stable ICU 4.8
       */
      UMSGPAT_ARG_TYPE_PLURAL,
      /**
       * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
       * @stable ICU 4.8
       */
      UMSGPAT_ARG_TYPE_SELECT,
      /**
       * The argument is an ordinal-number PluralFormat
       * with the same style parts sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL.
       * @stable ICU 50
       */
      UMSGPAT_ARG_TYPE_SELECTORDINAL
  };
  /**
   * Lets the enum type be named without the <code>enum</code> keyword.
   * @stable ICU 4.8
   */
  typedef enum UMessagePatternArgType UMessagePatternArgType;
  /**
   * \def UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE
   * Returns true if the argument type has a plural style part sequence and semantics,
   * for example UMSGPAT_ARG_TYPE_PLURAL and UMSGPAT_ARG_TYPE_SELECTORDINAL.
   * <p>
   * Note: the macro argument is evaluated twice, so do not pass an expression
   * with side effects.
   * @stable ICU 50
   */
  #define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
      ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL)
  /**
   * Special (negative) return values of MessagePattern.validateArgumentName().
   */
  enum {
      /**
       * Return value from MessagePattern.validateArgumentName() for when
       * the string is a valid "pattern identifier" but not a number.
       * @stable ICU 4.8
       */
      UMSGPAT_ARG_NAME_NOT_NUMBER=-1,
      /**
       * Return value from MessagePattern.validateArgumentName() for when
       * the string is invalid.
       * It might not be a valid "pattern identifier",
       * or it might have only ASCII digits but there is a leading zero
       * or the number is too large.
       * @stable ICU 4.8
       */
      UMSGPAT_ARG_NAME_NOT_VALID=-2
  };
  /**
   * Special value that is returned by getNumericValue(Part) when no
   * numeric value is defined for a part.
   * Expands to the double constant -123456789.
   * @see MessagePattern.getNumericValue()
   * @stable ICU 4.8
   */
  #define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))
  287. U_NAMESPACE_BEGIN
  288. class MessagePatternDoubleList;
  289. class MessagePatternPartsList;
  290. /**
  291. * Parses and represents ICU MessageFormat patterns.
  292. * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
  293. * Used in the implementations of those classes as well as in tools
  294. * for message validation, translation and format conversion.
  295. * <p>
  296. * The parser handles all syntax relevant for identifying message arguments.
  297. * This includes "complex" arguments whose style strings contain
  298. * nested MessageFormat pattern substrings.
  299. * For "simple" arguments (with no nested MessageFormat pattern substrings),
  300. * the argument style is not parsed any further.
  301. * <p>
  302. * The parser handles named and numbered message arguments and allows both in one message.
  303. * <p>
  304. * Once a pattern has been parsed successfully, iterate through the parsed data
  305. * with countParts(), getPart() and related methods.
  306. * <p>
  307. * The data logically represents a parse tree, but is stored and accessed
  308. * as a list of "parts" for fast and simple parsing and to minimize object allocations.
  309. * Arguments and nested messages are best handled via recursion.
  310. * For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns
  311. * the index of the corresponding _LIMIT "part".
  312. * <p>
  313. * List of "parts":
  314. * <pre>
  315. * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
  316. * argument = noneArg | simpleArg | complexArg
  317. * complexArg = choiceArg | pluralArg | selectArg
  318. *
  319. * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
  320. * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
  321. * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
  322. * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
  323. * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
  324. *
  325. * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
  326. * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
  327. * selectStyle = (ARG_SELECTOR message)+
  328. * </pre>
  329. * <ul>
  330. * <li>Literal output text is not represented directly by "parts" but accessed
  331. * between parts of a message, from one part's getLimit() to the next part's getIndex().
  332. * <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
  333. * <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or
  334. * the less-than-or-equal-to sign (U+2264).
  335. * <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
  336. * The optional numeric Part between each (ARG_SELECTOR, message) pair
  337. * is the value of an explicit-number selector like "=2",
  338. * otherwise the selector is a non-numeric identifier.
  339. * <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
  340. * </ul>
  341. * <p>
  342. * This class is not intended for public subclassing.
  343. *
  344. * @stable ICU 4.8
  345. */
  346. class U_COMMON_API MessagePattern : public UObject {
  347. public:
  348. /**
  349. * Constructs an empty MessagePattern with default UMessagePatternApostropheMode.
  350. * @param errorCode Standard ICU error code. Its input value must
  351. * pass the U_SUCCESS() test, or else the function returns
  352. * immediately. Check for U_FAILURE() on output or use with
  353. * function chaining. (See User Guide for details.)
  354. * @stable ICU 4.8
  355. */
  356. MessagePattern(UErrorCode &errorCode);
  357. /**
  358. * Constructs an empty MessagePattern.
  359. * @param mode Explicit UMessagePatternApostropheMode.
  360. * @param errorCode Standard ICU error code. Its input value must
  361. * pass the U_SUCCESS() test, or else the function returns
  362. * immediately. Check for U_FAILURE() on output or use with
  363. * function chaining. (See User Guide for details.)
  364. * @stable ICU 4.8
  365. */
  366. MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode);
  367. /**
  368. * Constructs a MessagePattern with default UMessagePatternApostropheMode and
  369. * parses the MessageFormat pattern string.
  370. * @param pattern a MessageFormat pattern string
  371. * @param parseError Struct to receive information on the position
  372. * of an error within the pattern.
  373. * Can be nullptr.
  374. * @param errorCode Standard ICU error code. Its input value must
  375. * pass the U_SUCCESS() test, or else the function returns
  376. * immediately. Check for U_FAILURE() on output or use with
  377. * function chaining. (See User Guide for details.)
  378. * TODO: turn @throws into UErrorCode specifics?
  379. * @throws IllegalArgumentException for syntax errors in the pattern string
  380. * @throws IndexOutOfBoundsException if certain limits are exceeded
  381. * (e.g., argument number too high, argument name too long, etc.)
  382. * @throws NumberFormatException if a number could not be parsed
  383. * @stable ICU 4.8
  384. */
  385. MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
  386. /**
  387. * Copy constructor.
  388. * @param other Object to copy.
  389. * @stable ICU 4.8
  390. */
  391. MessagePattern(const MessagePattern &other);
  392. /**
  393. * Assignment operator.
  394. * @param other Object to copy.
  395. * @return *this=other
  396. * @stable ICU 4.8
  397. */
  398. MessagePattern &operator=(const MessagePattern &other);
  399. /**
  400. * Destructor.
  401. * @stable ICU 4.8
  402. */
  403. virtual ~MessagePattern();
  404. /**
  405. * Parses a MessageFormat pattern string.
  406. * @param pattern a MessageFormat pattern string
  407. * @param parseError Struct to receive information on the position
  408. * of an error within the pattern.
  409. * Can be nullptr.
  410. * @param errorCode Standard ICU error code. Its input value must
  411. * pass the U_SUCCESS() test, or else the function returns
  412. * immediately. Check for U_FAILURE() on output or use with
  413. * function chaining. (See User Guide for details.)
  414. * @return *this
  415. * @throws IllegalArgumentException for syntax errors in the pattern string
  416. * @throws IndexOutOfBoundsException if certain limits are exceeded
  417. * (e.g., argument number too high, argument name too long, etc.)
  418. * @throws NumberFormatException if a number could not be parsed
  419. * @stable ICU 4.8
  420. */
  421. MessagePattern &parse(const UnicodeString &pattern,
  422. UParseError *parseError, UErrorCode &errorCode);
  423. /**
  424. * Parses a ChoiceFormat pattern string.
  425. * @param pattern a ChoiceFormat pattern string
  426. * @param parseError Struct to receive information on the position
  427. * of an error within the pattern.
  428. * Can be nullptr.
  429. * @param errorCode Standard ICU error code. Its input value must
  430. * pass the U_SUCCESS() test, or else the function returns
  431. * immediately. Check for U_FAILURE() on output or use with
  432. * function chaining. (See User Guide for details.)
  433. * @return *this
  434. * @throws IllegalArgumentException for syntax errors in the pattern string
  435. * @throws IndexOutOfBoundsException if certain limits are exceeded
  436. * (e.g., argument number too high, argument name too long, etc.)
  437. * @throws NumberFormatException if a number could not be parsed
  438. * @stable ICU 4.8
  439. */
  440. MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
  441. UParseError *parseError, UErrorCode &errorCode);
  442. /**
  443. * Parses a PluralFormat pattern string.
  444. * @param pattern a PluralFormat pattern string
  445. * @param parseError Struct to receive information on the position
  446. * of an error within the pattern.
  447. * Can be nullptr.
  448. * @param errorCode Standard ICU error code. Its input value must
  449. * pass the U_SUCCESS() test, or else the function returns
  450. * immediately. Check for U_FAILURE() on output or use with
  451. * function chaining. (See User Guide for details.)
  452. * @return *this
  453. * @throws IllegalArgumentException for syntax errors in the pattern string
  454. * @throws IndexOutOfBoundsException if certain limits are exceeded
  455. * (e.g., argument number too high, argument name too long, etc.)
  456. * @throws NumberFormatException if a number could not be parsed
  457. * @stable ICU 4.8
  458. */
  459. MessagePattern &parsePluralStyle(const UnicodeString &pattern,
  460. UParseError *parseError, UErrorCode &errorCode);
  461. /**
  462. * Parses a SelectFormat pattern string.
  463. * @param pattern a SelectFormat pattern string
  464. * @param parseError Struct to receive information on the position
  465. * of an error within the pattern.
  466. * Can be nullptr.
  467. * @param errorCode Standard ICU error code. Its input value must
  468. * pass the U_SUCCESS() test, or else the function returns
  469. * immediately. Check for U_FAILURE() on output or use with
  470. * function chaining. (See User Guide for details.)
  471. * @return *this
  472. * @throws IllegalArgumentException for syntax errors in the pattern string
  473. * @throws IndexOutOfBoundsException if certain limits are exceeded
  474. * (e.g., argument number too high, argument name too long, etc.)
  475. * @throws NumberFormatException if a number could not be parsed
  476. * @stable ICU 4.8
  477. */
  478. MessagePattern &parseSelectStyle(const UnicodeString &pattern,
  479. UParseError *parseError, UErrorCode &errorCode);
  480. /**
  481. * Clears this MessagePattern.
  482. * countParts() will return 0.
  483. * @stable ICU 4.8
  484. */
  485. void clear();
  486. /**
  487. * Clears this MessagePattern and sets the UMessagePatternApostropheMode.
  488. * countParts() will return 0.
  489. * @param mode The new UMessagePatternApostropheMode.
  490. * @stable ICU 4.8
  491. */
  492. void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) {
  493. clear();
  494. aposMode=mode;
  495. }
  496. /**
  497. * @param other another object to compare with.
  498. * @return true if this object is equivalent to the other one.
  499. * @stable ICU 4.8
  500. */
  501. bool operator==(const MessagePattern &other) const;
  502. /**
  503. * @param other another object to compare with.
  504. * @return false if this object is equivalent to the other one.
  505. * @stable ICU 4.8
  506. */
  507. inline bool operator!=(const MessagePattern &other) const {
  508. return !operator==(other);
  509. }
  510. /**
  511. * @return A hash code for this object.
  512. * @stable ICU 4.8
  513. */
  514. int32_t hashCode() const;
  515. /**
  516. * @return this instance's UMessagePatternApostropheMode.
  517. * @stable ICU 4.8
  518. */
  519. UMessagePatternApostropheMode getApostropheMode() const {
  520. return aposMode;
  521. }
  522. // Java has package-private jdkAposMode() here.
  523. // In C++, this is declared in the MessageImpl class.
  524. /**
  525. * @return the parsed pattern string (null if none was parsed).
  526. * @stable ICU 4.8
  527. */
  528. const UnicodeString &getPatternString() const {
  529. return msg;
  530. }
  531. /**
  532. * Does the parsed pattern have named arguments like {first_name}?
  533. * @return true if the parsed pattern has at least one named argument.
  534. * @stable ICU 4.8
  535. */
  536. UBool hasNamedArguments() const {
  537. return hasArgNames;
  538. }
  539. /**
  540. * Does the parsed pattern have numbered arguments like {2}?
  541. * @return true if the parsed pattern has at least one numbered argument.
  542. * @stable ICU 4.8
  543. */
  544. UBool hasNumberedArguments() const {
  545. return hasArgNumbers;
  546. }
  547. /**
  548. * Validates and parses an argument name or argument number string.
  549. * An argument name must be a "pattern identifier", that is, it must contain
  550. * no Unicode Pattern_Syntax or Pattern_White_Space characters.
  551. * If it only contains ASCII digits, then it must be a small integer with no leading zero.
  552. * @param name Input string.
  553. * @return &gt;=0 if the name is a valid number,
  554. * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
  555. * ARG_NAME_NOT_VALID (-2) if it is neither.
  556. * @stable ICU 4.8
  557. */
  558. static int32_t validateArgumentName(const UnicodeString &name);
  559. /**
  560. * Returns a version of the parsed pattern string where each ASCII apostrophe
  561. * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
  562. * <p>
  563. * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
  564. * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
  565. * @return the deep-auto-quoted version of the parsed pattern string.
  566. * @see MessageFormat.autoQuoteApostrophe()
  567. * @stable ICU 4.8
  568. */
  569. UnicodeString autoQuoteApostropheDeep() const;
  570. class Part;
  571. /**
  572. * Returns the number of "parts" created by parsing the pattern string.
  573. * Returns 0 if no pattern has been parsed or clear() was called.
  574. * @return the number of pattern parts.
  575. * @stable ICU 4.8
  576. */
  577. int32_t countParts() const {
  578. return partsLength;
  579. }
  580. /**
  581. * Gets the i-th pattern "part".
  582. * @param i The index of the Part data. (0..countParts()-1)
  583. * @return the i-th pattern "part".
  584. * @stable ICU 4.8
  585. */
  586. const Part &getPart(int32_t i) const {
  587. return parts[i];
  588. }
  589. /**
  590. * Returns the UMessagePatternPartType of the i-th pattern "part".
  591. * Convenience method for getPart(i).getType().
  592. * @param i The index of the Part data. (0..countParts()-1)
  593. * @return The UMessagePatternPartType of the i-th Part.
  594. * @stable ICU 4.8
  595. */
  596. UMessagePatternPartType getPartType(int32_t i) const {
  597. return getPart(i).type;
  598. }
  599. /**
  600. * Returns the pattern index of the specified pattern "part".
  601. * Convenience method for getPart(partIndex).getIndex().
  602. * @param partIndex The index of the Part data. (0..countParts()-1)
  603. * @return The pattern index of this Part.
  604. * @stable ICU 4.8
  605. */
  606. int32_t getPatternIndex(int32_t partIndex) const {
  607. return getPart(partIndex).index;
  608. }
  609. /**
  610. * Returns the substring of the pattern string indicated by the Part.
  611. * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()).
  612. * @param part a part of this MessagePattern.
  613. * @return the substring associated with part.
  614. * @stable ICU 4.8
  615. */
  616. UnicodeString getSubstring(const Part &part) const {
  617. return msg.tempSubString(part.index, part.length);
  618. }
  619. /**
  620. * Compares the part's substring with the input string s.
  621. * @param part a part of this MessagePattern.
  622. * @param s a string.
  623. * @return true if getSubstring(part).equals(s).
  624. * @stable ICU 4.8
  625. */
  626. UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
  627. return 0==msg.compare(part.index, part.length, s);
  628. }
  629. /**
  630. * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
  631. * @param part a part of this MessagePattern.
  632. * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part.
  633. * @stable ICU 4.8
  634. */
  635. double getNumericValue(const Part &part) const;
  636. /**
  637. * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
  638. * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
  639. * @return the "offset:" value.
  640. * @stable ICU 4.8
  641. */
  642. double getPluralOffset(int32_t pluralStart) const;
  643. /**
  644. * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
  645. * @param start The index of some Part data (0..countParts()-1);
  646. * this Part should be of Type ARG_START or MSG_START.
  647. * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
  648. * or start itself if getPartType(msgStart)!=ARG|MSG_START.
  649. * @stable ICU 4.8
  650. */
  651. int32_t getLimitPartIndex(int32_t start) const {
  652. int32_t limit=getPart(start).limitPartIndex;
  653. if(limit<start) {
  654. return start;
  655. }
  656. return limit;
  657. }
  658. /**
  659. * A message pattern "part", representing a pattern parsing event.
  660. * There is a part for the start and end of a message or argument,
  661. * for quoting and escaping of and with ASCII apostrophes,
  662. * and for syntax elements of "complex" arguments.
  663. * @stable ICU 4.8
  664. */
  665. class Part : public UMemory {
  666. public:
  667. /**
  668. * Default constructor, do not use.
  669. * @internal
  670. */
  671. Part() {}
  672. /**
  673. * Returns the type of this part.
  674. * @return the part type.
  675. * @stable ICU 4.8
  676. */
  677. UMessagePatternPartType getType() const {
  678. return type;
  679. }
  680. /**
  681. * Returns the pattern string index associated with this Part.
  682. * @return this part's pattern string index.
  683. * @stable ICU 4.8
  684. */
  685. int32_t getIndex() const {
  686. return index;
  687. }
  688. /**
  689. * Returns the length of the pattern substring associated with this Part.
  690. * This is 0 for some parts.
  691. * @return this part's pattern substring length.
  692. * @stable ICU 4.8
  693. */
  694. int32_t getLength() const {
  695. return length;
  696. }
  697. /**
  698. * Returns the pattern string limit (exclusive-end) index associated with this Part.
  699. * Convenience method for getIndex()+getLength().
  700. * @return this part's pattern string limit index, same as getIndex()+getLength().
  701. * @stable ICU 4.8
  702. */
  703. int32_t getLimit() const {
  704. return index+length;
  705. }
  706. /**
  707. * Returns a value associated with this part.
  708. * See the documentation of each part type for details.
  709. * @return the part value.
  710. * @stable ICU 4.8
  711. */
  712. int32_t getValue() const {
  713. return value;
  714. }
  715. /**
  716. * Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
  717. * otherwise UMSGPAT_ARG_TYPE_NONE.
  718. * @return the argument type for this part.
  719. * @stable ICU 4.8
  720. */
  721. UMessagePatternArgType getArgType() const {
  722. UMessagePatternPartType msgType=getType();
  723. if(msgType ==UMSGPAT_PART_TYPE_ARG_START || msgType ==UMSGPAT_PART_TYPE_ARG_LIMIT) {
  724. return (UMessagePatternArgType)value;
  725. } else {
  726. return UMSGPAT_ARG_TYPE_NONE;
  727. }
  728. }
  729. /**
  730. * Indicates whether the Part type has a numeric value.
  731. * If so, then that numeric value can be retrieved via MessagePattern.getNumericValue().
  732. * @param type The Part type to be tested.
  733. * @return true if the Part type has a numeric value.
  734. * @stable ICU 4.8
  735. */
  736. static UBool hasNumericValue(UMessagePatternPartType type) {
  737. return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE;
  738. }
  739. /**
  740. * @param other another object to compare with.
  741. * @return true if this object is equivalent to the other one.
  742. * @stable ICU 4.8
  743. */
  744. bool operator==(const Part &other) const;
  745. /**
  746. * @param other another object to compare with.
  747. * @return false if this object is equivalent to the other one.
  748. * @stable ICU 4.8
  749. */
  750. inline bool operator!=(const Part &other) const {
  751. return !operator==(other);
  752. }
  753. /**
  754. * @return A hash code for this object.
  755. * @stable ICU 4.8
  756. */
  757. int32_t hashCode() const {
  758. return ((type*37+index)*37+length)*37+value;
  759. }
  760. private:
  761. friend class MessagePattern;
  762. static const int32_t MAX_LENGTH=0xffff;
  763. static const int32_t MAX_VALUE=0x7fff;
  764. // Some fields are not final because they are modified during pattern parsing.
  765. // After pattern parsing, the parts are effectively immutable.
  766. UMessagePatternPartType type;
  767. int32_t index;
  768. uint16_t length;
  769. int16_t value;
  770. int32_t limitPartIndex;
  771. };
  772. private:
  773. void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
  774. void postParse();
  775. int32_t parseMessage(int32_t index, int32_t msgStartLength,
  776. int32_t nestingLevel, UMessagePatternArgType parentType,
  777. UParseError *parseError, UErrorCode &errorCode);
  778. int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
  779. UParseError *parseError, UErrorCode &errorCode);
  780. int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
  781. int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
  782. UParseError *parseError, UErrorCode &errorCode);
  783. int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
  784. UParseError *parseError, UErrorCode &errorCode);
  785. /**
  786. * Validates and parses an argument name or argument number string.
  787. * This internal method assumes that the input substring is a "pattern identifier".
  788. * @return &gt;=0 if the name is a valid number,
  789. * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
  790. * ARG_NAME_NOT_VALID (-2) if it is neither.
  791. * @see #validateArgumentName(String)
  792. */
  793. static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
  794. int32_t parseArgNumber(int32_t start, int32_t limit) {
  795. return parseArgNumber(msg, start, limit);
  796. }
  797. /**
  798. * Parses a number from the specified message substring.
  799. * @param start start index into the message string
  800. * @param limit limit index into the message string, must be start<limit
  801. * @param allowInfinity true if U+221E is allowed (for ChoiceFormat)
  802. * @param parseError
  803. * @param errorCode
  804. */
  805. void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
  806. UParseError *parseError, UErrorCode &errorCode);
  807. // Java has package-private appendReducedApostrophes() here.
  808. // In C++, this is declared in the MessageImpl class.
  809. int32_t skipWhiteSpace(int32_t index);
  810. int32_t skipIdentifier(int32_t index);
  811. /**
  812. * Skips a sequence of characters that could occur in a double value.
  813. * Does not fully parse or validate the value.
  814. */
  815. int32_t skipDouble(int32_t index);
  816. static UBool isArgTypeChar(UChar32 c);
  817. UBool isChoice(int32_t index);
  818. UBool isPlural(int32_t index);
  819. UBool isSelect(int32_t index);
  820. UBool isOrdinal(int32_t index);
  821. /**
  822. * @return true if we are inside a MessageFormat (sub-)pattern,
  823. * as opposed to inside a top-level choice/plural/select pattern.
  824. */
  825. UBool inMessageFormatPattern(int32_t nestingLevel);
  826. /**
  827. * @return true if we are in a MessageFormat sub-pattern
  828. * of a top-level ChoiceFormat pattern.
  829. */
  830. UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
  831. void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
  832. int32_t value, UErrorCode &errorCode);
  833. void addLimitPart(int32_t start,
  834. UMessagePatternPartType type, int32_t index, int32_t length,
  835. int32_t value, UErrorCode &errorCode);
  836. void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
  837. void setParseError(UParseError *parseError, int32_t index);
  838. UBool init(UErrorCode &errorCode);
  839. UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
  840. UMessagePatternApostropheMode aposMode;
  841. UnicodeString msg;
  842. // ArrayList<Part> parts=new ArrayList<Part>();
  843. MessagePatternPartsList *partsList;
  844. Part *parts;
  845. int32_t partsLength;
  846. // ArrayList<Double> numericValues;
  847. MessagePatternDoubleList *numericValuesList;
  848. double *numericValues;
  849. int32_t numericValuesLength;
  850. UBool hasArgNames;
  851. UBool hasArgNumbers;
  852. UBool needsAutoQuoting;
  853. };
  854. U_NAMESPACE_END
  855. #endif // !UCONFIG_NO_FORMATTING
  856. #endif /* U_SHOW_CPLUSPLUS_API */
  857. #endif // __MESSAGEPATTERN_H__