txExprLexer.h 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
  2. /* This Source Code Form is subject to the terms of the Mozilla Public
  3. * License, v. 2.0. If a copy of the MPL was not distributed with this
  4. * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  5. #ifndef MITREXSL_EXPRLEXER_H
  6. #define MITREXSL_EXPRLEXER_H
  7. #include "txCore.h"
  8. #include "nsString.h"
  9. /**
  10. * A Token class for the ExprLexer.
  11. *
  12. * This class was ported from XSL:P, an open source Java based
  13. * XSLT processor, written by yours truly.
  14. */
  15. class Token
  16. {
  17. public:
  18. /**
  19. * Token types
  20. */
  21. enum Type {
  22. //-- Trivial Tokens
  23. NULL_TOKEN = 1,
  24. LITERAL,
  25. NUMBER,
  26. CNAME,
  27. VAR_REFERENCE,
  28. PARENT_NODE,
  29. SELF_NODE,
  30. R_PAREN,
  31. R_BRACKET, // 9
  32. /**
  33. * start of tokens for 3.7, bullet 1
  34. * ExprLexer::nextIsOperatorToken bails if the tokens aren't
  35. * consecutive.
  36. */
  37. COMMA,
  38. AT_SIGN,
  39. L_PAREN,
  40. L_BRACKET,
  41. AXIS_IDENTIFIER,
  42. // These tokens include their following left parenthesis
  43. FUNCTION_NAME_AND_PAREN, // 15
  44. COMMENT_AND_PAREN,
  45. NODE_AND_PAREN,
  46. PROC_INST_AND_PAREN,
  47. TEXT_AND_PAREN,
  48. /**
  49. * operators
  50. */
  51. //-- boolean ops
  52. AND_OP, // 20
  53. OR_OP,
  54. //-- relational
  55. EQUAL_OP, // 22
  56. NOT_EQUAL_OP,
  57. LESS_THAN_OP,
  58. GREATER_THAN_OP,
  59. LESS_OR_EQUAL_OP,
  60. GREATER_OR_EQUAL_OP,
  61. //-- additive operators
  62. ADDITION_OP, // 28
  63. SUBTRACTION_OP,
  64. //-- multiplicative
  65. DIVIDE_OP, // 30
  66. MULTIPLY_OP,
  67. MODULUS_OP,
  68. //-- path operators
  69. PARENT_OP, // 33
  70. ANCESTOR_OP,
  71. UNION_OP,
  72. /**
  73. * end of tokens for 3.7, bullet 1 -/
  74. */
  75. //-- Special endtoken
  76. END // 36
  77. };
  78. /**
  79. * Constructors
  80. */
  81. typedef nsASingleFragmentString::const_char_iterator iterator;
  82. Token(iterator aStart, iterator aEnd, Type aType)
  83. : mStart(aStart),
  84. mEnd(aEnd),
  85. mType(aType),
  86. mNext(nullptr)
  87. {
  88. }
  89. Token(iterator aChar, Type aType)
  90. : mStart(aChar),
  91. mEnd(aChar + 1),
  92. mType(aType),
  93. mNext(nullptr)
  94. {
  95. }
  96. const nsDependentSubstring Value()
  97. {
  98. return Substring(mStart, mEnd);
  99. }
  100. iterator mStart, mEnd;
  101. Type mType;
  102. Token* mNext;
  103. };
  104. /**
  105. * A class for splitting an "Expr" String into tokens and
  106. * performing basic Lexical Analysis.
  107. *
  108. * This class was ported from XSL:P, an open source Java based XSL processor
  109. */
  110. class txExprLexer
  111. {
  112. public:
  113. txExprLexer();
  114. ~txExprLexer();
  115. /**
  116. * Parse the given string.
  117. * returns an error result if lexing failed.
  118. * The given string must outlive the use of the lexer, as the
  119. * generated Tokens point to Substrings of it.
  120. * mPosition points to the offending location in case of an error.
  121. */
  122. nsresult parse(const nsASingleFragmentString& aPattern);
  123. typedef nsASingleFragmentString::const_char_iterator iterator;
  124. iterator mPosition;
  125. /**
  126. * Functions for iterating over the TokenList
  127. */
  128. Token* nextToken();
  129. Token* peek()
  130. {
  131. NS_ASSERTION(mCurrentItem, "peek called uninitialized lexer");
  132. return mCurrentItem;
  133. }
  134. Token* peekAhead()
  135. {
  136. NS_ASSERTION(mCurrentItem, "peekAhead called on uninitialized lexer");
  137. // Don't peek past the end node
  138. return (mCurrentItem && mCurrentItem->mNext) ? mCurrentItem->mNext : mCurrentItem;
  139. }
  140. bool hasMoreTokens()
  141. {
  142. NS_ASSERTION(mCurrentItem, "HasMoreTokens called on uninitialized lexer");
  143. return (mCurrentItem && mCurrentItem->mType != Token::END);
  144. }
  145. /**
  146. * Trivial Tokens
  147. */
  148. //-- LF, changed to enum
  149. enum _TrivialTokens {
  150. D_QUOTE = '\"',
  151. S_QUOTE = '\'',
  152. L_PAREN = '(',
  153. R_PAREN = ')',
  154. L_BRACKET = '[',
  155. R_BRACKET = ']',
  156. L_ANGLE = '<',
  157. R_ANGLE = '>',
  158. COMMA = ',',
  159. PERIOD = '.',
  160. ASTERISK = '*',
  161. FORWARD_SLASH = '/',
  162. EQUAL = '=',
  163. BANG = '!',
  164. VERT_BAR = '|',
  165. AT_SIGN = '@',
  166. DOLLAR_SIGN = '$',
  167. PLUS = '+',
  168. HYPHEN = '-',
  169. COLON = ':',
  170. //-- whitespace tokens
  171. SPACE = ' ',
  172. TX_TAB = '\t',
  173. TX_CR = '\n',
  174. TX_LF = '\r'
  175. };
  176. private:
  177. Token* mCurrentItem;
  178. Token* mFirstItem;
  179. Token* mLastItem;
  180. int mTokenCount;
  181. void addToken(Token* aToken);
  182. /**
  183. * Returns true if the following Token should be an operator.
  184. * This is a helper for the first bullet of [XPath 3.7]
  185. * Lexical Structure
  186. */
  187. bool nextIsOperatorToken(Token* aToken);
  188. /**
  189. * Returns true if the given character represents a numeric letter (digit)
  190. * Implemented in ExprLexerChars.cpp
  191. */
  192. static bool isXPathDigit(char16_t ch)
  193. {
  194. return (ch >= '0' && ch <= '9');
  195. }
  196. };
  197. #endif