txExprLexer.cpp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368
  1. /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
  2. /* This Source Code Form is subject to the terms of the Mozilla Public
  3. * License, v. 2.0. If a copy of the MPL was not distributed with this
  4. * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  5. /**
  6. * Lexical analyzer for XPath expressions
  7. */
  8. #include "txExprLexer.h"
  9. #include "nsGkAtoms.h"
  10. #include "nsString.h"
  11. #include "nsError.h"
  12. #include "txXMLUtils.h"
  13. /**
  14. * Creates a new ExprLexer
  15. */
  16. txExprLexer::txExprLexer()
  17. : mCurrentItem(nullptr),
  18. mFirstItem(nullptr),
  19. mLastItem(nullptr),
  20. mTokenCount(0)
  21. {
  22. }
  23. /**
  24. * Destroys this instance of an txExprLexer
  25. */
  26. txExprLexer::~txExprLexer()
  27. {
  28. //-- delete tokens
  29. Token* tok = mFirstItem;
  30. while (tok) {
  31. Token* temp = tok->mNext;
  32. delete tok;
  33. tok = temp;
  34. }
  35. mCurrentItem = nullptr;
  36. }
  37. Token*
  38. txExprLexer::nextToken()
  39. {
  40. if (!mCurrentItem) {
  41. NS_NOTREACHED("nextToken called on uninitialized lexer");
  42. return nullptr;
  43. }
  44. if (mCurrentItem->mType == Token::END) {
  45. // Do not progress beyond the end token
  46. return mCurrentItem;
  47. }
  48. Token* token = mCurrentItem;
  49. mCurrentItem = mCurrentItem->mNext;
  50. return token;
  51. }
  52. void
  53. txExprLexer::addToken(Token* aToken)
  54. {
  55. if (mLastItem) {
  56. mLastItem->mNext = aToken;
  57. }
  58. if (!mFirstItem) {
  59. mFirstItem = aToken;
  60. mCurrentItem = aToken;
  61. }
  62. mLastItem = aToken;
  63. ++mTokenCount;
  64. }
  65. /**
  66. * Returns true if the following Token should be an operator.
  67. * This is a helper for the first bullet of [XPath 3.7]
  68. * Lexical Structure
  69. */
  70. bool
  71. txExprLexer::nextIsOperatorToken(Token* aToken)
  72. {
  73. if (!aToken || aToken->mType == Token::NULL_TOKEN) {
  74. return false;
  75. }
  76. /* This relies on the tokens having the right order in txExprLexer.h */
  77. return aToken->mType < Token::COMMA ||
  78. aToken->mType > Token::UNION_OP;
  79. }
  80. /**
  81. * Parses the given string into a sequence of Tokens
  82. */
  83. nsresult
  84. txExprLexer::parse(const nsASingleFragmentString& aPattern)
  85. {
  86. iterator start, end;
  87. start = aPattern.BeginReading(mPosition);
  88. aPattern.EndReading(end);
  89. //-- initialize previous token, this will automatically get
  90. //-- deleted when it goes out of scope
  91. Token nullToken(nullptr, nullptr, Token::NULL_TOKEN);
  92. Token::Type defType;
  93. Token* newToken = nullptr;
  94. Token* prevToken = &nullToken;
  95. bool isToken;
  96. while (mPosition < end) {
  97. defType = Token::CNAME;
  98. isToken = true;
  99. if (*mPosition == DOLLAR_SIGN) {
  100. if (++mPosition == end || !XMLUtils::isLetter(*mPosition)) {
  101. return NS_ERROR_XPATH_INVALID_VAR_NAME;
  102. }
  103. defType = Token::VAR_REFERENCE;
  104. }
  105. // just reuse the QName parsing, which will use defType
  106. // the token to construct
  107. if (XMLUtils::isLetter(*mPosition)) {
  108. // NCName, can get QName or OperatorName;
  109. // FunctionName, NodeName, and AxisSpecifier may want whitespace,
  110. // and are dealt with below
  111. start = mPosition;
  112. while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) {
  113. /* just go */
  114. }
  115. if (mPosition < end && *mPosition == COLON) {
  116. // try QName or wildcard, might need to step back for axis
  117. if (++mPosition == end) {
  118. return NS_ERROR_XPATH_UNEXPECTED_END;
  119. }
  120. if (XMLUtils::isLetter(*mPosition)) {
  121. while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) {
  122. /* just go */
  123. }
  124. }
  125. else if (*mPosition == '*' && defType != Token::VAR_REFERENCE) {
  126. // eat wildcard for NameTest, bail for var ref at COLON
  127. ++mPosition;
  128. }
  129. else {
  130. --mPosition; // step back
  131. }
  132. }
  133. if (nextIsOperatorToken(prevToken)) {
  134. nsDependentSubstring op(Substring(start, mPosition));
  135. if (nsGkAtoms::_and->Equals(op)) {
  136. defType = Token::AND_OP;
  137. }
  138. else if (nsGkAtoms::_or->Equals(op)) {
  139. defType = Token::OR_OP;
  140. }
  141. else if (nsGkAtoms::mod->Equals(op)) {
  142. defType = Token::MODULUS_OP;
  143. }
  144. else if (nsGkAtoms::div->Equals(op)) {
  145. defType = Token::DIVIDE_OP;
  146. }
  147. else {
  148. // XXX QUESTION: spec is not too precise
  149. // badops is sure an error, but is bad:ops, too? We say yes!
  150. return NS_ERROR_XPATH_OPERATOR_EXPECTED;
  151. }
  152. }
  153. newToken = new Token(start, mPosition, defType);
  154. }
  155. else if (isXPathDigit(*mPosition)) {
  156. start = mPosition;
  157. while (++mPosition < end && isXPathDigit(*mPosition)) {
  158. /* just go */
  159. }
  160. if (mPosition < end && *mPosition == '.') {
  161. while (++mPosition < end && isXPathDigit(*mPosition)) {
  162. /* just go */
  163. }
  164. }
  165. newToken = new Token(start, mPosition, Token::NUMBER);
  166. }
  167. else {
  168. switch (*mPosition) {
  169. //-- ignore whitespace
  170. case SPACE:
  171. case TX_TAB:
  172. case TX_CR:
  173. case TX_LF:
  174. ++mPosition;
  175. isToken = false;
  176. break;
  177. case S_QUOTE :
  178. case D_QUOTE :
  179. start = mPosition;
  180. while (++mPosition < end && *mPosition != *start) {
  181. // eat literal
  182. }
  183. if (mPosition == end) {
  184. mPosition = start;
  185. return NS_ERROR_XPATH_UNCLOSED_LITERAL;
  186. }
  187. newToken = new Token(start + 1, mPosition, Token::LITERAL);
  188. ++mPosition;
  189. break;
  190. case PERIOD:
  191. // period can be .., .(DIGITS)+ or ., check next
  192. if (++mPosition == end) {
  193. newToken = new Token(mPosition - 1, Token::SELF_NODE);
  194. }
  195. else if (isXPathDigit(*mPosition)) {
  196. start = mPosition - 1;
  197. while (++mPosition < end && isXPathDigit(*mPosition)) {
  198. /* just go */
  199. }
  200. newToken = new Token(start, mPosition, Token::NUMBER);
  201. }
  202. else if (*mPosition == PERIOD) {
  203. ++mPosition;
  204. newToken = new Token(mPosition - 2, mPosition, Token::PARENT_NODE);
  205. }
  206. else {
  207. newToken = new Token(mPosition - 1, Token::SELF_NODE);
  208. }
  209. break;
  210. case COLON: // QNames are dealt above, must be axis ident
  211. if (++mPosition >= end || *mPosition != COLON ||
  212. prevToken->mType != Token::CNAME) {
  213. return NS_ERROR_XPATH_BAD_COLON;
  214. }
  215. prevToken->mType = Token::AXIS_IDENTIFIER;
  216. ++mPosition;
  217. isToken = false;
  218. break;
  219. case FORWARD_SLASH :
  220. if (++mPosition < end && *mPosition == FORWARD_SLASH) {
  221. ++mPosition;
  222. newToken = new Token(mPosition - 2, mPosition, Token::ANCESTOR_OP);
  223. }
  224. else {
  225. newToken = new Token(mPosition - 1, Token::PARENT_OP);
  226. }
  227. break;
  228. case BANG : // can only be !=
  229. if (++mPosition < end && *mPosition == EQUAL) {
  230. ++mPosition;
  231. newToken = new Token(mPosition - 2, mPosition, Token::NOT_EQUAL_OP);
  232. break;
  233. }
  234. // Error ! is not not()
  235. return NS_ERROR_XPATH_BAD_BANG;
  236. case EQUAL:
  237. newToken = new Token(mPosition, Token::EQUAL_OP);
  238. ++mPosition;
  239. break;
  240. case L_ANGLE:
  241. if (++mPosition == end) {
  242. return NS_ERROR_XPATH_UNEXPECTED_END;
  243. }
  244. if (*mPosition == EQUAL) {
  245. ++mPosition;
  246. newToken = new Token(mPosition - 2, mPosition,
  247. Token::LESS_OR_EQUAL_OP);
  248. }
  249. else {
  250. newToken = new Token(mPosition - 1, Token::LESS_THAN_OP);
  251. }
  252. break;
  253. case R_ANGLE:
  254. if (++mPosition == end) {
  255. return NS_ERROR_XPATH_UNEXPECTED_END;
  256. }
  257. if (*mPosition == EQUAL) {
  258. ++mPosition;
  259. newToken = new Token(mPosition - 2, mPosition,
  260. Token::GREATER_OR_EQUAL_OP);
  261. }
  262. else {
  263. newToken = new Token(mPosition - 1, Token::GREATER_THAN_OP);
  264. }
  265. break;
  266. case HYPHEN :
  267. newToken = new Token(mPosition, Token::SUBTRACTION_OP);
  268. ++mPosition;
  269. break;
  270. case ASTERISK:
  271. if (nextIsOperatorToken(prevToken)) {
  272. newToken = new Token(mPosition, Token::MULTIPLY_OP);
  273. }
  274. else {
  275. newToken = new Token(mPosition, Token::CNAME);
  276. }
  277. ++mPosition;
  278. break;
  279. case L_PAREN:
  280. if (prevToken->mType == Token::CNAME) {
  281. const nsDependentSubstring& val = prevToken->Value();
  282. if (val.EqualsLiteral("comment")) {
  283. prevToken->mType = Token::COMMENT_AND_PAREN;
  284. }
  285. else if (val.EqualsLiteral("node")) {
  286. prevToken->mType = Token::NODE_AND_PAREN;
  287. }
  288. else if (val.EqualsLiteral("processing-instruction")) {
  289. prevToken->mType = Token::PROC_INST_AND_PAREN;
  290. }
  291. else if (val.EqualsLiteral("text")) {
  292. prevToken->mType = Token::TEXT_AND_PAREN;
  293. }
  294. else {
  295. prevToken->mType = Token::FUNCTION_NAME_AND_PAREN;
  296. }
  297. isToken = false;
  298. }
  299. else {
  300. newToken = new Token(mPosition, Token::L_PAREN);
  301. }
  302. ++mPosition;
  303. break;
  304. case R_PAREN:
  305. newToken = new Token(mPosition, Token::R_PAREN);
  306. ++mPosition;
  307. break;
  308. case L_BRACKET:
  309. newToken = new Token(mPosition, Token::L_BRACKET);
  310. ++mPosition;
  311. break;
  312. case R_BRACKET:
  313. newToken = new Token(mPosition, Token::R_BRACKET);
  314. ++mPosition;
  315. break;
  316. case COMMA:
  317. newToken = new Token(mPosition, Token::COMMA);
  318. ++mPosition;
  319. break;
  320. case AT_SIGN :
  321. newToken = new Token(mPosition, Token::AT_SIGN);
  322. ++mPosition;
  323. break;
  324. case PLUS:
  325. newToken = new Token(mPosition, Token::ADDITION_OP);
  326. ++mPosition;
  327. break;
  328. case VERT_BAR:
  329. newToken = new Token(mPosition, Token::UNION_OP);
  330. ++mPosition;
  331. break;
  332. default:
  333. // Error, don't grok character :-(
  334. return NS_ERROR_XPATH_ILLEGAL_CHAR;
  335. }
  336. }
  337. if (isToken) {
  338. NS_ENSURE_TRUE(newToken, NS_ERROR_OUT_OF_MEMORY);
  339. NS_ENSURE_TRUE(newToken != mLastItem, NS_ERROR_FAILURE);
  340. prevToken = newToken;
  341. addToken(newToken);
  342. }
  343. }
  344. // add a endToken to the list
  345. newToken = new Token(end, end, Token::END);
  346. addToken(newToken);
  347. return NS_OK;
  348. }