XPathParser.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630
  1. /*
  2. * Copyright 2005 Maksim Orlovich <maksim@kde.org>
  3. * Copyright (C) 2006 Apple Computer, Inc.
  4. * Copyright (C) 2007 Alexey Proskuryakov <ap@webkit.org>
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. *
  10. * 1. Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * 2. Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in the
  14. * documentation and/or other materials provided with the distribution.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  17. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  18. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  19. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  20. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  21. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  22. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  23. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  25. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. #include "config.h"
  28. #include "XPathParser.h"
  29. #include "ExceptionCode.h"
  30. #include "XPathEvaluator.h"
  31. #include "XPathException.h"
  32. #include "XPathNSResolver.h"
  33. #include "XPathPath.h"
  34. #include "XPathStep.h"
  35. #include <wtf/StdLibExtras.h>
  36. #include <wtf/text/StringHash.h>
  37. using namespace WebCore;
  38. using namespace WTF;
  39. using namespace Unicode;
  40. using namespace XPath;
  41. extern int xpathyyparse(WebCore::XPath::Parser*);
  42. #include "XPathGrammar.h"
  43. Parser* Parser::currentParser = 0;
  44. enum XMLCat { NameStart, NameCont, NotPartOfName };
  45. typedef HashMap<String, Step::Axis> AxisNamesMap;
  46. static XMLCat charCat(UChar aChar)
  47. {
  48. //### might need to add some special cases from the XML spec.
  49. if (aChar == '_')
  50. return NameStart;
  51. if (aChar == '.' || aChar == '-')
  52. return NameCont;
  53. CharCategory category = Unicode::category(aChar);
  54. if (category & (Letter_Uppercase | Letter_Lowercase | Letter_Other | Letter_Titlecase | Number_Letter))
  55. return NameStart;
  56. if (category & (Mark_NonSpacing | Mark_SpacingCombining | Mark_Enclosing | Letter_Modifier | Number_DecimalDigit))
  57. return NameCont;
  58. return NotPartOfName;
  59. }
  60. static void setUpAxisNamesMap(AxisNamesMap& axisNames)
  61. {
  62. struct AxisName {
  63. const char* name;
  64. Step::Axis axis;
  65. };
  66. const AxisName axisNameList[] = {
  67. { "ancestor", Step::AncestorAxis },
  68. { "ancestor-or-self", Step::AncestorOrSelfAxis },
  69. { "attribute", Step::AttributeAxis },
  70. { "child", Step::ChildAxis },
  71. { "descendant", Step::DescendantAxis },
  72. { "descendant-or-self", Step::DescendantOrSelfAxis },
  73. { "following", Step::FollowingAxis },
  74. { "following-sibling", Step::FollowingSiblingAxis },
  75. { "namespace", Step::NamespaceAxis },
  76. { "parent", Step::ParentAxis },
  77. { "preceding", Step::PrecedingAxis },
  78. { "preceding-sibling", Step::PrecedingSiblingAxis },
  79. { "self", Step::SelfAxis }
  80. };
  81. for (unsigned i = 0; i < sizeof(axisNameList) / sizeof(axisNameList[0]); ++i)
  82. axisNames.set(axisNameList[i].name, axisNameList[i].axis);
  83. }
  84. static bool isAxisName(const String& name, Step::Axis& type)
  85. {
  86. DEFINE_STATIC_LOCAL(AxisNamesMap, axisNames, ());
  87. if (axisNames.isEmpty())
  88. setUpAxisNamesMap(axisNames);
  89. AxisNamesMap::iterator it = axisNames.find(name);
  90. if (it == axisNames.end())
  91. return false;
  92. type = it->value;
  93. return true;
  94. }
  95. static bool isNodeTypeName(const String& name)
  96. {
  97. DEFINE_STATIC_LOCAL(HashSet<String>, nodeTypeNames, ());
  98. if (nodeTypeNames.isEmpty()) {
  99. nodeTypeNames.add("comment");
  100. nodeTypeNames.add("text");
  101. nodeTypeNames.add("processing-instruction");
  102. nodeTypeNames.add("node");
  103. }
  104. return nodeTypeNames.contains(name);
  105. }
  106. // Returns whether the current token can possibly be a binary operator, given
  107. // the previous token. Necessary to disambiguate some of the operators
  108. // (* (multiply), div, and, or, mod) in the [32] Operator rule
  109. // (check http://www.w3.org/TR/xpath#exprlex).
  110. bool Parser::isBinaryOperatorContext() const
  111. {
  112. switch (m_lastTokenType) {
  113. case 0:
  114. case '@': case AXISNAME: case '(': case '[': case ',':
  115. case AND: case OR: case MULOP:
  116. case '/': case SLASHSLASH: case '|': case PLUS: case MINUS:
  117. case EQOP: case RELOP:
  118. return false;
  119. default:
  120. return true;
  121. }
  122. }
  123. void Parser::skipWS()
  124. {
  125. while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos]))
  126. ++m_nextPos;
  127. }
  128. Token Parser::makeTokenAndAdvance(int code, int advance)
  129. {
  130. m_nextPos += advance;
  131. return Token(code);
  132. }
  133. Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance)
  134. {
  135. m_nextPos += advance;
  136. return Token(code, val);
  137. }
  138. Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance)
  139. {
  140. m_nextPos += advance;
  141. return Token(code, val);
  142. }
  143. // Returns next char if it's there and interesting, 0 otherwise
  144. char Parser::peekAheadHelper()
  145. {
  146. if (m_nextPos + 1 >= m_data.length())
  147. return 0;
  148. UChar next = m_data[m_nextPos + 1];
  149. if (next >= 0xff)
  150. return 0;
  151. return next;
  152. }
  153. char Parser::peekCurHelper()
  154. {
  155. if (m_nextPos >= m_data.length())
  156. return 0;
  157. UChar next = m_data[m_nextPos];
  158. if (next >= 0xff)
  159. return 0;
  160. return next;
  161. }
  162. Token Parser::lexString()
  163. {
  164. UChar delimiter = m_data[m_nextPos];
  165. int startPos = m_nextPos + 1;
  166. for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) {
  167. if (m_data[m_nextPos] == delimiter) {
  168. String value = m_data.substring(startPos, m_nextPos - startPos);
  169. if (value.isNull())
  170. value = "";
  171. ++m_nextPos; // Consume the char.
  172. return Token(LITERAL, value);
  173. }
  174. }
  175. // Ouch, went off the end -- report error.
  176. return Token(XPATH_ERROR);
  177. }
  178. Token Parser::lexNumber()
  179. {
  180. int startPos = m_nextPos;
  181. bool seenDot = false;
  182. // Go until end or a non-digits character.
  183. for (; m_nextPos < m_data.length(); ++m_nextPos) {
  184. UChar aChar = m_data[m_nextPos];
  185. if (aChar >= 0xff) break;
  186. if (aChar < '0' || aChar > '9') {
  187. if (aChar == '.' && !seenDot)
  188. seenDot = true;
  189. else
  190. break;
  191. }
  192. }
  193. return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos));
  194. }
  195. bool Parser::lexNCName(String& name)
  196. {
  197. int startPos = m_nextPos;
  198. if (m_nextPos >= m_data.length())
  199. return false;
  200. if (charCat(m_data[m_nextPos]) != NameStart)
  201. return false;
  202. // Keep going until we get a character that's not good for names.
  203. for (; m_nextPos < m_data.length(); ++m_nextPos)
  204. if (charCat(m_data[m_nextPos]) == NotPartOfName)
  205. break;
  206. name = m_data.substring(startPos, m_nextPos - startPos);
  207. return true;
  208. }
  209. bool Parser::lexQName(String& name)
  210. {
  211. String n1;
  212. if (!lexNCName(n1))
  213. return false;
  214. skipWS();
  215. // If the next character is :, what we just got it the prefix, if not,
  216. // it's the whole thing.
  217. if (peekAheadHelper() != ':') {
  218. name = n1;
  219. return true;
  220. }
  221. String n2;
  222. if (!lexNCName(n2))
  223. return false;
  224. name = n1 + ":" + n2;
  225. return true;
  226. }
  227. Token Parser::nextTokenInternal()
  228. {
  229. skipWS();
  230. if (m_nextPos >= m_data.length())
  231. return Token(0);
  232. char code = peekCurHelper();
  233. switch (code) {
  234. case '(': case ')': case '[': case ']':
  235. case '@': case ',': case '|':
  236. return makeTokenAndAdvance(code);
  237. case '\'':
  238. case '\"':
  239. return lexString();
  240. case '0': case '1': case '2': case '3': case '4':
  241. case '5': case '6': case '7': case '8': case '9':
  242. return lexNumber();
  243. case '.': {
  244. char next = peekAheadHelper();
  245. if (next == '.')
  246. return makeTokenAndAdvance(DOTDOT, 2);
  247. if (next >= '0' && next <= '9')
  248. return lexNumber();
  249. return makeTokenAndAdvance('.');
  250. }
  251. case '/':
  252. if (peekAheadHelper() == '/')
  253. return makeTokenAndAdvance(SLASHSLASH, 2);
  254. return makeTokenAndAdvance('/');
  255. case '+':
  256. return makeTokenAndAdvance(PLUS);
  257. case '-':
  258. return makeTokenAndAdvance(MINUS);
  259. case '=':
  260. return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ);
  261. case '!':
  262. if (peekAheadHelper() == '=')
  263. return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2);
  264. return Token(XPATH_ERROR);
  265. case '<':
  266. if (peekAheadHelper() == '=')
  267. return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2);
  268. return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT);
  269. case '>':
  270. if (peekAheadHelper() == '=')
  271. return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2);
  272. return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT);
  273. case '*':
  274. if (isBinaryOperatorContext())
  275. return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul);
  276. ++m_nextPos;
  277. return Token(NAMETEST, "*");
  278. case '$': { // $ QName
  279. m_nextPos++;
  280. String name;
  281. if (!lexQName(name))
  282. return Token(XPATH_ERROR);
  283. return Token(VARIABLEREFERENCE, name);
  284. }
  285. }
  286. String name;
  287. if (!lexNCName(name))
  288. return Token(XPATH_ERROR);
  289. skipWS();
  290. // If we're in an operator context, check for any operator names
  291. if (isBinaryOperatorContext()) {
  292. if (name == "and") //### hash?
  293. return Token(AND);
  294. if (name == "or")
  295. return Token(OR);
  296. if (name == "mod")
  297. return Token(MULOP, NumericOp::OP_Mod);
  298. if (name == "div")
  299. return Token(MULOP, NumericOp::OP_Div);
  300. }
  301. // See whether we are at a :
  302. if (peekCurHelper() == ':') {
  303. m_nextPos++;
  304. // Any chance it's an axis name?
  305. if (peekCurHelper() == ':') {
  306. m_nextPos++;
  307. //It might be an axis name.
  308. Step::Axis axis;
  309. if (isAxisName(name, axis))
  310. return Token(AXISNAME, axis);
  311. // Ugh, :: is only valid in axis names -> error
  312. return Token(XPATH_ERROR);
  313. }
  314. // Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest
  315. skipWS();
  316. if (peekCurHelper() == '*') {
  317. m_nextPos++;
  318. return Token(NAMETEST, name + ":*");
  319. }
  320. // Make a full qname.
  321. String n2;
  322. if (!lexNCName(n2))
  323. return Token(XPATH_ERROR);
  324. name = name + ":" + n2;
  325. }
  326. skipWS();
  327. if (peekCurHelper() == '(') {
  328. //note: we don't swallow the (here!
  329. //either node type of function name
  330. if (isNodeTypeName(name)) {
  331. if (name == "processing-instruction")
  332. return Token(PI, name);
  333. return Token(NODETYPE, name);
  334. }
  335. //must be a function name.
  336. return Token(FUNCTIONNAME, name);
  337. }
  338. // At this point, it must be NAMETEST.
  339. return Token(NAMETEST, name);
  340. }
  341. Token Parser::nextToken()
  342. {
  343. Token toRet = nextTokenInternal();
  344. m_lastTokenType = toRet.type;
  345. return toRet;
  346. }
  347. Parser::Parser()
  348. {
  349. reset(String());
  350. }
  351. Parser::~Parser()
  352. {
  353. }
  354. void Parser::reset(const String& data)
  355. {
  356. m_nextPos = 0;
  357. m_data = data;
  358. m_lastTokenType = 0;
  359. m_topExpr = 0;
  360. m_gotNamespaceError = false;
  361. }
  362. int Parser::lex(void* data)
  363. {
  364. YYSTYPE* yylval = static_cast<YYSTYPE*>(data);
  365. Token tok = nextToken();
  366. switch (tok.type) {
  367. case AXISNAME:
  368. yylval->axis = tok.axis;
  369. break;
  370. case MULOP:
  371. yylval->numop = tok.numop;
  372. break;
  373. case RELOP:
  374. case EQOP:
  375. yylval->eqop = tok.eqop;
  376. break;
  377. case NODETYPE:
  378. case PI:
  379. case FUNCTIONNAME:
  380. case LITERAL:
  381. case VARIABLEREFERENCE:
  382. case NUMBER:
  383. case NAMETEST:
  384. yylval->str = new String(tok.str);
  385. registerString(yylval->str);
  386. break;
  387. }
  388. return tok.type;
  389. }
  390. bool Parser::expandQName(const String& qName, String& localName, String& namespaceURI)
  391. {
  392. size_t colon = qName.find(':');
  393. if (colon != notFound) {
  394. if (!m_resolver)
  395. return false;
  396. namespaceURI = m_resolver->lookupNamespaceURI(qName.left(colon));
  397. if (namespaceURI.isNull())
  398. return false;
  399. localName = qName.substring(colon + 1);
  400. } else
  401. localName = qName;
  402. return true;
  403. }
  404. Expression* Parser::parseStatement(const String& statement, PassRefPtr<XPathNSResolver> resolver, ExceptionCode& ec)
  405. {
  406. reset(statement);
  407. m_resolver = resolver;
  408. Parser* oldParser = currentParser;
  409. currentParser = this;
  410. int parseError = xpathyyparse(this);
  411. currentParser = oldParser;
  412. if (parseError) {
  413. deleteAllValues(m_parseNodes);
  414. m_parseNodes.clear();
  415. HashSet<Vector<Predicate*>*>::iterator pend = m_predicateVectors.end();
  416. for (HashSet<Vector<Predicate*>*>::iterator it = m_predicateVectors.begin(); it != pend; ++it) {
  417. deleteAllValues(**it);
  418. delete *it;
  419. }
  420. m_predicateVectors.clear();
  421. HashSet<Vector<Expression*>*>::iterator eend = m_expressionVectors.end();
  422. for (HashSet<Vector<Expression*>*>::iterator it = m_expressionVectors.begin(); it != eend; ++it) {
  423. deleteAllValues(**it);
  424. delete *it;
  425. }
  426. m_expressionVectors.clear();
  427. deleteAllValues(m_strings);
  428. m_strings.clear();
  429. deleteAllValues(m_nodeTests);
  430. m_nodeTests.clear();
  431. m_topExpr = 0;
  432. if (m_gotNamespaceError)
  433. ec = NAMESPACE_ERR;
  434. else
  435. ec = XPathException::INVALID_EXPRESSION_ERR;
  436. return 0;
  437. }
  438. ASSERT(m_parseNodes.size() == 1);
  439. ASSERT(*m_parseNodes.begin() == m_topExpr);
  440. ASSERT(m_expressionVectors.size() == 0);
  441. ASSERT(m_predicateVectors.size() == 0);
  442. ASSERT(m_strings.size() == 0);
  443. ASSERT(m_nodeTests.size() == 0);
  444. m_parseNodes.clear();
  445. Expression* result = m_topExpr;
  446. m_topExpr = 0;
  447. return result;
  448. }
  449. void Parser::registerParseNode(ParseNode* node)
  450. {
  451. if (node == 0)
  452. return;
  453. ASSERT(!m_parseNodes.contains(node));
  454. m_parseNodes.add(node);
  455. }
  456. void Parser::unregisterParseNode(ParseNode* node)
  457. {
  458. if (node == 0)
  459. return;
  460. ASSERT(m_parseNodes.contains(node));
  461. m_parseNodes.remove(node);
  462. }
  463. void Parser::registerPredicateVector(Vector<Predicate*>* vector)
  464. {
  465. if (vector == 0)
  466. return;
  467. ASSERT(!m_predicateVectors.contains(vector));
  468. m_predicateVectors.add(vector);
  469. }
  470. void Parser::deletePredicateVector(Vector<Predicate*>* vector)
  471. {
  472. if (vector == 0)
  473. return;
  474. ASSERT(m_predicateVectors.contains(vector));
  475. m_predicateVectors.remove(vector);
  476. delete vector;
  477. }
  478. void Parser::registerExpressionVector(Vector<Expression*>* vector)
  479. {
  480. if (vector == 0)
  481. return;
  482. ASSERT(!m_expressionVectors.contains(vector));
  483. m_expressionVectors.add(vector);
  484. }
  485. void Parser::deleteExpressionVector(Vector<Expression*>* vector)
  486. {
  487. if (vector == 0)
  488. return;
  489. ASSERT(m_expressionVectors.contains(vector));
  490. m_expressionVectors.remove(vector);
  491. delete vector;
  492. }
  493. void Parser::registerString(String* s)
  494. {
  495. if (s == 0)
  496. return;
  497. ASSERT(!m_strings.contains(s));
  498. m_strings.add(s);
  499. }
  500. void Parser::deleteString(String* s)
  501. {
  502. if (s == 0)
  503. return;
  504. ASSERT(m_strings.contains(s));
  505. m_strings.remove(s);
  506. delete s;
  507. }
  508. void Parser::registerNodeTest(Step::NodeTest* t)
  509. {
  510. if (t == 0)
  511. return;
  512. ASSERT(!m_nodeTests.contains(t));
  513. m_nodeTests.add(t);
  514. }
  515. void Parser::deleteNodeTest(Step::NodeTest* t)
  516. {
  517. if (t == 0)
  518. return;
  519. ASSERT(m_nodeTests.contains(t));
  520. m_nodeTests.remove(t);
  521. delete t;
  522. }