123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368 |
- /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
- /* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
- /**
- * Lexical analyzer for XPath expressions
- */
- #include "txExprLexer.h"
- #include "nsGkAtoms.h"
- #include "nsString.h"
- #include "nsError.h"
- #include "txXMLUtils.h"
- /**
- * Creates a new ExprLexer
- */
- txExprLexer::txExprLexer()
- : mCurrentItem(nullptr),
- mFirstItem(nullptr),
- mLastItem(nullptr),
- mTokenCount(0)
- {
- }
- /**
- * Destroys this instance of an txExprLexer
- */
- txExprLexer::~txExprLexer()
- {
- //-- delete tokens
- Token* tok = mFirstItem;
- while (tok) {
- Token* temp = tok->mNext;
- delete tok;
- tok = temp;
- }
- mCurrentItem = nullptr;
- }
- Token*
- txExprLexer::nextToken()
- {
- if (!mCurrentItem) {
- NS_NOTREACHED("nextToken called on uninitialized lexer");
- return nullptr;
- }
- if (mCurrentItem->mType == Token::END) {
- // Do not progress beyond the end token
- return mCurrentItem;
- }
- Token* token = mCurrentItem;
- mCurrentItem = mCurrentItem->mNext;
- return token;
- }
- void
- txExprLexer::addToken(Token* aToken)
- {
- if (mLastItem) {
- mLastItem->mNext = aToken;
- }
- if (!mFirstItem) {
- mFirstItem = aToken;
- mCurrentItem = aToken;
- }
- mLastItem = aToken;
- ++mTokenCount;
- }
- /**
- * Returns true if the following Token should be an operator.
- * This is a helper for the first bullet of [XPath 3.7]
- * Lexical Structure
- */
- bool
- txExprLexer::nextIsOperatorToken(Token* aToken)
- {
- if (!aToken || aToken->mType == Token::NULL_TOKEN) {
- return false;
- }
- /* This relies on the tokens having the right order in txExprLexer.h */
- return aToken->mType < Token::COMMA ||
- aToken->mType > Token::UNION_OP;
- }
- /**
- * Parses the given string into a sequence of Tokens
- */
- nsresult
- txExprLexer::parse(const nsASingleFragmentString& aPattern)
- {
- iterator start, end;
- start = aPattern.BeginReading(mPosition);
- aPattern.EndReading(end);
- //-- initialize previous token, this will automatically get
- //-- deleted when it goes out of scope
- Token nullToken(nullptr, nullptr, Token::NULL_TOKEN);
- Token::Type defType;
- Token* newToken = nullptr;
- Token* prevToken = &nullToken;
- bool isToken;
- while (mPosition < end) {
- defType = Token::CNAME;
- isToken = true;
- if (*mPosition == DOLLAR_SIGN) {
- if (++mPosition == end || !XMLUtils::isLetter(*mPosition)) {
- return NS_ERROR_XPATH_INVALID_VAR_NAME;
- }
- defType = Token::VAR_REFERENCE;
- }
- // just reuse the QName parsing, which will use defType
- // the token to construct
- if (XMLUtils::isLetter(*mPosition)) {
- // NCName, can get QName or OperatorName;
- // FunctionName, NodeName, and AxisSpecifier may want whitespace,
- // and are dealt with below
- start = mPosition;
- while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) {
- /* just go */
- }
- if (mPosition < end && *mPosition == COLON) {
- // try QName or wildcard, might need to step back for axis
- if (++mPosition == end) {
- return NS_ERROR_XPATH_UNEXPECTED_END;
- }
- if (XMLUtils::isLetter(*mPosition)) {
- while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) {
- /* just go */
- }
- }
- else if (*mPosition == '*' && defType != Token::VAR_REFERENCE) {
- // eat wildcard for NameTest, bail for var ref at COLON
- ++mPosition;
- }
- else {
- --mPosition; // step back
- }
- }
- if (nextIsOperatorToken(prevToken)) {
- nsDependentSubstring op(Substring(start, mPosition));
- if (nsGkAtoms::_and->Equals(op)) {
- defType = Token::AND_OP;
- }
- else if (nsGkAtoms::_or->Equals(op)) {
- defType = Token::OR_OP;
- }
- else if (nsGkAtoms::mod->Equals(op)) {
- defType = Token::MODULUS_OP;
- }
- else if (nsGkAtoms::div->Equals(op)) {
- defType = Token::DIVIDE_OP;
- }
- else {
- // XXX QUESTION: spec is not too precise
- // badops is sure an error, but is bad:ops, too? We say yes!
- return NS_ERROR_XPATH_OPERATOR_EXPECTED;
- }
- }
- newToken = new Token(start, mPosition, defType);
- }
- else if (isXPathDigit(*mPosition)) {
- start = mPosition;
- while (++mPosition < end && isXPathDigit(*mPosition)) {
- /* just go */
- }
- if (mPosition < end && *mPosition == '.') {
- while (++mPosition < end && isXPathDigit(*mPosition)) {
- /* just go */
- }
- }
- newToken = new Token(start, mPosition, Token::NUMBER);
- }
- else {
- switch (*mPosition) {
- //-- ignore whitespace
- case SPACE:
- case TX_TAB:
- case TX_CR:
- case TX_LF:
- ++mPosition;
- isToken = false;
- break;
- case S_QUOTE :
- case D_QUOTE :
- start = mPosition;
- while (++mPosition < end && *mPosition != *start) {
- // eat literal
- }
- if (mPosition == end) {
- mPosition = start;
- return NS_ERROR_XPATH_UNCLOSED_LITERAL;
- }
- newToken = new Token(start + 1, mPosition, Token::LITERAL);
- ++mPosition;
- break;
- case PERIOD:
- // period can be .., .(DIGITS)+ or ., check next
- if (++mPosition == end) {
- newToken = new Token(mPosition - 1, Token::SELF_NODE);
- }
- else if (isXPathDigit(*mPosition)) {
- start = mPosition - 1;
- while (++mPosition < end && isXPathDigit(*mPosition)) {
- /* just go */
- }
- newToken = new Token(start, mPosition, Token::NUMBER);
- }
- else if (*mPosition == PERIOD) {
- ++mPosition;
- newToken = new Token(mPosition - 2, mPosition, Token::PARENT_NODE);
- }
- else {
- newToken = new Token(mPosition - 1, Token::SELF_NODE);
- }
- break;
- case COLON: // QNames are dealt above, must be axis ident
- if (++mPosition >= end || *mPosition != COLON ||
- prevToken->mType != Token::CNAME) {
- return NS_ERROR_XPATH_BAD_COLON;
- }
- prevToken->mType = Token::AXIS_IDENTIFIER;
- ++mPosition;
- isToken = false;
- break;
- case FORWARD_SLASH :
- if (++mPosition < end && *mPosition == FORWARD_SLASH) {
- ++mPosition;
- newToken = new Token(mPosition - 2, mPosition, Token::ANCESTOR_OP);
- }
- else {
- newToken = new Token(mPosition - 1, Token::PARENT_OP);
- }
- break;
- case BANG : // can only be !=
- if (++mPosition < end && *mPosition == EQUAL) {
- ++mPosition;
- newToken = new Token(mPosition - 2, mPosition, Token::NOT_EQUAL_OP);
- break;
- }
- // Error ! is not not()
- return NS_ERROR_XPATH_BAD_BANG;
- case EQUAL:
- newToken = new Token(mPosition, Token::EQUAL_OP);
- ++mPosition;
- break;
- case L_ANGLE:
- if (++mPosition == end) {
- return NS_ERROR_XPATH_UNEXPECTED_END;
- }
- if (*mPosition == EQUAL) {
- ++mPosition;
- newToken = new Token(mPosition - 2, mPosition,
- Token::LESS_OR_EQUAL_OP);
- }
- else {
- newToken = new Token(mPosition - 1, Token::LESS_THAN_OP);
- }
- break;
- case R_ANGLE:
- if (++mPosition == end) {
- return NS_ERROR_XPATH_UNEXPECTED_END;
- }
- if (*mPosition == EQUAL) {
- ++mPosition;
- newToken = new Token(mPosition - 2, mPosition,
- Token::GREATER_OR_EQUAL_OP);
- }
- else {
- newToken = new Token(mPosition - 1, Token::GREATER_THAN_OP);
- }
- break;
- case HYPHEN :
- newToken = new Token(mPosition, Token::SUBTRACTION_OP);
- ++mPosition;
- break;
- case ASTERISK:
- if (nextIsOperatorToken(prevToken)) {
- newToken = new Token(mPosition, Token::MULTIPLY_OP);
- }
- else {
- newToken = new Token(mPosition, Token::CNAME);
- }
- ++mPosition;
- break;
- case L_PAREN:
- if (prevToken->mType == Token::CNAME) {
- const nsDependentSubstring& val = prevToken->Value();
- if (val.EqualsLiteral("comment")) {
- prevToken->mType = Token::COMMENT_AND_PAREN;
- }
- else if (val.EqualsLiteral("node")) {
- prevToken->mType = Token::NODE_AND_PAREN;
- }
- else if (val.EqualsLiteral("processing-instruction")) {
- prevToken->mType = Token::PROC_INST_AND_PAREN;
- }
- else if (val.EqualsLiteral("text")) {
- prevToken->mType = Token::TEXT_AND_PAREN;
- }
- else {
- prevToken->mType = Token::FUNCTION_NAME_AND_PAREN;
- }
- isToken = false;
- }
- else {
- newToken = new Token(mPosition, Token::L_PAREN);
- }
- ++mPosition;
- break;
- case R_PAREN:
- newToken = new Token(mPosition, Token::R_PAREN);
- ++mPosition;
- break;
- case L_BRACKET:
- newToken = new Token(mPosition, Token::L_BRACKET);
- ++mPosition;
- break;
- case R_BRACKET:
- newToken = new Token(mPosition, Token::R_BRACKET);
- ++mPosition;
- break;
- case COMMA:
- newToken = new Token(mPosition, Token::COMMA);
- ++mPosition;
- break;
- case AT_SIGN :
- newToken = new Token(mPosition, Token::AT_SIGN);
- ++mPosition;
- break;
- case PLUS:
- newToken = new Token(mPosition, Token::ADDITION_OP);
- ++mPosition;
- break;
- case VERT_BAR:
- newToken = new Token(mPosition, Token::UNION_OP);
- ++mPosition;
- break;
- default:
- // Error, don't grok character :-(
- return NS_ERROR_XPATH_ILLEGAL_CHAR;
- }
- }
- if (isToken) {
- NS_ENSURE_TRUE(newToken, NS_ERROR_OUT_OF_MEMORY);
- NS_ENSURE_TRUE(newToken != mLastItem, NS_ERROR_FAILURE);
- prevToken = newToken;
- addToken(newToken);
- }
- }
- // add a endToken to the list
- newToken = new Token(end, end, Token::END);
- addToken(newToken);
- return NS_OK;
- }
|