123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398 |
- /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
- /* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
- /* tokenization of CSS style sheets */
- #ifndef nsCSSScanner_h___
- #define nsCSSScanner_h___
- #include "nsString.h"
- namespace mozilla {
- namespace css {
- class ErrorReporter;
- } // namespace css
- } // namespace mozilla
// Token types.  These correspond closely, but not perfectly, to the token
// categorization in section 4.1.1 of CSS2.1.  (The deviations are all the
// fault of css3-selectors, which has requirements that can only be met by
// changing the generic tokenization.)  The comment at the end of each
// enumerator line illustrates the lexical form of that kind of token.
//
// The "value fields" mentioned below are the members of nsCSSToken
// (mIdent, mNumber, mInteger, mInteger2, mSymbol, mIntegerValid, mHasSign).
enum nsCSSTokenType {
  // White space of any kind.  No value fields are used.  Note that
  // comments do *not* count as white space; a comment separates the
  // tokens on either side of it.
  eCSSToken_Whitespace,     //

  // A comment.  Whether comment tokens are handed back to the caller is
  // selected by the nsCSSScannerExclude argument of nsCSSScanner::Next.
  eCSSToken_Comment,        // /*...*/

  // Identifier-like tokens.  mIdent is the text of the identifier.
  // The difference between ID and Hash is: if the text after the #
  // would have been a valid Ident if the # hadn't been there, the
  // scanner produces an ID token.  Otherwise it produces a Hash token.
  // (This distinction is required by css3-selectors.)
  eCSSToken_Ident,          // word
  eCSSToken_Function,       // word(
  eCSSToken_AtKeyword,      // @word
  eCSSToken_ID,             // #word
  eCSSToken_Hash,           // #0word

  // Numeric tokens.  mNumber is the floating-point value of the
  // number, and mHasSign indicates whether there was an explicit sign
  // (+ or -) in front of the number.  If mIntegerValid is true, the
  // number had the lexical form of an integer, and mInteger is its
  // integer value.  Lexically integer values outside the range of a
  // 32-bit signed number are clamped to the maximum values; mNumber
  // will indicate a 'truer' value in that case.  Percentage tokens
  // are always considered not to be integers, even if their numeric
  // value is integral (100% => mNumber = 1.0).  For Dimension
  // tokens, mIdent holds the text of the unit.
  eCSSToken_Number,         // 1 -5 +2e3 3.14159 7.297352e-3
  eCSSToken_Dimension,      // 24px 8.5in
  eCSSToken_Percentage,     // 85% 1280.4%

  // String-like tokens.  In all cases, mIdent holds the text
  // belonging to the string, and mSymbol holds the delimiter
  // character, which may be ', ", or zero (only for unquoted URLs).
  // Bad_String and Bad_URL tokens are emitted when the closing
  // delimiter or parenthesis was missing.
  eCSSToken_String,         // 'foo bar' "foo bar"
  eCSSToken_Bad_String,     // 'foo bar
  eCSSToken_URL,            // url(foobar) url("foo bar")
  eCSSToken_Bad_URL,        // url(foo

  // Any one-character symbol.  mSymbol holds the character.
  eCSSToken_Symbol,         // . ; { } ! *

  // Match operators.  These are single tokens rather than pairs of
  // Symbol tokens because css3-selectors forbids the presence of
  // comments between the two characters.  No value fields are used;
  // the token type indicates which operator.
  eCSSToken_Includes,       // ~=
  eCSSToken_Dashmatch,      // |=
  eCSSToken_Beginsmatch,    // ^=
  eCSSToken_Endsmatch,      // $=
  eCSSToken_Containsmatch,  // *=

  // Unicode-range token: currently used only in @font-face.
  // The lexical rule for this token includes several forms that are
  // semantically invalid.  Therefore, mIdent always holds the
  // complete original text of the token (so we can print it
  // accurately in diagnostics), and mIntegerValid is true iff the
  // token is semantically valid.  In that case, mInteger holds the
  // lowest value included in the range, and mInteger2 holds the
  // highest value included in the range.
  eCSSToken_URange,         // U+007e U+01?? U+2000-206F

  // HTML comment delimiters, ignored as a unit when they appear at
  // the top level of a style sheet, for compatibility with websites
  // written for compatibility with pre-CSS browsers.  This token type
  // subsumes the css2.1 CDO and CDC tokens, which are always treated
  // the same by the parser.  mIdent holds the text of the token, for
  // diagnostics.
  eCSSToken_HTMLComment,    // <!-- -->
};
// Classification of tokens used to decide whether a "/**/" string must be
// inserted between two token streams that are pasted together during
// serialization.
//
// Values corresponding to eCSSToken_Dashmatch and eCSSToken_Containsmatch
// are included, as css-syntax does not treat these as whole tokens, but we
// will still need to insert a "/**/" string between a '|' delim and a '|='
// dashmatch and between a '/' delim and a '*=' containsmatch.
//
// https://drafts.csswg.org/css-syntax/#serialization
enum nsCSSTokenSerializationType {
  eCSSTokenSerialization_Nothing,
  eCSSTokenSerialization_Whitespace,
  eCSSTokenSerialization_AtKeyword_or_Hash,
  eCSSTokenSerialization_Number,
  eCSSTokenSerialization_Dimension,
  eCSSTokenSerialization_Percentage,
  eCSSTokenSerialization_URange,
  eCSSTokenSerialization_URL_or_BadURL,
  eCSSTokenSerialization_Function,
  eCSSTokenSerialization_Ident,
  eCSSTokenSerialization_CDC,
  eCSSTokenSerialization_DashMatch,
  eCSSTokenSerialization_ContainsMatch,

  // Single-character symbols, grouped by the characters they cover.
  eCSSTokenSerialization_Symbol_Hash,         // '#'
  eCSSTokenSerialization_Symbol_At,           // '@'
  eCSSTokenSerialization_Symbol_Dot_or_Plus,  // '.', '+'
  eCSSTokenSerialization_Symbol_Minus,        // '-'
  eCSSTokenSerialization_Symbol_OpenParen,    // '('
  eCSSTokenSerialization_Symbol_Question,     // '?'
  eCSSTokenSerialization_Symbol_Assorted,     // '$', '^', '~'
  eCSSTokenSerialization_Symbol_Equals,       // '='
  eCSSTokenSerialization_Symbol_Bar,          // '|'
  eCSSTokenSerialization_Symbol_Slash,        // '/'
  eCSSTokenSerialization_Symbol_Asterisk,     // '*'

  eCSSTokenSerialization_Other                // anything else
};
- // A single token returned from the scanner. mType is always
- // meaningful; comments above describe which other fields are
- // meaningful for which token types.
- struct nsCSSToken {
- nsAutoString mIdent;
- float mNumber;
- int32_t mInteger;
- int32_t mInteger2;
- nsCSSTokenType mType;
- char16_t mSymbol;
- bool mIntegerValid;
- bool mHasSign;
- nsCSSToken()
- : mNumber(0), mInteger(0), mInteger2(0), mType(eCSSToken_Whitespace),
- mSymbol('\0'), mIntegerValid(false), mHasSign(false)
- {}
- bool IsSymbol(char16_t aSymbol) const {
- return mType == eCSSToken_Symbol && mSymbol == aSymbol;
- }
- void AppendToString(nsString& aBuffer) const;
- };
- // Represents an nsCSSScanner's saved position in the input buffer.
- class nsCSSScannerPosition {
- friend class nsCSSScanner;
- public:
- nsCSSScannerPosition() : mInitialized(false) { }
- uint32_t LineNumber() {
- MOZ_ASSERT(mInitialized);
- return mLineNumber;
- }
- uint32_t LineOffset() {
- MOZ_ASSERT(mInitialized);
- return mLineOffset;
- }
- private:
- uint32_t mOffset;
- uint32_t mLineNumber;
- uint32_t mLineOffset;
- uint32_t mTokenLineNumber;
- uint32_t mTokenLineOffset;
- uint32_t mTokenOffset;
- bool mInitialized;
- };
// Selects which non-significant tokens nsCSSScanner::Next hands back to
// its caller.
enum nsCSSScannerExclude {
  // Return all tokens, including whitespace and comments.
  eCSSScannerExclude_None,

  // Include whitespace but exclude comments.
  eCSSScannerExclude_Comments,

  // Exclude whitespace and comments.
  eCSSScannerExclude_WhitespaceAndComments
};
- // nsCSSScanner tokenizes an input stream using the CSS2.1 forward
- // compatible tokenization rules. Used internally by nsCSSParser;
- // not available for use by other code.
- class nsCSSScanner {
- public:
- // |aLineNumber == 1| is the beginning of a file, use |aLineNumber == 0|
- // when the line number is unknown. The scanner does not take
- // ownership of |aBuffer|, so the caller must be sure to keep it
- // alive for the lifetime of the scanner.
- nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber);
- ~nsCSSScanner();
- void SetErrorReporter(mozilla::css::ErrorReporter* aReporter) {
- mReporter = aReporter;
- }
- // Set whether or not we are processing SVG
- void SetSVGMode(bool aSVGMode) {
- mSVGMode = aSVGMode;
- }
- bool IsSVGMode() const {
- return mSVGMode;
- }
- // Reset or check whether a BAD_URL or BAD_STRING token has been seen.
- void ClearSeenBadToken() { mSeenBadToken = false; }
- bool SeenBadToken() const { return mSeenBadToken; }
- // Reset or check whether a "var(" FUNCTION token has been seen.
- void ClearSeenVariableReference() { mSeenVariableReference = false; }
- bool SeenVariableReference() const { return mSeenVariableReference; }
- // Get the 1-based line number of the last character of
- // the most recently processed token.
- uint32_t GetLineNumber() const { return mTokenLineNumber; }
- // Get the 0-based column number of the first character of
- // the most recently processed token.
- uint32_t GetColumnNumber() const
- { return mTokenOffset - mTokenLineOffset; }
- uint32_t GetTokenOffset() const
- { return mTokenOffset; }
- uint32_t GetTokenEndOffset() const
- { return mOffset; }
- // Get the text of the line containing the first character of
- // the most recently processed token.
- nsDependentSubstring GetCurrentLine() const;
- // Get the next token. Return false on EOF. aTokenResult is filled
- // in with the data for the token. aSkip controls whether
- // whitespace and/or comment tokens are ever returned.
- bool Next(nsCSSToken& aTokenResult, nsCSSScannerExclude aSkip);
- // Get the body of an URL token (everything after the 'url(').
- // This is exposed for use by nsCSSParser::ParseMozDocumentRule,
- // which, for historical reasons, must make additional function
- // tokens behave like url(). Please do not add new uses to the
- // parser.
- void NextURL(nsCSSToken& aTokenResult);
- // This is exposed for use by nsCSSParser::ParsePseudoClassWithNthPairArg,
- // because "2n-1" is a single DIMENSION token, and "n-1" is a single
- // IDENT token, but the :nth() selector syntax wants to interpret
- // them the same as "2n -1" and "n -1" respectively. Please do not
- // add new uses to the parser.
- //
- // Note: this function may not be used to back up over a line boundary.
- void Backup(uint32_t n);
- // Starts recording the input stream from the current position.
- void StartRecording();
- // Abandons recording of the input stream.
- void StopRecording();
- // Stops recording of the input stream and appends the recorded
- // input to aBuffer.
- void StopRecording(nsString& aBuffer);
- // Returns the length of the current recording.
- uint32_t RecordingLength() const;
- #ifdef DEBUG
- bool IsRecording() const;
- #endif
- // Stores the current scanner offset into the specified object.
- void SavePosition(nsCSSScannerPosition& aState);
- // Resets the scanner offset to a position saved by SavePosition.
- void RestoreSavedPosition(const nsCSSScannerPosition& aState);
- enum EOFCharacters {
- eEOFCharacters_None = 0x0000,
- // to handle \<EOF> inside strings
- eEOFCharacters_DropBackslash = 0x0001,
- // to handle \<EOF> outside strings
- eEOFCharacters_ReplacementChar = 0x0002,
- // to close comments
- eEOFCharacters_Asterisk = 0x0004,
- eEOFCharacters_Slash = 0x0008,
- // to close double-quoted strings
- eEOFCharacters_DoubleQuote = 0x0010,
- // to close single-quoted strings
- eEOFCharacters_SingleQuote = 0x0020,
- // to close URLs
- eEOFCharacters_CloseParen = 0x0040,
- };
- // Appends any characters to the specified string the input stream to make the
- // last token not rely on special EOF handling behavior.
- //
- // If eEOFCharacters_DropBackslash is in aEOFCharacters, it is ignored.
- static void AppendImpliedEOFCharacters(EOFCharacters aEOFCharacters,
- nsAString& aString);
- EOFCharacters GetEOFCharacters() const {
- #ifdef DEBUG
- AssertEOFCharactersValid(mEOFCharacters);
- #endif
- return mEOFCharacters;
- }
- #ifdef DEBUG
- static void AssertEOFCharactersValid(uint32_t c);
- #endif
- protected:
- int32_t Peek(uint32_t n = 0);
- void Advance(uint32_t n = 1);
- void AdvanceLine();
- void SkipWhitespace();
- void SkipComment();
- bool GatherEscape(nsString& aOutput, bool aInString);
- bool GatherText(uint8_t aClass, nsString& aIdent);
- bool ScanIdent(nsCSSToken& aResult);
- bool ScanAtKeyword(nsCSSToken& aResult);
- bool ScanHash(nsCSSToken& aResult);
- bool ScanNumber(nsCSSToken& aResult);
- bool ScanString(nsCSSToken& aResult);
- bool ScanURange(nsCSSToken& aResult);
- void SetEOFCharacters(uint32_t aEOFCharacters);
- void AddEOFCharacters(uint32_t aEOFCharacters);
- const char16_t *mBuffer;
- uint32_t mOffset;
- uint32_t mCount;
- uint32_t mLineNumber;
- uint32_t mLineOffset;
- uint32_t mTokenLineNumber;
- uint32_t mTokenLineOffset;
- uint32_t mTokenOffset;
- uint32_t mRecordStartOffset;
- EOFCharacters mEOFCharacters;
- mozilla::css::ErrorReporter *mReporter;
- // True if we are in SVG mode; false in "normal" CSS
- bool mSVGMode;
- bool mRecording;
- bool mSeenBadToken;
- bool mSeenVariableReference;
- };
- // Token for the grid-template-areas micro-syntax
- // http://dev.w3.org/csswg/css-grid/#propdef-grid-template-areas
- struct MOZ_STACK_CLASS nsCSSGridTemplateAreaToken {
- nsAutoString mName; // Empty for a null cell, non-empty for a named cell
- bool isTrash; // True for a trash token, mName is ignored in this case.
- };
- // Scanner for the grid-template-areas micro-syntax
- class nsCSSGridTemplateAreaScanner {
- public:
- explicit nsCSSGridTemplateAreaScanner(const nsAString& aBuffer);
- // Get the next token. Return false on EOF.
- // aTokenResult is filled in with the data for the token.
- bool Next(nsCSSGridTemplateAreaToken& aTokenResult);
- private:
- const char16_t *mBuffer;
- uint32_t mOffset;
- uint32_t mCount;
- };
- #endif /* nsCSSScanner_h___ */
|