nsCSSScanner.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398
  1. /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
  2. /* This Source Code Form is subject to the terms of the Mozilla Public
  3. * License, v. 2.0. If a copy of the MPL was not distributed with this
  4. * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  5. /* tokenization of CSS style sheets */
  6. #ifndef nsCSSScanner_h___
  7. #define nsCSSScanner_h___
  8. #include "nsString.h"
  // Forward declaration only: this header mentions ErrorReporter solely
  // through pointers, so its full definition is not required here.
  namespace mozilla {
  namespace css {

  class ErrorReporter;

  } // namespace css
  } // namespace mozilla
  // The kinds of token the scanner can produce.  They follow the token
  // categories of CSS2.1 section 4.1.1 closely, but not exactly: every
  // deviation exists because css3-selectors imposes requirements that can
  // only be met by adjusting the generic tokenization.  The trailing
  // comment on each enumerator shows what such a token looks like.
  enum nsCSSTokenType {
    // ---- White space and comments ----

    // White space of any kind.  No value fields are used.  Comments are
    // never counted as white space; a comment separates the tokens on
    // either side of it.
    eCSSToken_Whitespace,     // (white space)

    // A comment.  Whether comment tokens are handed back to the caller is
    // controlled by the nsCSSScannerExclude argument of nsCSSScanner::Next.
    eCSSToken_Comment,        // /*...*/

    // ---- Identifier-like tokens ----
    //
    // mIdent carries the text of the identifier.  ID versus Hash: when the
    // text following the '#' would have been a valid Ident on its own, the
    // scanner emits an ID token; in every other case it emits a Hash
    // token.  (css3-selectors requires this distinction.)
    eCSSToken_Ident,          // word
    eCSSToken_Function,       // word(
    eCSSToken_AtKeyword,      // @word
    eCSSToken_ID,             // #word
    eCSSToken_Hash,           // #0word

    // ---- Numeric tokens ----
    //
    // mNumber is the floating-point value and mHasSign records whether an
    // explicit sign (+ or -) preceded the number.  When mIntegerValid is
    // true the number was lexically an integer and mInteger is its value.
    // Lexical integers that do not fit in a signed 32-bit number are
    // clamped to the maximum values; mNumber then gives a 'truer' value.
    // Percentage tokens are never treated as integers, even when their
    // numeric value is integral (100% => mNumber = 1.0).  For Dimension
    // tokens, mIdent carries the text of the unit.
    eCSSToken_Number,         // 1 -5 +2e3 3.14159 7.297352e-3
    eCSSToken_Dimension,      // 24px 8.5in
    eCSSToken_Percentage,     // 85% 1280.4%

    // ---- String-like tokens ----
    //
    // mIdent always carries the text belonging to the string, and mSymbol
    // carries the delimiter character: ', ", or zero (zero only for
    // unquoted URLs).  Bad_String and Bad_URL are produced when the
    // closing delimiter or parenthesis is missing.
    eCSSToken_String,         // 'foo bar' "foo bar"
    eCSSToken_Bad_String,     // 'foo bar
    eCSSToken_URL,            // url(foobar) url("foo bar")
    eCSSToken_Bad_URL,        // url(foo

    // ---- Symbols ----

    // Any single-character symbol; the character is in mSymbol.
    eCSSToken_Symbol,         // . ; { } ! *

    // ---- Match operators ----
    //
    // Each is one token rather than a pair of Symbol tokens, because
    // css3-selectors does not allow a comment between the two characters.
    // No value fields are used; the token type identifies the operator.
    eCSSToken_Includes,       // ~=
    eCSSToken_Dashmatch,      // |=
    eCSSToken_Beginsmatch,    // ^=
    eCSSToken_Endsmatch,      // $=
    eCSSToken_Containsmatch,  // *=

    // ---- Unicode ranges ----
    //
    // Currently used only in @font-face.  The lexical rule admits several
    // forms that are semantically invalid, so mIdent always carries the
    // complete original text of the token (allowing it to be printed
    // accurately in diagnostics), and mIntegerValid is true iff the token
    // is semantically valid.  When it is, mInteger is the lowest value in
    // the range and mInteger2 is the highest.
    eCSSToken_URange,         // U+007e U+01?? U+2000-206F

    // ---- HTML comment delimiters ----
    //
    // Ignored as a unit when they occur at the top level of a style sheet,
    // for compatibility with websites written for compatibility with
    // pre-CSS browsers.  This type covers both the css2.1 CDO and CDC
    // tokens, which the parser always treats identically.  mIdent carries
    // the text of the token, for diagnostics.
    eCSSToken_HTMLComment     // <!-- -->
  };
  // Coarse classification of tokens, used while serializing to decide
  // whether a "/**/" string has to be inserted when token streams are
  // pasted together.  DashMatch and ContainsMatch have their own values
  // (corresponding to eCSSToken_Dashmatch and eCSSToken_Containsmatch)
  // even though css-syntax does not treat them as whole tokens: a "/**/"
  // string is still needed between a '|' delim and a '|=' dashmatch, and
  // between a '/' delim and a '*=' containsmatch.
  //
  // https://drafts.csswg.org/css-syntax/#serialization
  enum nsCSSTokenSerializationType {
    eCSSTokenSerialization_Nothing,
    eCSSTokenSerialization_Whitespace,
    eCSSTokenSerialization_AtKeyword_or_Hash,
    eCSSTokenSerialization_Number,
    eCSSTokenSerialization_Dimension,
    eCSSTokenSerialization_Percentage,
    eCSSTokenSerialization_URange,
    eCSSTokenSerialization_URL_or_BadURL,
    eCSSTokenSerialization_Function,
    eCSSTokenSerialization_Ident,
    eCSSTokenSerialization_CDC,
    eCSSTokenSerialization_DashMatch,
    eCSSTokenSerialization_ContainsMatch,

    // Single-character symbols, split up by the character involved.
    eCSSTokenSerialization_Symbol_Hash,         // '#'
    eCSSTokenSerialization_Symbol_At,           // '@'
    eCSSTokenSerialization_Symbol_Dot_or_Plus,  // '.', '+'
    eCSSTokenSerialization_Symbol_Minus,        // '-'
    eCSSTokenSerialization_Symbol_OpenParen,    // '('
    eCSSTokenSerialization_Symbol_Question,     // '?'
    eCSSTokenSerialization_Symbol_Assorted,     // '$', '^', '~'
    eCSSTokenSerialization_Symbol_Equals,       // '='
    eCSSTokenSerialization_Symbol_Bar,          // '|'
    eCSSTokenSerialization_Symbol_Slash,        // '/'
    eCSSTokenSerialization_Symbol_Asterisk,     // '*'

    eCSSTokenSerialization_Other                // anything else
  };
  121. // A single token returned from the scanner. mType is always
  122. // meaningful; comments above describe which other fields are
  123. // meaningful for which token types.
  124. struct nsCSSToken {
  125. nsAutoString mIdent;
  126. float mNumber;
  127. int32_t mInteger;
  128. int32_t mInteger2;
  129. nsCSSTokenType mType;
  130. char16_t mSymbol;
  131. bool mIntegerValid;
  132. bool mHasSign;
  133. nsCSSToken()
  134. : mNumber(0), mInteger(0), mInteger2(0), mType(eCSSToken_Whitespace),
  135. mSymbol('\0'), mIntegerValid(false), mHasSign(false)
  136. {}
  137. bool IsSymbol(char16_t aSymbol) const {
  138. return mType == eCSSToken_Symbol && mSymbol == aSymbol;
  139. }
  140. void AppendToString(nsString& aBuffer) const;
  141. };
  142. // Represents an nsCSSScanner's saved position in the input buffer.
  143. class nsCSSScannerPosition {
  144. friend class nsCSSScanner;
  145. public:
  146. nsCSSScannerPosition() : mInitialized(false) { }
  147. uint32_t LineNumber() {
  148. MOZ_ASSERT(mInitialized);
  149. return mLineNumber;
  150. }
  151. uint32_t LineOffset() {
  152. MOZ_ASSERT(mInitialized);
  153. return mLineOffset;
  154. }
  155. private:
  156. uint32_t mOffset;
  157. uint32_t mLineNumber;
  158. uint32_t mLineOffset;
  159. uint32_t mTokenLineNumber;
  160. uint32_t mTokenLineOffset;
  161. uint32_t mTokenOffset;
  162. bool mInitialized;
  163. };
  // Selects which token kinds the scanner filters out instead of
  // returning them to the caller.
  enum nsCSSScannerExclude {
    eCSSScannerExclude_None,                 // return every token, whitespace
                                             // and comments included
    eCSSScannerExclude_Comments,             // keep whitespace, drop comments
    eCSSScannerExclude_WhitespaceAndComments // drop whitespace and comments
  };
  172. // nsCSSScanner tokenizes an input stream using the CSS2.1 forward
  173. // compatible tokenization rules. Used internally by nsCSSParser;
  174. // not available for use by other code.
  175. class nsCSSScanner {
  176. public:
  177. // |aLineNumber == 1| is the beginning of a file, use |aLineNumber == 0|
  178. // when the line number is unknown. The scanner does not take
  179. // ownership of |aBuffer|, so the caller must be sure to keep it
  180. // alive for the lifetime of the scanner.
  181. nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber);
  182. ~nsCSSScanner();
  183. void SetErrorReporter(mozilla::css::ErrorReporter* aReporter) {
  184. mReporter = aReporter;
  185. }
  186. // Set whether or not we are processing SVG
  187. void SetSVGMode(bool aSVGMode) {
  188. mSVGMode = aSVGMode;
  189. }
  190. bool IsSVGMode() const {
  191. return mSVGMode;
  192. }
  193. // Reset or check whether a BAD_URL or BAD_STRING token has been seen.
  194. void ClearSeenBadToken() { mSeenBadToken = false; }
  195. bool SeenBadToken() const { return mSeenBadToken; }
  196. // Reset or check whether a "var(" FUNCTION token has been seen.
  197. void ClearSeenVariableReference() { mSeenVariableReference = false; }
  198. bool SeenVariableReference() const { return mSeenVariableReference; }
  199. // Get the 1-based line number of the last character of
  200. // the most recently processed token.
  201. uint32_t GetLineNumber() const { return mTokenLineNumber; }
  202. // Get the 0-based column number of the first character of
  203. // the most recently processed token.
  204. uint32_t GetColumnNumber() const
  205. { return mTokenOffset - mTokenLineOffset; }
  206. uint32_t GetTokenOffset() const
  207. { return mTokenOffset; }
  208. uint32_t GetTokenEndOffset() const
  209. { return mOffset; }
  210. // Get the text of the line containing the first character of
  211. // the most recently processed token.
  212. nsDependentSubstring GetCurrentLine() const;
  213. // Get the next token. Return false on EOF. aTokenResult is filled
  214. // in with the data for the token. aSkip controls whether
  215. // whitespace and/or comment tokens are ever returned.
  216. bool Next(nsCSSToken& aTokenResult, nsCSSScannerExclude aSkip);
  217. // Get the body of an URL token (everything after the 'url(').
  218. // This is exposed for use by nsCSSParser::ParseMozDocumentRule,
  219. // which, for historical reasons, must make additional function
  220. // tokens behave like url(). Please do not add new uses to the
  221. // parser.
  222. void NextURL(nsCSSToken& aTokenResult);
  223. // This is exposed for use by nsCSSParser::ParsePseudoClassWithNthPairArg,
  224. // because "2n-1" is a single DIMENSION token, and "n-1" is a single
  225. // IDENT token, but the :nth() selector syntax wants to interpret
  226. // them the same as "2n -1" and "n -1" respectively. Please do not
  227. // add new uses to the parser.
  228. //
  229. // Note: this function may not be used to back up over a line boundary.
  230. void Backup(uint32_t n);
  231. // Starts recording the input stream from the current position.
  232. void StartRecording();
  233. // Abandons recording of the input stream.
  234. void StopRecording();
  235. // Stops recording of the input stream and appends the recorded
  236. // input to aBuffer.
  237. void StopRecording(nsString& aBuffer);
  238. // Returns the length of the current recording.
  239. uint32_t RecordingLength() const;
  240. #ifdef DEBUG
  241. bool IsRecording() const;
  242. #endif
  243. // Stores the current scanner offset into the specified object.
  244. void SavePosition(nsCSSScannerPosition& aState);
  245. // Resets the scanner offset to a position saved by SavePosition.
  246. void RestoreSavedPosition(const nsCSSScannerPosition& aState);
  247. enum EOFCharacters {
  248. eEOFCharacters_None = 0x0000,
  249. // to handle \<EOF> inside strings
  250. eEOFCharacters_DropBackslash = 0x0001,
  251. // to handle \<EOF> outside strings
  252. eEOFCharacters_ReplacementChar = 0x0002,
  253. // to close comments
  254. eEOFCharacters_Asterisk = 0x0004,
  255. eEOFCharacters_Slash = 0x0008,
  256. // to close double-quoted strings
  257. eEOFCharacters_DoubleQuote = 0x0010,
  258. // to close single-quoted strings
  259. eEOFCharacters_SingleQuote = 0x0020,
  260. // to close URLs
  261. eEOFCharacters_CloseParen = 0x0040,
  262. };
  263. // Appends any characters to the specified string the input stream to make the
  264. // last token not rely on special EOF handling behavior.
  265. //
  266. // If eEOFCharacters_DropBackslash is in aEOFCharacters, it is ignored.
  267. static void AppendImpliedEOFCharacters(EOFCharacters aEOFCharacters,
  268. nsAString& aString);
  269. EOFCharacters GetEOFCharacters() const {
  270. #ifdef DEBUG
  271. AssertEOFCharactersValid(mEOFCharacters);
  272. #endif
  273. return mEOFCharacters;
  274. }
  275. #ifdef DEBUG
  276. static void AssertEOFCharactersValid(uint32_t c);
  277. #endif
  278. protected:
  279. int32_t Peek(uint32_t n = 0);
  280. void Advance(uint32_t n = 1);
  281. void AdvanceLine();
  282. void SkipWhitespace();
  283. void SkipComment();
  284. bool GatherEscape(nsString& aOutput, bool aInString);
  285. bool GatherText(uint8_t aClass, nsString& aIdent);
  286. bool ScanIdent(nsCSSToken& aResult);
  287. bool ScanAtKeyword(nsCSSToken& aResult);
  288. bool ScanHash(nsCSSToken& aResult);
  289. bool ScanNumber(nsCSSToken& aResult);
  290. bool ScanString(nsCSSToken& aResult);
  291. bool ScanURange(nsCSSToken& aResult);
  292. void SetEOFCharacters(uint32_t aEOFCharacters);
  293. void AddEOFCharacters(uint32_t aEOFCharacters);
  294. const char16_t *mBuffer;
  295. uint32_t mOffset;
  296. uint32_t mCount;
  297. uint32_t mLineNumber;
  298. uint32_t mLineOffset;
  299. uint32_t mTokenLineNumber;
  300. uint32_t mTokenLineOffset;
  301. uint32_t mTokenOffset;
  302. uint32_t mRecordStartOffset;
  303. EOFCharacters mEOFCharacters;
  304. mozilla::css::ErrorReporter *mReporter;
  305. // True if we are in SVG mode; false in "normal" CSS
  306. bool mSVGMode;
  307. bool mRecording;
  308. bool mSeenBadToken;
  309. bool mSeenVariableReference;
  310. };
  311. // Token for the grid-template-areas micro-syntax
  312. // http://dev.w3.org/csswg/css-grid/#propdef-grid-template-areas
  313. struct MOZ_STACK_CLASS nsCSSGridTemplateAreaToken {
  314. nsAutoString mName; // Empty for a null cell, non-empty for a named cell
  315. bool isTrash; // True for a trash token, mName is ignored in this case.
  316. };
  317. // Scanner for the grid-template-areas micro-syntax
  318. class nsCSSGridTemplateAreaScanner {
  319. public:
  320. explicit nsCSSGridTemplateAreaScanner(const nsAString& aBuffer);
  321. // Get the next token. Return false on EOF.
  322. // aTokenResult is filled in with the data for the token.
  323. bool Next(nsCSSGridTemplateAreaToken& aTokenResult);
  324. private:
  325. const char16_t *mBuffer;
  326. uint32_t mOffset;
  327. uint32_t mCount;
  328. };
  329. #endif /* nsCSSScanner_h___ */