12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403 |
- /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
- /* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
- /* tokenization of CSS style sheets */
- #include "nsCSSScanner.h"
- #include "nsStyleUtil.h"
- #include "nsISupportsImpl.h"
- #include "mozilla/ArrayUtils.h"
- #include "mozilla/css/ErrorReporter.h"
- #include "mozilla/Likely.h"
- #include <algorithm>
- using mozilla::IsNaN;
/* Character class tables and related helper functions. */

// Bit flags for the lexical classes recorded per ASCII character in
// gLexTable.  A table entry is the bitwise OR of every class the
// character belongs to.
static const uint8_t IS_HEX_DIGIT = 1 << 0;  // 0x01
static const uint8_t IS_IDSTART   = 1 << 1;  // 0x02
static const uint8_t IS_IDCHAR    = 1 << 2;  // 0x04
static const uint8_t IS_URL_CHAR  = 1 << 3;  // 0x08
static const uint8_t IS_HSPACE    = 1 << 4;  // 0x10
static const uint8_t IS_VSPACE    = 1 << 5;  // 0x20
static const uint8_t IS_STRING    = 1 << 6;  // 0x40
// Any whitespace, horizontal or vertical.
static const uint8_t IS_SPACE     = IS_HSPACE | IS_VSPACE;
- #define H IS_HSPACE
- #define V IS_VSPACE
- #define I IS_IDCHAR
- #define J IS_IDSTART
- #define U IS_URL_CHAR
- #define S IS_STRING
- #define X IS_HEX_DIGIT
- #define SH S|H
- #define SU S|U
- #define SUI S|U|I
- #define SUIJ S|U|I|J
- #define SUIX S|U|I|X
- #define SUIJX S|U|I|J|X
- static const uint8_t gLexTable[] = {
- // 00 01 02 03 04 05 06 07
- 0, S, S, S, S, S, S, S,
- // 08 TAB LF 0B FF CR 0E 0F
- S, SH, V, S, V, V, S, S,
- // 10 11 12 13 14 15 16 17
- S, S, S, S, S, S, S, S,
- // 18 19 1A 1B 1C 1D 1E 1F
- S, S, S, S, S, S, S, S,
- //SPC ! " # $ % & '
- SH, SU, 0, SU, SU, SU, SU, 0,
- // ( ) * + , - . /
- S, S, SU, SU, SU, SUI, SU, SU,
- // 0 1 2 3 4 5 6 7
- SUIX, SUIX, SUIX, SUIX, SUIX, SUIX, SUIX, SUIX,
- // 8 9 : ; < = > ?
- SUIX, SUIX, SU, SU, SU, SU, SU, SU,
- // @ A B C D E F G
- SU,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX, SUIJ,
- // H I J K L M N O
- SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
- // P Q R S T U V W
- SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
- // X Y Z [ \ ] ^ _
- SUIJ, SUIJ, SUIJ, SU, J, SU, SU, SUIJ,
- // ` a b c d e f g
- SU,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX, SUIJ,
- // h i j k l m n o
- SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
- // p q r s t u v w
- SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
- // x y z { | } ~ 7F
- SUIJ, SUIJ, SUIJ, SU, SU, SU, SU, S,
- };
- static_assert(MOZ_ARRAY_LENGTH(gLexTable) == 128,
- "gLexTable expected to cover all 128 ASCII characters");
// Retire every shorthand macro that was introduced for the gLexTable
// initializer, including H and V, so that no single-letter macro leaks
// into the remainder of the translation unit.
#undef H
#undef V
#undef I
#undef J
#undef U
#undef S
#undef X
#undef SH
#undef SU
#undef SUI
#undef SUIJ
#undef SUIX
#undef SUIJX
- /**
- * True if 'ch' is in character class 'cls', which should be one of
- * the constants above or some combination of them. All characters
- * above U+007F are considered to be in 'cls'. EOF is never in 'cls'.
- */
- static inline bool
- IsOpenCharClass(int32_t ch, uint8_t cls) {
- return ch >= 0 && (ch >= 128 || (gLexTable[ch] & cls) != 0);
- }
- /**
- * True if 'ch' is in character class 'cls', which should be one of
- * the constants above or some combination of them. No characters
- * above U+007F are considered to be in 'cls'. EOF is never in 'cls'.
- */
- static inline bool
- IsClosedCharClass(int32_t ch, uint8_t cls) {
- return uint32_t(ch) < 128 && (gLexTable[ch] & cls) != 0;
- }
- /**
- * True if 'ch' is CSS whitespace, i.e. any of the ASCII characters
- * TAB, LF, FF, CR, or SPC.
- */
- static inline bool
- IsWhitespace(int32_t ch) {
- return IsClosedCharClass(ch, IS_SPACE);
- }
- /**
- * True if 'ch' is horizontal whitespace, i.e. TAB or SPC.
- */
- static inline bool
- IsHorzSpace(int32_t ch) {
- return IsClosedCharClass(ch, IS_HSPACE);
- }
- /**
- * True if 'ch' is vertical whitespace, i.e. LF, FF, or CR. Vertical
- * whitespace requires special handling when consumed, see AdvanceLine.
- */
- static inline bool
- IsVertSpace(int32_t ch) {
- return IsClosedCharClass(ch, IS_VSPACE);
- }
- /**
- * True if 'ch' is a character that can appear in the middle of an identifier.
- * This includes U+0000 since it is handled as U+FFFD, but for purposes of
- * GatherText it should not be included in IsOpenCharClass.
- */
- static inline bool
- IsIdentChar(int32_t ch) {
- return IsOpenCharClass(ch, IS_IDCHAR) || ch == 0;
- }
- /**
- * True if 'ch' is a character that by itself begins an identifier.
- * This includes U+0000 since it is handled as U+FFFD, but for purposes of
- * GatherText it should not be included in IsOpenCharClass.
- * (This is a subset of IsIdentChar.)
- */
- static inline bool
- IsIdentStart(int32_t ch) {
- return IsOpenCharClass(ch, IS_IDSTART) || ch == 0;
- }
- /**
- * True if the two-character sequence aFirstChar+aSecondChar begins an
- * identifier.
- */
- static inline bool
- StartsIdent(int32_t aFirstChar, int32_t aSecondChar)
- {
- return IsIdentStart(aFirstChar) ||
- (aFirstChar == '-' && (aSecondChar == '-' || IsIdentStart(aSecondChar)));
- }
/**
 * True if 'ch' is one of the decimal digits '0' through '9'.
 */
static inline bool
IsDigit(int32_t ch) {
  if (ch < '0') {
    return false;
  }
  return ch <= '9';
}
- /**
- * True if 'ch' is a hexadecimal digit.
- */
- static inline bool
- IsHexDigit(int32_t ch) {
- return IsClosedCharClass(ch, IS_HEX_DIGIT);
- }
/**
 * Return the numeric value of the decimal digit 'ch'.  The caller is
 * responsible for ensuring that 'ch' really is a decimal digit.
 */
static inline uint32_t
DecimalDigitValue(int32_t ch)
{
  const int32_t offsetFromZero = ch - '0';
  return offsetFromZero;
}
- /**
- * Assuming that 'ch' is a hexadecimal digit, return its numeric value.
- */
- static inline uint32_t
- HexDigitValue(int32_t ch)
- {
- if (IsDigit(ch)) {
- return DecimalDigitValue(ch);
- } else {
- // Note: c&7 just keeps the low three bits which causes
- // upper and lower case alphabetics to both yield their
- // "relative to 10" value for computing the hex value.
- return (ch & 0x7) + 9;
- }
- }
- /**
- * If 'ch' can be the first character of a two-character match operator
- * token, return the token type code for that token, otherwise return
- * eCSSToken_Symbol to indicate that it can't.
- */
- static inline nsCSSTokenType
- MatchOperatorType(int32_t ch)
- {
- switch (ch) {
- case '~': return eCSSToken_Includes;
- case '|': return eCSSToken_Dashmatch;
- case '^': return eCSSToken_Beginsmatch;
- case '$': return eCSSToken_Endsmatch;
- case '*': return eCSSToken_Containsmatch;
- default: return eCSSToken_Symbol;
- }
- }
- /* Out-of-line nsCSSToken methods. */
- /**
- * Append the textual representation of |this| to |aBuffer|.
- */
- void
- nsCSSToken::AppendToString(nsString& aBuffer) const
- {
- switch (mType) {
- case eCSSToken_Ident:
- nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
- break;
- case eCSSToken_AtKeyword:
- aBuffer.Append('@');
- nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
- break;
- case eCSSToken_ID:
- case eCSSToken_Hash:
- aBuffer.Append('#');
- nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
- break;
- case eCSSToken_Function:
- nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
- aBuffer.Append('(');
- break;
- case eCSSToken_URL:
- case eCSSToken_Bad_URL:
- aBuffer.AppendLiteral("url(");
- if (mSymbol != char16_t(0)) {
- nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
- } else {
- aBuffer.Append(mIdent);
- }
- if (mType == eCSSToken_URL) {
- aBuffer.Append(char16_t(')'));
- }
- break;
- case eCSSToken_Number:
- if (mIntegerValid) {
- aBuffer.AppendInt(mInteger, 10);
- } else {
- aBuffer.AppendFloat(mNumber);
- }
- break;
- case eCSSToken_Percentage:
- aBuffer.AppendFloat(mNumber * 100.0f);
- aBuffer.Append(char16_t('%'));
- break;
- case eCSSToken_Dimension:
- if (mIntegerValid) {
- aBuffer.AppendInt(mInteger, 10);
- } else {
- aBuffer.AppendFloat(mNumber);
- }
- nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
- break;
- case eCSSToken_Bad_String:
- nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
- // remove the trailing quote character
- aBuffer.Truncate(aBuffer.Length() - 1);
- break;
- case eCSSToken_String:
- nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
- break;
- case eCSSToken_Symbol:
- aBuffer.Append(mSymbol);
- break;
- case eCSSToken_Whitespace:
- aBuffer.Append(' ');
- break;
- case eCSSToken_HTMLComment:
- case eCSSToken_URange:
- aBuffer.Append(mIdent);
- break;
- case eCSSToken_Includes:
- aBuffer.AppendLiteral("~=");
- break;
- case eCSSToken_Dashmatch:
- aBuffer.AppendLiteral("|=");
- break;
- case eCSSToken_Beginsmatch:
- aBuffer.AppendLiteral("^=");
- break;
- case eCSSToken_Endsmatch:
- aBuffer.AppendLiteral("$=");
- break;
- case eCSSToken_Containsmatch:
- aBuffer.AppendLiteral("*=");
- break;
- default:
- NS_ERROR("invalid token type");
- break;
- }
- }
- /* nsCSSScanner methods. */
- nsCSSScanner::nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber)
- : mBuffer(aBuffer.BeginReading())
- , mOffset(0)
- , mCount(aBuffer.Length())
- , mLineNumber(aLineNumber)
- , mLineOffset(0)
- , mTokenLineNumber(aLineNumber)
- , mTokenLineOffset(0)
- , mTokenOffset(0)
- , mRecordStartOffset(0)
- , mEOFCharacters(eEOFCharacters_None)
- , mReporter(nullptr)
- , mSVGMode(false)
- , mRecording(false)
- , mSeenBadToken(false)
- , mSeenVariableReference(false)
- {
- MOZ_COUNT_CTOR(nsCSSScanner);
- }
- nsCSSScanner::~nsCSSScanner()
- {
- MOZ_COUNT_DTOR(nsCSSScanner);
- }
- void
- nsCSSScanner::StartRecording()
- {
- MOZ_ASSERT(!mRecording, "already started recording");
- mRecording = true;
- mRecordStartOffset = mOffset;
- }
- void
- nsCSSScanner::StopRecording()
- {
- MOZ_ASSERT(mRecording, "haven't started recording");
- mRecording = false;
- }
- void
- nsCSSScanner::StopRecording(nsString& aBuffer)
- {
- MOZ_ASSERT(mRecording, "haven't started recording");
- mRecording = false;
- aBuffer.Append(mBuffer + mRecordStartOffset,
- mOffset - mRecordStartOffset);
- }
- uint32_t
- nsCSSScanner::RecordingLength() const
- {
- MOZ_ASSERT(mRecording, "haven't started recording");
- return mOffset - mRecordStartOffset;
- }
#ifdef DEBUG
/**
 * Debug-only accessor: true between StartRecording and StopRecording.
 */
bool
nsCSSScanner::IsRecording() const
{
  const bool recording = mRecording;
  return recording;
}
#endif
- nsDependentSubstring
- nsCSSScanner::GetCurrentLine() const
- {
- uint32_t end = mTokenOffset;
- while (end < mCount && !IsVertSpace(mBuffer[end])) {
- end++;
- }
- return nsDependentSubstring(mBuffer + mTokenLineOffset,
- mBuffer + end);
- }
- /**
- * Return the raw UTF-16 code unit at position |mOffset + n| within
- * the read buffer. If that is beyond the end of the buffer, returns
- * -1 to indicate end of input.
- */
- inline int32_t
- nsCSSScanner::Peek(uint32_t n)
- {
- if (mOffset + n >= mCount) {
- return -1;
- }
- return mBuffer[mOffset + n];
- }
- /**
- * Advance |mOffset| over |n| code units. Advance(0) is a no-op.
- * If |n| is greater than the distance to end of input, will silently
- * stop at the end. May not be used to advance over a line boundary;
- * AdvanceLine() must be used instead.
- */
- inline void
- nsCSSScanner::Advance(uint32_t n)
- {
- #ifdef DEBUG
- while (mOffset < mCount && n > 0) {
- MOZ_ASSERT(!IsVertSpace(mBuffer[mOffset]),
- "may not Advance() over a line boundary");
- mOffset++;
- n--;
- }
- #else
- if (mOffset + n >= mCount || mOffset + n < mOffset)
- mOffset = mCount;
- else
- mOffset += n;
- #endif
- }
- /**
- * Advance |mOffset| over a line boundary.
- */
- void
- nsCSSScanner::AdvanceLine()
- {
- MOZ_ASSERT(IsVertSpace(mBuffer[mOffset]),
- "may not AdvanceLine() over a horizontal character");
- // Advance over \r\n as a unit.
- if (mBuffer[mOffset] == '\r' && mOffset + 1 < mCount &&
- mBuffer[mOffset+1] == '\n')
- mOffset += 2;
- else
- mOffset += 1;
- // 0 is a magical line number meaning that we don't know (i.e., script)
- if (mLineNumber != 0)
- mLineNumber++;
- mLineOffset = mOffset;
- }
- /**
- * Back up |mOffset| over |n| code units. Backup(0) is a no-op.
- * If |n| is greater than the distance to beginning of input, will
- * silently stop at the beginning. May not be used to back up over a
- * line boundary.
- */
- void
- nsCSSScanner::Backup(uint32_t n)
- {
- #ifdef DEBUG
- while (mOffset > 0 && n > 0) {
- MOZ_ASSERT(!IsVertSpace(mBuffer[mOffset-1]),
- "may not Backup() over a line boundary");
- mOffset--;
- n--;
- }
- #else
- if (mOffset < n)
- mOffset = 0;
- else
- mOffset -= n;
- #endif
- }
- void
- nsCSSScanner::SavePosition(nsCSSScannerPosition& aState)
- {
- aState.mOffset = mOffset;
- aState.mLineNumber = mLineNumber;
- aState.mLineOffset = mLineOffset;
- aState.mTokenLineNumber = mTokenLineNumber;
- aState.mTokenLineOffset = mTokenLineOffset;
- aState.mTokenOffset = mTokenOffset;
- aState.mInitialized = true;
- }
- void
- nsCSSScanner::RestoreSavedPosition(const nsCSSScannerPosition& aState)
- {
- MOZ_ASSERT(aState.mInitialized, "have not saved state");
- if (aState.mInitialized) {
- mOffset = aState.mOffset;
- mLineNumber = aState.mLineNumber;
- mLineOffset = aState.mLineOffset;
- mTokenLineNumber = aState.mTokenLineNumber;
- mTokenLineOffset = aState.mTokenLineOffset;
- mTokenOffset = aState.mTokenOffset;
- }
- }
- /**
- * Skip over a sequence of whitespace characters (vertical or
- * horizontal) starting at the current read position.
- */
- void
- nsCSSScanner::SkipWhitespace()
- {
- for (;;) {
- int32_t ch = Peek();
- if (!IsWhitespace(ch)) { // EOF counts as non-whitespace
- break;
- }
- if (IsVertSpace(ch)) {
- AdvanceLine();
- } else {
- Advance();
- }
- }
- }
- /**
- * Skip over one CSS comment starting at the current read position.
- */
- void
- nsCSSScanner::SkipComment()
- {
- MOZ_ASSERT(Peek() == '/' && Peek(1) == '*', "should not have been called");
- Advance(2);
- for (;;) {
- int32_t ch = Peek();
- if (ch < 0) {
- if (mReporter)
- mReporter->ReportUnexpectedEOF("PECommentEOF");
- SetEOFCharacters(eEOFCharacters_Asterisk | eEOFCharacters_Slash);
- return;
- }
- if (ch == '*') {
- Advance();
- ch = Peek();
- if (ch < 0) {
- if (mReporter)
- mReporter->ReportUnexpectedEOF("PECommentEOF");
- SetEOFCharacters(eEOFCharacters_Slash);
- return;
- }
- if (ch == '/') {
- Advance();
- return;
- }
- } else if (IsVertSpace(ch)) {
- AdvanceLine();
- } else {
- Advance();
- }
- }
- }
- /**
- * If there is a valid escape sequence starting at the current read
- * position, consume it, decode it, append the result to |aOutput|,
- * and return true. Otherwise, consume nothing, leave |aOutput|
- * unmodified, and return false. If |aInString| is true, accept the
- * additional form of escape sequence allowed within string-like tokens.
- */
- bool
- nsCSSScanner::GatherEscape(nsString& aOutput, bool aInString)
- {
- MOZ_ASSERT(Peek() == '\\', "should not have been called");
- int32_t ch = Peek(1);
- if (ch < 0) {
- // If we are in a string (or a url() containing a string), we want to drop
- // the backslash on the floor. Otherwise, we want to treat it as a U+FFFD
- // character.
- Advance();
- if (aInString) {
- SetEOFCharacters(eEOFCharacters_DropBackslash);
- } else {
- aOutput.Append(UCS2_REPLACEMENT_CHAR);
- SetEOFCharacters(eEOFCharacters_ReplacementChar);
- }
- return true;
- }
- if (IsVertSpace(ch)) {
- if (aInString) {
- // In strings (and in url() containing a string), escaped
- // newlines are completely removed, to allow splitting over
- // multiple lines.
- Advance();
- AdvanceLine();
- return true;
- }
- // Outside of strings, backslash followed by a newline is not an escape.
- return false;
- }
- if (!IsHexDigit(ch)) {
- // "Any character (except a hexadecimal digit, linefeed, carriage
- // return, or form feed) can be escaped with a backslash to remove
- // its special meaning." -- CSS2.1 section 4.1.3
- Advance(2);
- if (ch == 0) {
- aOutput.Append(UCS2_REPLACEMENT_CHAR);
- } else {
- aOutput.Append(ch);
- }
- return true;
- }
- // "[at most six hexadecimal digits following a backslash] stand
- // for the ISO 10646 character with that number, which must not be
- // zero. (It is undefined in CSS 2.1 what happens if a style sheet
- // does contain a character with Unicode codepoint zero.)"
- // -- CSS2.1 section 4.1.3
- // At this point we know we have \ followed by at least one
- // hexadecimal digit, therefore the escape sequence is valid and we
- // can go ahead and consume the backslash.
- Advance();
- uint32_t val = 0;
- int i = 0;
- do {
- val = val * 16 + HexDigitValue(ch);
- i++;
- Advance();
- ch = Peek();
- } while (i < 6 && IsHexDigit(ch));
- // "Interpret the hex digits as a hexadecimal number. If this number is zero,
- // or is greater than the maximum allowed codepoint, return U+FFFD
- // REPLACEMENT CHARACTER" -- CSS Syntax Level 3
- if (MOZ_UNLIKELY(val == 0)) {
- aOutput.Append(UCS2_REPLACEMENT_CHAR);
- } else {
- AppendUCS4ToUTF16(ENSURE_VALID_CHAR(val), aOutput);
- }
- // Consume exactly one whitespace character after a
- // hexadecimal escape sequence.
- if (IsVertSpace(ch)) {
- AdvanceLine();
- } else if (IsHorzSpace(ch)) {
- Advance();
- }
- return true;
- }
- /**
- * Consume a run of "text" beginning with the current read position,
- * consisting of characters in the class |aClass| (which must be a
- * suitable argument to IsOpenCharClass) plus escape sequences.
- * Append the text to |aText|, after decoding escape sequences.
- *
- * Returns true if at least one character was appended to |aText|,
- * false otherwise.
- */
- bool
- nsCSSScanner::GatherText(uint8_t aClass, nsString& aText)
- {
- // This is all of the character classes currently used with
- // GatherText. If you have a need to use this function with a
- // different class, go ahead and add it.
- MOZ_ASSERT(aClass == IS_STRING ||
- aClass == IS_IDCHAR ||
- aClass == IS_URL_CHAR,
- "possibly-inappropriate character class");
- uint32_t start = mOffset;
- bool inString = aClass == IS_STRING;
- for (;;) {
- // Consume runs of unescaped characters in one go.
- uint32_t n = mOffset;
- while (n < mCount && IsOpenCharClass(mBuffer[n], aClass)) {
- n++;
- }
- if (n > mOffset) {
- aText.Append(&mBuffer[mOffset], n - mOffset);
- mOffset = n;
- }
- if (n == mCount) {
- break;
- }
- int32_t ch = Peek();
- MOZ_ASSERT(!IsOpenCharClass(ch, aClass),
- "should not have exited the inner loop");
- if (ch == 0) {
- Advance();
- aText.Append(UCS2_REPLACEMENT_CHAR);
- continue;
- }
- if (ch != '\\') {
- break;
- }
- if (!GatherEscape(aText, inString)) {
- break;
- }
- }
- return mOffset > start;
- }
- /**
- * Scan an Ident token. This also handles Function and URL tokens,
- * both of which begin indistinguishably from an identifier. It can
- * produce a Symbol token when an apparent identifier actually led
- * into an invalid escape sequence.
- */
- bool
- nsCSSScanner::ScanIdent(nsCSSToken& aToken)
- {
- if (MOZ_UNLIKELY(!GatherText(IS_IDCHAR, aToken.mIdent))) {
- MOZ_ASSERT(Peek() == '\\',
- "unexpected IsIdentStart character that did not begin an ident");
- aToken.mSymbol = Peek();
- Advance();
- return true;
- }
- if (MOZ_LIKELY(Peek() != '(')) {
- aToken.mType = eCSSToken_Ident;
- return true;
- }
- Advance();
- aToken.mType = eCSSToken_Function;
- if (aToken.mIdent.LowerCaseEqualsLiteral("url")) {
- NextURL(aToken);
- } else if (aToken.mIdent.LowerCaseEqualsLiteral("var")) {
- mSeenVariableReference = true;
- }
- return true;
- }
- /**
- * Scan an AtKeyword token. Also handles production of Symbol when
- * an '@' is not followed by an identifier.
- */
- bool
- nsCSSScanner::ScanAtKeyword(nsCSSToken& aToken)
- {
- MOZ_ASSERT(Peek() == '@', "should not have been called");
- // Fall back for when '@' isn't followed by an identifier.
- aToken.mSymbol = '@';
- Advance();
- int32_t ch = Peek();
- if (StartsIdent(ch, Peek(1))) {
- if (GatherText(IS_IDCHAR, aToken.mIdent)) {
- aToken.mType = eCSSToken_AtKeyword;
- }
- }
- return true;
- }
- /**
- * Scan a Hash token. Handles the distinction between eCSSToken_ID
- * and eCSSToken_Hash, and handles production of Symbol when a '#'
- * is not followed by identifier characters.
- */
- bool
- nsCSSScanner::ScanHash(nsCSSToken& aToken)
- {
- MOZ_ASSERT(Peek() == '#', "should not have been called");
- // Fall back for when '#' isn't followed by identifier characters.
- aToken.mSymbol = '#';
- Advance();
- int32_t ch = Peek();
- if (IsIdentChar(ch) || ch == '\\') {
- nsCSSTokenType type =
- StartsIdent(ch, Peek(1)) ? eCSSToken_ID : eCSSToken_Hash;
- aToken.mIdent.SetLength(0);
- if (GatherText(IS_IDCHAR, aToken.mIdent)) {
- aToken.mType = type;
- }
- }
- return true;
- }
- /**
- * Scan a Number, Percentage, or Dimension token (all of which begin
- * like a Number). Can produce a Symbol when a '.' is not followed by
- * digits, or when '+' or '-' are not followed by either a digit or a
- * '.' and then a digit. Can also produce a HTMLComment when it
- * encounters '-->'.
- */
- bool
- nsCSSScanner::ScanNumber(nsCSSToken& aToken)
- {
- int32_t c = Peek();
- #ifdef DEBUG
- {
- int32_t c2 = Peek(1);
- int32_t c3 = Peek(2);
- MOZ_ASSERT(IsDigit(c) ||
- (IsDigit(c2) && (c == '.' || c == '+' || c == '-')) ||
- (IsDigit(c3) && (c == '+' || c == '-') && c2 == '.'),
- "should not have been called");
- }
- #endif
- // Sign of the mantissa (-1 or 1).
- int32_t sign = c == '-' ? -1 : 1;
- // Absolute value of the integer part of the mantissa. This is a double so
- // we don't run into overflow issues for consumers that only care about our
- // floating-point value while still being able to express the full int32_t
- // range for consumers who want integers.
- double intPart = 0;
- // Fractional part of the mantissa. This is a double so that when we convert
- // to float at the end we'll end up rounding to nearest float instead of
- // truncating down (as we would if fracPart were a float and we just
- // effectively lost the last several digits).
- double fracPart = 0;
- // Absolute value of the power of 10 that we should multiply by (only
- // relevant for numbers in scientific notation). Has to be a signed integer,
- // because multiplication of signed by unsigned converts the unsigned to
- // signed, so if we plan to actually multiply by expSign...
- int32_t exponent = 0;
- // Sign of the exponent.
- int32_t expSign = 1;
- aToken.mHasSign = (c == '+' || c == '-');
- if (aToken.mHasSign) {
- Advance();
- c = Peek();
- }
- bool gotDot = (c == '.');
- if (!gotDot) {
- // Scan the integer part of the mantissa.
- MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above");
- do {
- intPart = 10*intPart + DecimalDigitValue(c);
- Advance();
- c = Peek();
- } while (IsDigit(c));
- gotDot = (c == '.') && IsDigit(Peek(1));
- }
- if (gotDot) {
- // Scan the fractional part of the mantissa.
- Advance();
- c = Peek();
- MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above");
- // Power of ten by which we need to divide our next digit
- double divisor = 10;
- do {
- fracPart += DecimalDigitValue(c) / divisor;
- divisor *= 10;
- Advance();
- c = Peek();
- } while (IsDigit(c));
- }
- bool gotE = false;
- if (c == 'e' || c == 'E') {
- int32_t expSignChar = Peek(1);
- int32_t nextChar = Peek(2);
- if (IsDigit(expSignChar) ||
- ((expSignChar == '-' || expSignChar == '+') && IsDigit(nextChar))) {
- gotE = true;
- if (expSignChar == '-') {
- expSign = -1;
- }
- Advance(); // consumes the E
- if (expSignChar == '-' || expSignChar == '+') {
- Advance();
- c = nextChar;
- } else {
- c = expSignChar;
- }
- MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above");
- do {
- exponent = 10*exponent + DecimalDigitValue(c);
- Advance();
- c = Peek();
- } while (IsDigit(c));
- }
- }
- nsCSSTokenType type = eCSSToken_Number;
- // Set mIntegerValid for all cases (except %, below) because we need
- // it for the "2n" in :nth-child(2n).
- aToken.mIntegerValid = false;
- // Time to reassemble our number.
- // Do all the math in double precision so it's truncated only once.
- double value = sign * (intPart + fracPart);
- if (gotE) {
- // Avoid multiplication of 0 by Infinity.
- if (value != 0.0) {
- // Explicitly cast expSign*exponent to double to avoid issues with
- // overloaded pow() on Windows.
- value *= pow(10.0, double(expSign * exponent));
- }
- } else if (!gotDot) {
- // Clamp values outside of integer range.
- if (sign > 0) {
- aToken.mInteger = int32_t(std::min(intPart, double(INT32_MAX)));
- } else {
- aToken.mInteger = int32_t(std::max(-intPart, double(INT32_MIN)));
- }
- aToken.mIntegerValid = true;
- }
- nsString& ident = aToken.mIdent;
- // Check for Dimension and Percentage tokens.
- if (c >= 0) {
- if (StartsIdent(c, Peek(1))) {
- if (GatherText(IS_IDCHAR, ident)) {
- type = eCSSToken_Dimension;
- }
- } else if (c == '%') {
- Advance();
- type = eCSSToken_Percentage;
- value = value / 100.0f;
- aToken.mIntegerValid = false;
- }
- }
- MOZ_ASSERT(!IsNaN(value), "The value should not be NaN");
- aToken.mNumber = value;
- aToken.mType = type;
- return true;
- }
- /**
- * Scan a string constant ('foo' or "foo"). Will always produce
- * either a String or a Bad_String token; the latter occurs when the
- * close quote is missing. Always returns true (for convenience in Next()).
- */
- bool
- nsCSSScanner::ScanString(nsCSSToken& aToken)
- {
- int32_t aStop = Peek();
- MOZ_ASSERT(aStop == '"' || aStop == '\'', "should not have been called");
- aToken.mType = eCSSToken_String;
- aToken.mSymbol = char16_t(aStop); // Remember how it's quoted.
- Advance();
- for (;;) {
- GatherText(IS_STRING, aToken.mIdent);
- int32_t ch = Peek();
- if (ch == -1) {
- AddEOFCharacters(aStop == '"' ? eEOFCharacters_DoubleQuote :
- eEOFCharacters_SingleQuote);
- break; // EOF ends a string token with no error.
- }
- if (ch == aStop) {
- Advance();
- break;
- }
- // Both " and ' are excluded from IS_STRING.
- if (ch == '"' || ch == '\'') {
- aToken.mIdent.Append(ch);
- Advance();
- continue;
- }
- mSeenBadToken = true;
- aToken.mType = eCSSToken_Bad_String;
- if (mReporter)
- mReporter->ReportUnexpected("SEUnterminatedString", aToken);
- break;
- }
- return true;
- }
- /**
- * Scan a unicode-range token. These match the regular expression
- *
- * u\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?
- *
- * However, some such tokens are "invalid". There are three valid forms:
- *
- * u+[0-9a-f]{x} 1 <= x <= 6
- * u+[0-9a-f]{x}\?{y} 1 <= x+y <= 6
- * u+[0-9a-f]{x}-[0-9a-f]{y} 1 <= x <= 6, 1 <= y <= 6
- *
- * All unicode-range tokens have their text recorded in mIdent; valid ones
- * are also decoded into mInteger and mInteger2, and mIntegerValid is set.
- * Note that this does not validate the numeric range, only the syntactic
- * form.
- */
- bool
- nsCSSScanner::ScanURange(nsCSSToken& aResult)
- {
- int32_t intro1 = Peek();
- int32_t intro2 = Peek(1);
- int32_t ch = Peek(2);
- MOZ_ASSERT((intro1 == 'u' || intro1 == 'U') &&
- intro2 == '+' &&
- (IsHexDigit(ch) || ch == '?'),
- "should not have been called");
- aResult.mIdent.Append(intro1);
- aResult.mIdent.Append(intro2);
- Advance(2);
- bool valid = true;
- bool haveQues = false;
- uint32_t low = 0;
- uint32_t high = 0;
- int i = 0;
- do {
- aResult.mIdent.Append(ch);
- if (IsHexDigit(ch)) {
- if (haveQues) {
- valid = false; // All question marks should be at the end.
- }
- low = low*16 + HexDigitValue(ch);
- high = high*16 + HexDigitValue(ch);
- } else {
- haveQues = true;
- low = low*16 + 0x0;
- high = high*16 + 0xF;
- }
- i++;
- Advance();
- ch = Peek();
- } while (i < 6 && (IsHexDigit(ch) || ch == '?'));
- if (ch == '-' && IsHexDigit(Peek(1))) {
- if (haveQues) {
- valid = false;
- }
- aResult.mIdent.Append(ch);
- Advance();
- ch = Peek();
- high = 0;
- i = 0;
- do {
- aResult.mIdent.Append(ch);
- high = high*16 + HexDigitValue(ch);
- i++;
- Advance();
- ch = Peek();
- } while (i < 6 && IsHexDigit(ch));
- }
- aResult.mInteger = low;
- aResult.mInteger2 = high;
- aResult.mIntegerValid = valid;
- aResult.mType = eCSSToken_URange;
- return true;
- }
#ifdef DEBUG
/**
 * Debug-only check that |c| is one of the combinations of EOFCharacters
 * bits that the scanner is expected to produce.
 */
/* static */ void
nsCSSScanner::AssertEOFCharactersValid(uint32_t c)
{
  // Every permitted combination, listed explicitly.
  static const uint32_t kValidCombinations[] = {
    eEOFCharacters_None,
    eEOFCharacters_ReplacementChar,
    eEOFCharacters_Slash,
    eEOFCharacters_Asterisk | eEOFCharacters_Slash,
    eEOFCharacters_DoubleQuote,
    eEOFCharacters_SingleQuote,
    eEOFCharacters_DropBackslash | eEOFCharacters_DoubleQuote,
    eEOFCharacters_DropBackslash | eEOFCharacters_SingleQuote,
    eEOFCharacters_CloseParen,
    eEOFCharacters_ReplacementChar | eEOFCharacters_CloseParen,
    eEOFCharacters_DoubleQuote | eEOFCharacters_CloseParen,
    eEOFCharacters_SingleQuote | eEOFCharacters_CloseParen,
    eEOFCharacters_DropBackslash | eEOFCharacters_DoubleQuote |
      eEOFCharacters_CloseParen,
    eEOFCharacters_DropBackslash | eEOFCharacters_SingleQuote |
      eEOFCharacters_CloseParen,
  };

  bool isValid = false;
  for (uint32_t i = 0; i < MOZ_ARRAY_LENGTH(kValidCombinations); ++i) {
    if (c == kValidCombinations[i]) {
      isValid = true;
      break;
    }
  }
  MOZ_ASSERT(isValid, "invalid EOFCharacters value");
}
#endif
- /**
-  * Overwrite the recorded EOF characters with |aEOFCharacters|, converted
-  * to the EOFCharacters type.
-  */
- void
- nsCSSScanner::SetEOFCharacters(uint32_t aEOFCharacters)
- {
-   mEOFCharacters = static_cast<EOFCharacters>(aEOFCharacters);
- }
- /**
-  * OR the bits of |aEOFCharacters| into the recorded EOF characters,
-  * keeping whatever bits were already set.
-  */
- void
- nsCSSScanner::AddEOFCharacters(uint32_t aEOFCharacters)
- {
-   mEOFCharacters =
-     static_cast<EOFCharacters>(mEOFCharacters | aEOFCharacters);
- }
- // Characters that can be implied at EOF, one per EOFCharacters bit (after
- // eEOFCharacters_DropBackslash), in the order they are appended.  The
- // trailing 0 terminates the table.
- static const char16_t kImpliedEOFCharacters[] = {
-   UCS2_REPLACEMENT_CHAR, '*', '/', '"', '\'', ')', 0
- };
- /**
-  * Append to |aResult| the character for every bit set in
-  * |aEOFCharacters|, except eEOFCharacters_DropBackslash, which appends
-  * nothing.  Asserts that no bits remain beyond the end of the table.
-  */
- /* static */ void
- nsCSSScanner::AppendImpliedEOFCharacters(EOFCharacters aEOFCharacters,
-                                          nsAString& aResult)
- {
-   // Shift out the lowest bit (eEOFCharacters_DropBackslash); every bit
-   // that is left corresponds to one table entry, lowest bit first.
-   uint32_t pendingBits = aEOFCharacters >> 1;
-   const char16_t* entry = kImpliedEOFCharacters;
-   while (*entry && pendingBits) {
-     if (pendingBits & 1) {
-       aResult.Append(*entry);
-     }
-     entry++;
-     pendingBits >>= 1;
-   }
-   MOZ_ASSERT(pendingBits == 0, "too many bits in mEOFCharacters");
- }
- /**
-  * Scan what follows the opening 'url(' of an URL token; the caller must
-  * already have consumed 'url('.  On return |aToken| is always either an
-  * URL token or a Bad_URL token.
-  *
-  * This is public so that nsCSSParser::ParseMozDocumentRule can apply the
-  * URL lexical rules in a nonstandard context.
-  */
- void
- nsCSSScanner::NextURL(nsCSSToken& aToken)
- {
-   SkipWhitespace();
-   // mIdent may still hold "url" from the caller; start from empty.
-   aToken.mIdent.Truncate();
-   int32_t c = Peek();
-   // A leading quote means the URL is given as a string.
-   const bool quoted = (c == '"' || c == '\'');
-   if (quoted) {
-     ScanString(aToken);
-     if (MOZ_UNLIKELY(aToken.mType == eCSSToken_Bad_String)) {
-       aToken.mType = eCSSToken_Bad_URL;
-       return;
-     }
-     MOZ_ASSERT(aToken.mType == eCSSToken_String, "unexpected token type");
-   } else {
-     // Unquoted url, which is allowed to be empty.
-     aToken.mSymbol = char16_t(0);
-     GatherText(IS_URL_CHAR, aToken.mIdent);
-   }
-   // Skip whitespace after the url text, then expect ')'.
-   SkipWhitespace();
-   c = Peek();
-   // A negative value from Peek() means EOF, which also ends the token.
-   if (MOZ_LIKELY(c < 0 || c == ')')) {
-     Advance();
-     aToken.mType = eCSSToken_URL;
-     if (c < 0) {
-       AddEOFCharacters(eEOFCharacters_CloseParen);
-     }
-     return;
-   }
-   mSeenBadToken = true;
-   aToken.mType = eCSSToken_Bad_URL;
-   if (quoted) {
-     // In the spec, "url(" followed by a string is a url function rather
-     // than a <url-token>, so the rest of the content before ")" has to
-     // be consumed in balance; that is done by the parser.
-     return;
-   }
-   // Unquoted case: consume up to, but not including, the next ')', which
-   // follows how <bad-url-token> is consumed in the CSS Syntax 3 spec.
-   // The ')' itself is left for the parser so that it can handle both
-   // cases.
-   for (;;) {
-     if (IsVertSpace(c)) {
-       AdvanceLine();
-     } else {
-       Advance();
-     }
-     c = Peek();
-     if (c < 0 || c == ')') {
-       break;
-     }
-   }
- }
- /**
-  * Main entry point of the scanner: consume a single token and describe
-  * it in |aToken|.  Comments and whitespace in front of the token are
-  * either skipped or returned as tokens of their own, as selected by
-  * |aSkip|.
-  *
-  * Returns true when a token was consumed and false at EOF.  Unless
-  * called when already at EOF, the read position always moves forward by
-  * at least one character.
-  */
- bool
- nsCSSScanner::Next(nsCSSToken& aToken, nsCSSScannerExclude aSkip)
- {
-   // Reset the token once here so that none of the paths below have to.
-   aToken.mIdent.Truncate();
-   aToken.mType = eCSSToken_Symbol;
-   int32_t c;
-   bool skippedSomething;
-   do {
-     // Remember where the upcoming token starts.  This is redone after
-     // every skipped run of whitespace or comment.
-     mTokenOffset = mOffset;
-     mTokenLineOffset = mLineOffset;
-     mTokenLineNumber = mLineNumber;
-     skippedSomething = false;
-     c = Peek();
-     if (IsWhitespace(c)) {
-       SkipWhitespace();
-       if (aSkip != eCSSScannerExclude_WhitespaceAndComments) {
-         aToken.mType = eCSSToken_Whitespace;
-         return true;
-       }
-       skippedSomething = true;
-     } else if (c == '/' && !IsSVGMode() && Peek(1) == '*') {
-       SkipComment();
-       if (aSkip == eCSSScannerExclude_None) {
-         aToken.mType = eCSSToken_Comment;
-         return true;
-       }
-       skippedSomething = true;
-     }
-   } while (skippedSomething);
-   // Nothing left to read.
-   if (c < 0) {
-     return false;
-   }
-   // A leading 'u' is either a UNICODE-RANGE or an ordinary
-   // identifier-family token.
-   if (c == 'u' || c == 'U') {
-     const int32_t second = Peek(1);
-     const int32_t third = Peek(2);
-     const bool startsURange =
-       second == '+' && (IsHexDigit(third) || third == '?');
-     return startsURange ? ScanURange(aToken) : ScanIdent(aToken);
-   }
-   // Identifier family.
-   if (IsIdentStart(c)) {
-     return ScanIdent(aToken);
-   }
-   // Number family: a digit, or '.' directly followed by a digit.
-   if (IsDigit(c) || (c == '.' && IsDigit(Peek(1)))) {
-     return ScanNumber(aToken);
-   }
-   // An explicit '+' sign counts only when a number follows it.
-   if (c == '+') {
-     const int32_t second = Peek(1);
-     if (IsDigit(second) || (second == '.' && IsDigit(Peek(2)))) {
-       return ScanNumber(aToken);
-     }
-   }
-   // '-' may begin an identifier-family token, a number-family token, or
-   // the "-->" HTML-comment token.
-   if (c == '-') {
-     const int32_t second = Peek(1);
-     const int32_t third = Peek(2);
-     if (IsIdentStart(second) || (second == '-' && third != '>')) {
-       return ScanIdent(aToken);
-     }
-     if (IsDigit(second) || (second == '.' && IsDigit(third))) {
-       return ScanNumber(aToken);
-     }
-     if (second == '-' && third == '>') {
-       Advance(3);
-       aToken.mType = eCSSToken_HTMLComment;
-       aToken.mIdent.AssignLiteral("-->");
-       return true;
-     }
-   }
-   // The opening HTML-comment token.
-   if (c == '<' && Peek(1) == '!' && Peek(2) == '-' && Peek(3) == '-') {
-     Advance(4);
-     aToken.mType = eCSSToken_HTMLComment;
-     aToken.mIdent.AssignLiteral("<!--");
-     return true;
-   }
-   // AT_KEYWORD
-   if (c == '@') {
-     return ScanAtKeyword(aToken);
-   }
-   // HASH
-   if (c == '#') {
-     return ScanHash(aToken);
-   }
-   // STRING
-   if (c == '"' || c == '\'') {
-     return ScanString(aToken);
-   }
-   // Two-character match operators: ~= |= ^= $= *=
-   const nsCSSTokenType matchOp = MatchOperatorType(c);
-   if (matchOp != eCSSToken_Symbol && Peek(1) == '=') {
-     aToken.mType = matchOp;
-     Advance(2);
-     return true;
-   }
-   // Anything else is a one-character symbol (DELIM); mType was already
-   // set to eCSSToken_Symbol at the top.
-   aToken.mSymbol = c;
-   Advance();
-   return true;
- }
- /*
-  * nsCSSGridTemplateAreaScanner methods.
-  *
-  * Constructor: sets the scanner up to read |aBuffer| from its first
-  * character.  Only the result of BeginReading() and the length are
-  * stored here, so |aBuffer| presumably has to outlive the scanner --
-  * TODO confirm against the declaration of mBuffer.
-  */
- nsCSSGridTemplateAreaScanner::nsCSSGridTemplateAreaScanner(const nsAString& aBuffer)
- : mBuffer(aBuffer.BeginReading()) // start of the text to scan
- , mOffset(0) // index of the next character Next() will look at
- , mCount(aBuffer.Length()) // Next() stops once mOffset reaches this
- {
- }
- /**
-  * Read the next token into |aTokenResult|.  After skipping leading
-  * whitespace, the token is one of:
-  *
-  *   - a named cell: a run of IS_IDCHAR characters, copied into mName;
-  *   - a null cell:  a run of one or more '.', leaving mName empty;
-  *   - trash:        any other single character, flagged via isTrash.
-  *
-  * Returns false when the end of the buffer is reached before any
-  * non-whitespace character is found, and true otherwise.
-  */
- bool
- nsCSSGridTemplateAreaScanner::Next(nsCSSGridTemplateAreaToken& aTokenResult)
- {
-   int32_t first;
-   // Step over whitespace; |first| ends up as the first other character,
-   // with mOffset already pointing just past it.
-   for (;;) {
-     if (mOffset >= mCount) {
-       return false;
-     }
-     first = mBuffer[mOffset];
-     mOffset++;
-     if (!IsWhitespace(first)) {
-       break;
-     }
-   }
-   // Named cell token
-   if (IsOpenCharClass(first, IS_IDCHAR)) {
-     const uint32_t nameStart = mOffset - 1; // offset of |first|
-     while (mOffset < mCount && IsOpenCharClass(mBuffer[mOffset], IS_IDCHAR)) {
-       mOffset++;
-     }
-     aTokenResult.mName.Assign(&mBuffer[nameStart], mOffset - nameStart);
-     aTokenResult.isTrash = false;
-     return true;
-   }
-   // Null cell token: a whole run of '.' counts as a single token.
-   if (first == '.') {
-     while (mOffset < mCount && mBuffer[mOffset] == '.') {
-       mOffset++;
-     }
-     aTokenResult.mName.Truncate();
-     aTokenResult.isTrash = false;
-     return true;
-   }
-   // Trash token
-   aTokenResult.isTrash = true;
-   return true;
- }
|