nsCSSScanner.cpp 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403
  1. /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
  2. /* This Source Code Form is subject to the terms of the Mozilla Public
  3. * License, v. 2.0. If a copy of the MPL was not distributed with this
  4. * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  5. /* tokenization of CSS style sheets */
  6. #include "nsCSSScanner.h"
  7. #include "nsStyleUtil.h"
  8. #include "nsISupportsImpl.h"
  9. #include "mozilla/ArrayUtils.h"
  10. #include "mozilla/css/ErrorReporter.h"
  11. #include "mozilla/Likely.h"
  12. #include <algorithm>
  13. using mozilla::IsNaN;
  14. /* Character class tables and related helper functions. */
  15. static const uint8_t IS_HEX_DIGIT = 0x01;
  16. static const uint8_t IS_IDSTART = 0x02;
  17. static const uint8_t IS_IDCHAR = 0x04;
  18. static const uint8_t IS_URL_CHAR = 0x08;
  19. static const uint8_t IS_HSPACE = 0x10;
  20. static const uint8_t IS_VSPACE = 0x20;
  21. static const uint8_t IS_SPACE = IS_HSPACE|IS_VSPACE;
  22. static const uint8_t IS_STRING = 0x40;
  23. #define H IS_HSPACE
  24. #define V IS_VSPACE
  25. #define I IS_IDCHAR
  26. #define J IS_IDSTART
  27. #define U IS_URL_CHAR
  28. #define S IS_STRING
  29. #define X IS_HEX_DIGIT
  30. #define SH S|H
  31. #define SU S|U
  32. #define SUI S|U|I
  33. #define SUIJ S|U|I|J
  34. #define SUIX S|U|I|X
  35. #define SUIJX S|U|I|J|X
  36. static const uint8_t gLexTable[] = {
  37. // 00 01 02 03 04 05 06 07
  38. 0, S, S, S, S, S, S, S,
  39. // 08 TAB LF 0B FF CR 0E 0F
  40. S, SH, V, S, V, V, S, S,
  41. // 10 11 12 13 14 15 16 17
  42. S, S, S, S, S, S, S, S,
  43. // 18 19 1A 1B 1C 1D 1E 1F
  44. S, S, S, S, S, S, S, S,
  45. //SPC ! " # $ % & '
  46. SH, SU, 0, SU, SU, SU, SU, 0,
  47. // ( ) * + , - . /
  48. S, S, SU, SU, SU, SUI, SU, SU,
  49. // 0 1 2 3 4 5 6 7
  50. SUIX, SUIX, SUIX, SUIX, SUIX, SUIX, SUIX, SUIX,
  51. // 8 9 : ; < = > ?
  52. SUIX, SUIX, SU, SU, SU, SU, SU, SU,
  53. // @ A B C D E F G
  54. SU,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX, SUIJ,
  55. // H I J K L M N O
  56. SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
  57. // P Q R S T U V W
  58. SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
  59. // X Y Z [ \ ] ^ _
  60. SUIJ, SUIJ, SUIJ, SU, J, SU, SU, SUIJ,
  61. // ` a b c d e f g
  62. SU,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX, SUIJ,
  63. // h i j k l m n o
  64. SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
  65. // p q r s t u v w
  66. SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
  67. // x y z { | } ~ 7F
  68. SUIJ, SUIJ, SUIJ, SU, SU, SU, SU, S,
  69. };
  70. static_assert(MOZ_ARRAY_LENGTH(gLexTable) == 128,
  71. "gLexTable expected to cover all 128 ASCII characters");
  72. #undef I
  73. #undef J
  74. #undef U
  75. #undef S
  76. #undef X
  77. #undef SH
  78. #undef SU
  79. #undef SUI
  80. #undef SUIJ
  81. #undef SUIX
  82. #undef SUIJX
  83. /**
  84. * True if 'ch' is in character class 'cls', which should be one of
  85. * the constants above or some combination of them. All characters
  86. * above U+007F are considered to be in 'cls'. EOF is never in 'cls'.
  87. */
  88. static inline bool
  89. IsOpenCharClass(int32_t ch, uint8_t cls) {
  90. return ch >= 0 && (ch >= 128 || (gLexTable[ch] & cls) != 0);
  91. }
  92. /**
  93. * True if 'ch' is in character class 'cls', which should be one of
  94. * the constants above or some combination of them. No characters
  95. * above U+007F are considered to be in 'cls'. EOF is never in 'cls'.
  96. */
  97. static inline bool
  98. IsClosedCharClass(int32_t ch, uint8_t cls) {
  99. return uint32_t(ch) < 128 && (gLexTable[ch] & cls) != 0;
  100. }
  101. /**
  102. * True if 'ch' is CSS whitespace, i.e. any of the ASCII characters
  103. * TAB, LF, FF, CR, or SPC.
  104. */
  105. static inline bool
  106. IsWhitespace(int32_t ch) {
  107. return IsClosedCharClass(ch, IS_SPACE);
  108. }
  109. /**
  110. * True if 'ch' is horizontal whitespace, i.e. TAB or SPC.
  111. */
  112. static inline bool
  113. IsHorzSpace(int32_t ch) {
  114. return IsClosedCharClass(ch, IS_HSPACE);
  115. }
  116. /**
  117. * True if 'ch' is vertical whitespace, i.e. LF, FF, or CR. Vertical
  118. * whitespace requires special handling when consumed, see AdvanceLine.
  119. */
  120. static inline bool
  121. IsVertSpace(int32_t ch) {
  122. return IsClosedCharClass(ch, IS_VSPACE);
  123. }
  124. /**
  125. * True if 'ch' is a character that can appear in the middle of an identifier.
  126. * This includes U+0000 since it is handled as U+FFFD, but for purposes of
  127. * GatherText it should not be included in IsOpenCharClass.
  128. */
  129. static inline bool
  130. IsIdentChar(int32_t ch) {
  131. return IsOpenCharClass(ch, IS_IDCHAR) || ch == 0;
  132. }
  133. /**
  134. * True if 'ch' is a character that by itself begins an identifier.
  135. * This includes U+0000 since it is handled as U+FFFD, but for purposes of
  136. * GatherText it should not be included in IsOpenCharClass.
  137. * (This is a subset of IsIdentChar.)
  138. */
  139. static inline bool
  140. IsIdentStart(int32_t ch) {
  141. return IsOpenCharClass(ch, IS_IDSTART) || ch == 0;
  142. }
  143. /**
  144. * True if the two-character sequence aFirstChar+aSecondChar begins an
  145. * identifier.
  146. */
  147. static inline bool
  148. StartsIdent(int32_t aFirstChar, int32_t aSecondChar)
  149. {
  150. return IsIdentStart(aFirstChar) ||
  151. (aFirstChar == '-' && (aSecondChar == '-' || IsIdentStart(aSecondChar)));
  152. }
  /**
   * Test whether 'ch' is an ASCII decimal digit ('0' through '9').
   */
  static inline bool
  IsDigit(int32_t ch)
  {
    return '0' <= ch && ch <= '9';
  }
  160. /**
  161. * True if 'ch' is a hexadecimal digit.
  162. */
  163. static inline bool
  164. IsHexDigit(int32_t ch) {
  165. return IsClosedCharClass(ch, IS_HEX_DIGIT);
  166. }
  /**
   * Return the numeric value of the decimal digit 'ch'.  The caller is
   * expected to have checked that 'ch' really is a decimal digit.
   */
  static inline uint32_t
  DecimalDigitValue(int32_t ch)
  {
    const int32_t offsetFromZero = ch - '0';
    return offsetFromZero;
  }
  175. /**
  176. * Assuming that 'ch' is a hexadecimal digit, return its numeric value.
  177. */
  178. static inline uint32_t
  179. HexDigitValue(int32_t ch)
  180. {
  181. if (IsDigit(ch)) {
  182. return DecimalDigitValue(ch);
  183. } else {
  184. // Note: c&7 just keeps the low three bits which causes
  185. // upper and lower case alphabetics to both yield their
  186. // "relative to 10" value for computing the hex value.
  187. return (ch & 0x7) + 9;
  188. }
  189. }
  190. /**
  191. * If 'ch' can be the first character of a two-character match operator
  192. * token, return the token type code for that token, otherwise return
  193. * eCSSToken_Symbol to indicate that it can't.
  194. */
  195. static inline nsCSSTokenType
  196. MatchOperatorType(int32_t ch)
  197. {
  198. switch (ch) {
  199. case '~': return eCSSToken_Includes;
  200. case '|': return eCSSToken_Dashmatch;
  201. case '^': return eCSSToken_Beginsmatch;
  202. case '$': return eCSSToken_Endsmatch;
  203. case '*': return eCSSToken_Containsmatch;
  204. default: return eCSSToken_Symbol;
  205. }
  206. }
  207. /* Out-of-line nsCSSToken methods. */
  208. /**
  209. * Append the textual representation of |this| to |aBuffer|.
  210. */
  211. void
  212. nsCSSToken::AppendToString(nsString& aBuffer) const
  213. {
  214. switch (mType) {
  215. case eCSSToken_Ident:
  216. nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
  217. break;
  218. case eCSSToken_AtKeyword:
  219. aBuffer.Append('@');
  220. nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
  221. break;
  222. case eCSSToken_ID:
  223. case eCSSToken_Hash:
  224. aBuffer.Append('#');
  225. nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
  226. break;
  227. case eCSSToken_Function:
  228. nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
  229. aBuffer.Append('(');
  230. break;
  231. case eCSSToken_URL:
  232. case eCSSToken_Bad_URL:
  233. aBuffer.AppendLiteral("url(");
  234. if (mSymbol != char16_t(0)) {
  235. nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
  236. } else {
  237. aBuffer.Append(mIdent);
  238. }
  239. if (mType == eCSSToken_URL) {
  240. aBuffer.Append(char16_t(')'));
  241. }
  242. break;
  243. case eCSSToken_Number:
  244. if (mIntegerValid) {
  245. aBuffer.AppendInt(mInteger, 10);
  246. } else {
  247. aBuffer.AppendFloat(mNumber);
  248. }
  249. break;
  250. case eCSSToken_Percentage:
  251. aBuffer.AppendFloat(mNumber * 100.0f);
  252. aBuffer.Append(char16_t('%'));
  253. break;
  254. case eCSSToken_Dimension:
  255. if (mIntegerValid) {
  256. aBuffer.AppendInt(mInteger, 10);
  257. } else {
  258. aBuffer.AppendFloat(mNumber);
  259. }
  260. nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
  261. break;
  262. case eCSSToken_Bad_String:
  263. nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
  264. // remove the trailing quote character
  265. aBuffer.Truncate(aBuffer.Length() - 1);
  266. break;
  267. case eCSSToken_String:
  268. nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
  269. break;
  270. case eCSSToken_Symbol:
  271. aBuffer.Append(mSymbol);
  272. break;
  273. case eCSSToken_Whitespace:
  274. aBuffer.Append(' ');
  275. break;
  276. case eCSSToken_HTMLComment:
  277. case eCSSToken_URange:
  278. aBuffer.Append(mIdent);
  279. break;
  280. case eCSSToken_Includes:
  281. aBuffer.AppendLiteral("~=");
  282. break;
  283. case eCSSToken_Dashmatch:
  284. aBuffer.AppendLiteral("|=");
  285. break;
  286. case eCSSToken_Beginsmatch:
  287. aBuffer.AppendLiteral("^=");
  288. break;
  289. case eCSSToken_Endsmatch:
  290. aBuffer.AppendLiteral("$=");
  291. break;
  292. case eCSSToken_Containsmatch:
  293. aBuffer.AppendLiteral("*=");
  294. break;
  295. default:
  296. NS_ERROR("invalid token type");
  297. break;
  298. }
  299. }
  300. /* nsCSSScanner methods. */
  301. nsCSSScanner::nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber)
  302. : mBuffer(aBuffer.BeginReading())
  303. , mOffset(0)
  304. , mCount(aBuffer.Length())
  305. , mLineNumber(aLineNumber)
  306. , mLineOffset(0)
  307. , mTokenLineNumber(aLineNumber)
  308. , mTokenLineOffset(0)
  309. , mTokenOffset(0)
  310. , mRecordStartOffset(0)
  311. , mEOFCharacters(eEOFCharacters_None)
  312. , mReporter(nullptr)
  313. , mSVGMode(false)
  314. , mRecording(false)
  315. , mSeenBadToken(false)
  316. , mSeenVariableReference(false)
  317. {
  318. MOZ_COUNT_CTOR(nsCSSScanner);
  319. }
  320. nsCSSScanner::~nsCSSScanner()
  321. {
  322. MOZ_COUNT_DTOR(nsCSSScanner);
  323. }
  324. void
  325. nsCSSScanner::StartRecording()
  326. {
  327. MOZ_ASSERT(!mRecording, "already started recording");
  328. mRecording = true;
  329. mRecordStartOffset = mOffset;
  330. }
  331. void
  332. nsCSSScanner::StopRecording()
  333. {
  334. MOZ_ASSERT(mRecording, "haven't started recording");
  335. mRecording = false;
  336. }
  337. void
  338. nsCSSScanner::StopRecording(nsString& aBuffer)
  339. {
  340. MOZ_ASSERT(mRecording, "haven't started recording");
  341. mRecording = false;
  342. aBuffer.Append(mBuffer + mRecordStartOffset,
  343. mOffset - mRecordStartOffset);
  344. }
  345. uint32_t
  346. nsCSSScanner::RecordingLength() const
  347. {
  348. MOZ_ASSERT(mRecording, "haven't started recording");
  349. return mOffset - mRecordStartOffset;
  350. }
  #ifdef DEBUG
  /**
   * Report whether the scanner is currently recording.  Only compiled in
   * DEBUG builds.
   */
  bool
  nsCSSScanner::IsRecording() const
  {
    return mRecording;
  }
  #endif
  358. nsDependentSubstring
  359. nsCSSScanner::GetCurrentLine() const
  360. {
  361. uint32_t end = mTokenOffset;
  362. while (end < mCount && !IsVertSpace(mBuffer[end])) {
  363. end++;
  364. }
  365. return nsDependentSubstring(mBuffer + mTokenLineOffset,
  366. mBuffer + end);
  367. }
  368. /**
  369. * Return the raw UTF-16 code unit at position |mOffset + n| within
  370. * the read buffer. If that is beyond the end of the buffer, returns
  371. * -1 to indicate end of input.
  372. */
  373. inline int32_t
  374. nsCSSScanner::Peek(uint32_t n)
  375. {
  376. if (mOffset + n >= mCount) {
  377. return -1;
  378. }
  379. return mBuffer[mOffset + n];
  380. }
  381. /**
  382. * Advance |mOffset| over |n| code units. Advance(0) is a no-op.
  383. * If |n| is greater than the distance to end of input, will silently
  384. * stop at the end. May not be used to advance over a line boundary;
  385. * AdvanceLine() must be used instead.
  386. */
  387. inline void
  388. nsCSSScanner::Advance(uint32_t n)
  389. {
  390. #ifdef DEBUG
  391. while (mOffset < mCount && n > 0) {
  392. MOZ_ASSERT(!IsVertSpace(mBuffer[mOffset]),
  393. "may not Advance() over a line boundary");
  394. mOffset++;
  395. n--;
  396. }
  397. #else
  398. if (mOffset + n >= mCount || mOffset + n < mOffset)
  399. mOffset = mCount;
  400. else
  401. mOffset += n;
  402. #endif
  403. }
  404. /**
  405. * Advance |mOffset| over a line boundary.
  406. */
  407. void
  408. nsCSSScanner::AdvanceLine()
  409. {
  410. MOZ_ASSERT(IsVertSpace(mBuffer[mOffset]),
  411. "may not AdvanceLine() over a horizontal character");
  412. // Advance over \r\n as a unit.
  413. if (mBuffer[mOffset] == '\r' && mOffset + 1 < mCount &&
  414. mBuffer[mOffset+1] == '\n')
  415. mOffset += 2;
  416. else
  417. mOffset += 1;
  418. // 0 is a magical line number meaning that we don't know (i.e., script)
  419. if (mLineNumber != 0)
  420. mLineNumber++;
  421. mLineOffset = mOffset;
  422. }
  423. /**
  424. * Back up |mOffset| over |n| code units. Backup(0) is a no-op.
  425. * If |n| is greater than the distance to beginning of input, will
  426. * silently stop at the beginning. May not be used to back up over a
  427. * line boundary.
  428. */
  429. void
  430. nsCSSScanner::Backup(uint32_t n)
  431. {
  432. #ifdef DEBUG
  433. while (mOffset > 0 && n > 0) {
  434. MOZ_ASSERT(!IsVertSpace(mBuffer[mOffset-1]),
  435. "may not Backup() over a line boundary");
  436. mOffset--;
  437. n--;
  438. }
  439. #else
  440. if (mOffset < n)
  441. mOffset = 0;
  442. else
  443. mOffset -= n;
  444. #endif
  445. }
  446. void
  447. nsCSSScanner::SavePosition(nsCSSScannerPosition& aState)
  448. {
  449. aState.mOffset = mOffset;
  450. aState.mLineNumber = mLineNumber;
  451. aState.mLineOffset = mLineOffset;
  452. aState.mTokenLineNumber = mTokenLineNumber;
  453. aState.mTokenLineOffset = mTokenLineOffset;
  454. aState.mTokenOffset = mTokenOffset;
  455. aState.mInitialized = true;
  456. }
  457. void
  458. nsCSSScanner::RestoreSavedPosition(const nsCSSScannerPosition& aState)
  459. {
  460. MOZ_ASSERT(aState.mInitialized, "have not saved state");
  461. if (aState.mInitialized) {
  462. mOffset = aState.mOffset;
  463. mLineNumber = aState.mLineNumber;
  464. mLineOffset = aState.mLineOffset;
  465. mTokenLineNumber = aState.mTokenLineNumber;
  466. mTokenLineOffset = aState.mTokenLineOffset;
  467. mTokenOffset = aState.mTokenOffset;
  468. }
  469. }
  470. /**
  471. * Skip over a sequence of whitespace characters (vertical or
  472. * horizontal) starting at the current read position.
  473. */
  474. void
  475. nsCSSScanner::SkipWhitespace()
  476. {
  477. for (;;) {
  478. int32_t ch = Peek();
  479. if (!IsWhitespace(ch)) { // EOF counts as non-whitespace
  480. break;
  481. }
  482. if (IsVertSpace(ch)) {
  483. AdvanceLine();
  484. } else {
  485. Advance();
  486. }
  487. }
  488. }
  489. /**
  490. * Skip over one CSS comment starting at the current read position.
  491. */
  492. void
  493. nsCSSScanner::SkipComment()
  494. {
  495. MOZ_ASSERT(Peek() == '/' && Peek(1) == '*', "should not have been called");
  496. Advance(2);
  497. for (;;) {
  498. int32_t ch = Peek();
  499. if (ch < 0) {
  500. if (mReporter)
  501. mReporter->ReportUnexpectedEOF("PECommentEOF");
  502. SetEOFCharacters(eEOFCharacters_Asterisk | eEOFCharacters_Slash);
  503. return;
  504. }
  505. if (ch == '*') {
  506. Advance();
  507. ch = Peek();
  508. if (ch < 0) {
  509. if (mReporter)
  510. mReporter->ReportUnexpectedEOF("PECommentEOF");
  511. SetEOFCharacters(eEOFCharacters_Slash);
  512. return;
  513. }
  514. if (ch == '/') {
  515. Advance();
  516. return;
  517. }
  518. } else if (IsVertSpace(ch)) {
  519. AdvanceLine();
  520. } else {
  521. Advance();
  522. }
  523. }
  524. }
  525. /**
  526. * If there is a valid escape sequence starting at the current read
  527. * position, consume it, decode it, append the result to |aOutput|,
  528. * and return true. Otherwise, consume nothing, leave |aOutput|
  529. * unmodified, and return false. If |aInString| is true, accept the
  530. * additional form of escape sequence allowed within string-like tokens.
  531. */
  532. bool
  533. nsCSSScanner::GatherEscape(nsString& aOutput, bool aInString)
  534. {
  535. MOZ_ASSERT(Peek() == '\\', "should not have been called");
  536. int32_t ch = Peek(1);
  537. if (ch < 0) {
  538. // If we are in a string (or a url() containing a string), we want to drop
  539. // the backslash on the floor. Otherwise, we want to treat it as a U+FFFD
  540. // character.
  541. Advance();
  542. if (aInString) {
  543. SetEOFCharacters(eEOFCharacters_DropBackslash);
  544. } else {
  545. aOutput.Append(UCS2_REPLACEMENT_CHAR);
  546. SetEOFCharacters(eEOFCharacters_ReplacementChar);
  547. }
  548. return true;
  549. }
  550. if (IsVertSpace(ch)) {
  551. if (aInString) {
  552. // In strings (and in url() containing a string), escaped
  553. // newlines are completely removed, to allow splitting over
  554. // multiple lines.
  555. Advance();
  556. AdvanceLine();
  557. return true;
  558. }
  559. // Outside of strings, backslash followed by a newline is not an escape.
  560. return false;
  561. }
  562. if (!IsHexDigit(ch)) {
  563. // "Any character (except a hexadecimal digit, linefeed, carriage
  564. // return, or form feed) can be escaped with a backslash to remove
  565. // its special meaning." -- CSS2.1 section 4.1.3
  566. Advance(2);
  567. if (ch == 0) {
  568. aOutput.Append(UCS2_REPLACEMENT_CHAR);
  569. } else {
  570. aOutput.Append(ch);
  571. }
  572. return true;
  573. }
  574. // "[at most six hexadecimal digits following a backslash] stand
  575. // for the ISO 10646 character with that number, which must not be
  576. // zero. (It is undefined in CSS 2.1 what happens if a style sheet
  577. // does contain a character with Unicode codepoint zero.)"
  578. // -- CSS2.1 section 4.1.3
  579. // At this point we know we have \ followed by at least one
  580. // hexadecimal digit, therefore the escape sequence is valid and we
  581. // can go ahead and consume the backslash.
  582. Advance();
  583. uint32_t val = 0;
  584. int i = 0;
  585. do {
  586. val = val * 16 + HexDigitValue(ch);
  587. i++;
  588. Advance();
  589. ch = Peek();
  590. } while (i < 6 && IsHexDigit(ch));
  591. // "Interpret the hex digits as a hexadecimal number. If this number is zero,
  592. // or is greater than the maximum allowed codepoint, return U+FFFD
  593. // REPLACEMENT CHARACTER" -- CSS Syntax Level 3
  594. if (MOZ_UNLIKELY(val == 0)) {
  595. aOutput.Append(UCS2_REPLACEMENT_CHAR);
  596. } else {
  597. AppendUCS4ToUTF16(ENSURE_VALID_CHAR(val), aOutput);
  598. }
  599. // Consume exactly one whitespace character after a
  600. // hexadecimal escape sequence.
  601. if (IsVertSpace(ch)) {
  602. AdvanceLine();
  603. } else if (IsHorzSpace(ch)) {
  604. Advance();
  605. }
  606. return true;
  607. }
  608. /**
  609. * Consume a run of "text" beginning with the current read position,
  610. * consisting of characters in the class |aClass| (which must be a
  611. * suitable argument to IsOpenCharClass) plus escape sequences.
  612. * Append the text to |aText|, after decoding escape sequences.
  613. *
  614. * Returns true if at least one character was appended to |aText|,
  615. * false otherwise.
  616. */
  617. bool
  618. nsCSSScanner::GatherText(uint8_t aClass, nsString& aText)
  619. {
  620. // This is all of the character classes currently used with
  621. // GatherText. If you have a need to use this function with a
  622. // different class, go ahead and add it.
  623. MOZ_ASSERT(aClass == IS_STRING ||
  624. aClass == IS_IDCHAR ||
  625. aClass == IS_URL_CHAR,
  626. "possibly-inappropriate character class");
  627. uint32_t start = mOffset;
  628. bool inString = aClass == IS_STRING;
  629. for (;;) {
  630. // Consume runs of unescaped characters in one go.
  631. uint32_t n = mOffset;
  632. while (n < mCount && IsOpenCharClass(mBuffer[n], aClass)) {
  633. n++;
  634. }
  635. if (n > mOffset) {
  636. aText.Append(&mBuffer[mOffset], n - mOffset);
  637. mOffset = n;
  638. }
  639. if (n == mCount) {
  640. break;
  641. }
  642. int32_t ch = Peek();
  643. MOZ_ASSERT(!IsOpenCharClass(ch, aClass),
  644. "should not have exited the inner loop");
  645. if (ch == 0) {
  646. Advance();
  647. aText.Append(UCS2_REPLACEMENT_CHAR);
  648. continue;
  649. }
  650. if (ch != '\\') {
  651. break;
  652. }
  653. if (!GatherEscape(aText, inString)) {
  654. break;
  655. }
  656. }
  657. return mOffset > start;
  658. }
  659. /**
  660. * Scan an Ident token. This also handles Function and URL tokens,
  661. * both of which begin indistinguishably from an identifier. It can
  662. * produce a Symbol token when an apparent identifier actually led
  663. * into an invalid escape sequence.
  664. */
  665. bool
  666. nsCSSScanner::ScanIdent(nsCSSToken& aToken)
  667. {
  668. if (MOZ_UNLIKELY(!GatherText(IS_IDCHAR, aToken.mIdent))) {
  669. MOZ_ASSERT(Peek() == '\\',
  670. "unexpected IsIdentStart character that did not begin an ident");
  671. aToken.mSymbol = Peek();
  672. Advance();
  673. return true;
  674. }
  675. if (MOZ_LIKELY(Peek() != '(')) {
  676. aToken.mType = eCSSToken_Ident;
  677. return true;
  678. }
  679. Advance();
  680. aToken.mType = eCSSToken_Function;
  681. if (aToken.mIdent.LowerCaseEqualsLiteral("url")) {
  682. NextURL(aToken);
  683. } else if (aToken.mIdent.LowerCaseEqualsLiteral("var")) {
  684. mSeenVariableReference = true;
  685. }
  686. return true;
  687. }
  688. /**
  689. * Scan an AtKeyword token. Also handles production of Symbol when
  690. * an '@' is not followed by an identifier.
  691. */
  692. bool
  693. nsCSSScanner::ScanAtKeyword(nsCSSToken& aToken)
  694. {
  695. MOZ_ASSERT(Peek() == '@', "should not have been called");
  696. // Fall back for when '@' isn't followed by an identifier.
  697. aToken.mSymbol = '@';
  698. Advance();
  699. int32_t ch = Peek();
  700. if (StartsIdent(ch, Peek(1))) {
  701. if (GatherText(IS_IDCHAR, aToken.mIdent)) {
  702. aToken.mType = eCSSToken_AtKeyword;
  703. }
  704. }
  705. return true;
  706. }
  707. /**
  708. * Scan a Hash token. Handles the distinction between eCSSToken_ID
  709. * and eCSSToken_Hash, and handles production of Symbol when a '#'
  710. * is not followed by identifier characters.
  711. */
  712. bool
  713. nsCSSScanner::ScanHash(nsCSSToken& aToken)
  714. {
  715. MOZ_ASSERT(Peek() == '#', "should not have been called");
  716. // Fall back for when '#' isn't followed by identifier characters.
  717. aToken.mSymbol = '#';
  718. Advance();
  719. int32_t ch = Peek();
  720. if (IsIdentChar(ch) || ch == '\\') {
  721. nsCSSTokenType type =
  722. StartsIdent(ch, Peek(1)) ? eCSSToken_ID : eCSSToken_Hash;
  723. aToken.mIdent.SetLength(0);
  724. if (GatherText(IS_IDCHAR, aToken.mIdent)) {
  725. aToken.mType = type;
  726. }
  727. }
  728. return true;
  729. }
  730. /**
  731. * Scan a Number, Percentage, or Dimension token (all of which begin
  732. * like a Number). Can produce a Symbol when a '.' is not followed by
  733. * digits, or when '+' or '-' are not followed by either a digit or a
  734. * '.' and then a digit. Can also produce a HTMLComment when it
  735. * encounters '-->'.
  736. */
  737. bool
  738. nsCSSScanner::ScanNumber(nsCSSToken& aToken)
  739. {
  740. int32_t c = Peek();
  741. #ifdef DEBUG
  742. {
  743. int32_t c2 = Peek(1);
  744. int32_t c3 = Peek(2);
  745. MOZ_ASSERT(IsDigit(c) ||
  746. (IsDigit(c2) && (c == '.' || c == '+' || c == '-')) ||
  747. (IsDigit(c3) && (c == '+' || c == '-') && c2 == '.'),
  748. "should not have been called");
  749. }
  750. #endif
  751. // Sign of the mantissa (-1 or 1).
  752. int32_t sign = c == '-' ? -1 : 1;
  753. // Absolute value of the integer part of the mantissa. This is a double so
  754. // we don't run into overflow issues for consumers that only care about our
  755. // floating-point value while still being able to express the full int32_t
  756. // range for consumers who want integers.
  757. double intPart = 0;
  758. // Fractional part of the mantissa. This is a double so that when we convert
  759. // to float at the end we'll end up rounding to nearest float instead of
  760. // truncating down (as we would if fracPart were a float and we just
  761. // effectively lost the last several digits).
  762. double fracPart = 0;
  763. // Absolute value of the power of 10 that we should multiply by (only
  764. // relevant for numbers in scientific notation). Has to be a signed integer,
  765. // because multiplication of signed by unsigned converts the unsigned to
  766. // signed, so if we plan to actually multiply by expSign...
  767. int32_t exponent = 0;
  768. // Sign of the exponent.
  769. int32_t expSign = 1;
  770. aToken.mHasSign = (c == '+' || c == '-');
  771. if (aToken.mHasSign) {
  772. Advance();
  773. c = Peek();
  774. }
  775. bool gotDot = (c == '.');
  776. if (!gotDot) {
  777. // Scan the integer part of the mantissa.
  778. MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above");
  779. do {
  780. intPart = 10*intPart + DecimalDigitValue(c);
  781. Advance();
  782. c = Peek();
  783. } while (IsDigit(c));
  784. gotDot = (c == '.') && IsDigit(Peek(1));
  785. }
  786. if (gotDot) {
  787. // Scan the fractional part of the mantissa.
  788. Advance();
  789. c = Peek();
  790. MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above");
  791. // Power of ten by which we need to divide our next digit
  792. double divisor = 10;
  793. do {
  794. fracPart += DecimalDigitValue(c) / divisor;
  795. divisor *= 10;
  796. Advance();
  797. c = Peek();
  798. } while (IsDigit(c));
  799. }
  800. bool gotE = false;
  801. if (c == 'e' || c == 'E') {
  802. int32_t expSignChar = Peek(1);
  803. int32_t nextChar = Peek(2);
  804. if (IsDigit(expSignChar) ||
  805. ((expSignChar == '-' || expSignChar == '+') && IsDigit(nextChar))) {
  806. gotE = true;
  807. if (expSignChar == '-') {
  808. expSign = -1;
  809. }
  810. Advance(); // consumes the E
  811. if (expSignChar == '-' || expSignChar == '+') {
  812. Advance();
  813. c = nextChar;
  814. } else {
  815. c = expSignChar;
  816. }
  817. MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above");
  818. do {
  819. exponent = 10*exponent + DecimalDigitValue(c);
  820. Advance();
  821. c = Peek();
  822. } while (IsDigit(c));
  823. }
  824. }
  825. nsCSSTokenType type = eCSSToken_Number;
  826. // Set mIntegerValid for all cases (except %, below) because we need
  827. // it for the "2n" in :nth-child(2n).
  828. aToken.mIntegerValid = false;
  829. // Time to reassemble our number.
  830. // Do all the math in double precision so it's truncated only once.
  831. double value = sign * (intPart + fracPart);
  832. if (gotE) {
  833. // Avoid multiplication of 0 by Infinity.
  834. if (value != 0.0) {
  835. // Explicitly cast expSign*exponent to double to avoid issues with
  836. // overloaded pow() on Windows.
  837. value *= pow(10.0, double(expSign * exponent));
  838. }
  839. } else if (!gotDot) {
  840. // Clamp values outside of integer range.
  841. if (sign > 0) {
  842. aToken.mInteger = int32_t(std::min(intPart, double(INT32_MAX)));
  843. } else {
  844. aToken.mInteger = int32_t(std::max(-intPart, double(INT32_MIN)));
  845. }
  846. aToken.mIntegerValid = true;
  847. }
  848. nsString& ident = aToken.mIdent;
  849. // Check for Dimension and Percentage tokens.
  850. if (c >= 0) {
  851. if (StartsIdent(c, Peek(1))) {
  852. if (GatherText(IS_IDCHAR, ident)) {
  853. type = eCSSToken_Dimension;
  854. }
  855. } else if (c == '%') {
  856. Advance();
  857. type = eCSSToken_Percentage;
  858. value = value / 100.0f;
  859. aToken.mIntegerValid = false;
  860. }
  861. }
  862. MOZ_ASSERT(!IsNaN(value), "The value should not be NaN");
  863. aToken.mNumber = value;
  864. aToken.mType = type;
  865. return true;
  866. }
  867. /**
  868. * Scan a string constant ('foo' or "foo"). Will always produce
  869. * either a String or a Bad_String token; the latter occurs when the
  870. * close quote is missing. Always returns true (for convenience in Next()).
  871. */
  872. bool
  873. nsCSSScanner::ScanString(nsCSSToken& aToken)
  874. {
  875. int32_t aStop = Peek();
  876. MOZ_ASSERT(aStop == '"' || aStop == '\'', "should not have been called");
  877. aToken.mType = eCSSToken_String;
  878. aToken.mSymbol = char16_t(aStop); // Remember how it's quoted.
  879. Advance();
  880. for (;;) {
  881. GatherText(IS_STRING, aToken.mIdent);
  882. int32_t ch = Peek();
  883. if (ch == -1) {
  884. AddEOFCharacters(aStop == '"' ? eEOFCharacters_DoubleQuote :
  885. eEOFCharacters_SingleQuote);
  886. break; // EOF ends a string token with no error.
  887. }
  888. if (ch == aStop) {
  889. Advance();
  890. break;
  891. }
  892. // Both " and ' are excluded from IS_STRING.
  893. if (ch == '"' || ch == '\'') {
  894. aToken.mIdent.Append(ch);
  895. Advance();
  896. continue;
  897. }
  898. mSeenBadToken = true;
  899. aToken.mType = eCSSToken_Bad_String;
  900. if (mReporter)
  901. mReporter->ReportUnexpected("SEUnterminatedString", aToken);
  902. break;
  903. }
  904. return true;
  905. }
  906. /**
  907. * Scan a unicode-range token. These match the regular expression
  908. *
  909. * u\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?
  910. *
  911. * However, some such tokens are "invalid". There are three valid forms:
  912. *
  913. * u+[0-9a-f]{x} 1 <= x <= 6
  914. * u+[0-9a-f]{x}\?{y} 1 <= x+y <= 6
  915. * u+[0-9a-f]{x}-[0-9a-f]{y} 1 <= x <= 6, 1 <= y <= 6
  916. *
  917. * All unicode-range tokens have their text recorded in mIdent; valid ones
  918. * are also decoded into mInteger and mInteger2, and mIntegerValid is set.
  919. * Note that this does not validate the numeric range, only the syntactic
  920. * form.
  921. */
  922. bool
  923. nsCSSScanner::ScanURange(nsCSSToken& aResult)
  924. {
  925. int32_t intro1 = Peek();
  926. int32_t intro2 = Peek(1);
  927. int32_t ch = Peek(2);
  928. MOZ_ASSERT((intro1 == 'u' || intro1 == 'U') &&
  929. intro2 == '+' &&
  930. (IsHexDigit(ch) || ch == '?'),
  931. "should not have been called");
  932. aResult.mIdent.Append(intro1);
  933. aResult.mIdent.Append(intro2);
  934. Advance(2);
  935. bool valid = true;
  936. bool haveQues = false;
  937. uint32_t low = 0;
  938. uint32_t high = 0;
  939. int i = 0;
  940. do {
  941. aResult.mIdent.Append(ch);
  942. if (IsHexDigit(ch)) {
  943. if (haveQues) {
  944. valid = false; // All question marks should be at the end.
  945. }
  946. low = low*16 + HexDigitValue(ch);
  947. high = high*16 + HexDigitValue(ch);
  948. } else {
  949. haveQues = true;
  950. low = low*16 + 0x0;
  951. high = high*16 + 0xF;
  952. }
  953. i++;
  954. Advance();
  955. ch = Peek();
  956. } while (i < 6 && (IsHexDigit(ch) || ch == '?'));
  957. if (ch == '-' && IsHexDigit(Peek(1))) {
  958. if (haveQues) {
  959. valid = false;
  960. }
  961. aResult.mIdent.Append(ch);
  962. Advance();
  963. ch = Peek();
  964. high = 0;
  965. i = 0;
  966. do {
  967. aResult.mIdent.Append(ch);
  968. high = high*16 + HexDigitValue(ch);
  969. i++;
  970. Advance();
  971. ch = Peek();
  972. } while (i < 6 && IsHexDigit(ch));
  973. }
  974. aResult.mInteger = low;
  975. aResult.mInteger2 = high;
  976. aResult.mIntegerValid = valid;
  977. aResult.mType = eCSSToken_URange;
  978. return true;
  979. }
  #ifdef DEBUG
  /**
   * Debug-only check that |c| is one of the combinations of EOFCharacters
   * bits listed below.  Any other combination triggers an assertion.
   */
  /* static */ void
  nsCSSScanner::AssertEOFCharactersValid(uint32_t c)
  {
    bool recognized;
    switch (c) {
      // Nothing implied, or a single implied character.
      case eEOFCharacters_None:
      case eEOFCharacters_ReplacementChar:
      case eEOFCharacters_Slash:
      case eEOFCharacters_DoubleQuote:
      case eEOFCharacters_SingleQuote:
      case eEOFCharacters_CloseParen:
      // Implied "*/".
      case (eEOFCharacters_Asterisk | eEOFCharacters_Slash):
      // Drop a trailing backslash, then close the quote.
      case (eEOFCharacters_DropBackslash | eEOFCharacters_DoubleQuote):
      case (eEOFCharacters_DropBackslash | eEOFCharacters_SingleQuote):
      // Combinations that additionally imply a close parenthesis.
      case (eEOFCharacters_ReplacementChar | eEOFCharacters_CloseParen):
      case (eEOFCharacters_DoubleQuote | eEOFCharacters_CloseParen):
      case (eEOFCharacters_SingleQuote | eEOFCharacters_CloseParen):
      case (eEOFCharacters_DropBackslash |
            eEOFCharacters_DoubleQuote |
            eEOFCharacters_CloseParen):
      case (eEOFCharacters_DropBackslash |
            eEOFCharacters_SingleQuote |
            eEOFCharacters_CloseParen):
        recognized = true;
        break;

      default:
        recognized = false;
        break;
    }
    MOZ_ASSERT(recognized, "invalid EOFCharacters value");
  }
  #endif
  1011. void
  1012. nsCSSScanner::SetEOFCharacters(uint32_t aEOFCharacters)
  1013. {
  1014. mEOFCharacters = EOFCharacters(aEOFCharacters);
  1015. }
  1016. void
  1017. nsCSSScanner::AddEOFCharacters(uint32_t aEOFCharacters)
  1018. {
  1019. mEOFCharacters = EOFCharacters(mEOFCharacters | aEOFCharacters);
  1020. }
  1021. static const char16_t kImpliedEOFCharacters[] = {
  1022. UCS2_REPLACEMENT_CHAR, '*', '/', '"', '\'', ')', 0
  1023. };
  1024. /* static */ void
  1025. nsCSSScanner::AppendImpliedEOFCharacters(EOFCharacters aEOFCharacters,
  1026. nsAString& aResult)
  1027. {
  1028. // First, ignore eEOFCharacters_DropBackslash.
  1029. uint32_t c = aEOFCharacters >> 1;
  1030. // All of the remaining EOFCharacters bits represent appended characters,
  1031. // and the bits are in the order that they need appending.
  1032. for (const char16_t* p = kImpliedEOFCharacters; *p && c; p++, c >>= 1) {
  1033. if (c & 1) {
  1034. aResult.Append(*p);
  1035. }
  1036. }
  1037. MOZ_ASSERT(c == 0, "too many bits in mEOFCharacters");
  1038. }
  1039. /**
  1040. * Consume the part of an URL token after the initial 'url('. Caller
  1041. * is assumed to have consumed 'url(' already. Will always produce
  1042. * either an URL or a Bad_URL token.
  1043. *
  1044. * Exposed for use by nsCSSParser::ParseMozDocumentRule, which applies
  1045. * the special lexical rules for URL tokens in a nonstandard context.
  1046. */
  1047. void
  1048. nsCSSScanner::NextURL(nsCSSToken& aToken)
  1049. {
  1050. SkipWhitespace();
  1051. // aToken.mIdent may be "url" at this point; clear that out
  1052. aToken.mIdent.Truncate();
  1053. bool hasString = false;
  1054. int32_t ch = Peek();
  1055. // Do we have a string?
  1056. if (ch == '"' || ch == '\'') {
  1057. ScanString(aToken);
  1058. if (MOZ_UNLIKELY(aToken.mType == eCSSToken_Bad_String)) {
  1059. aToken.mType = eCSSToken_Bad_URL;
  1060. return;
  1061. }
  1062. MOZ_ASSERT(aToken.mType == eCSSToken_String, "unexpected token type");
  1063. hasString = true;
  1064. } else {
  1065. // Otherwise, this is the start of a non-quoted url (which may be empty).
  1066. aToken.mSymbol = char16_t(0);
  1067. GatherText(IS_URL_CHAR, aToken.mIdent);
  1068. }
  1069. // Consume trailing whitespace and then look for a close parenthesis.
  1070. SkipWhitespace();
  1071. ch = Peek();
  1072. // ch can be less than zero indicating EOF
  1073. if (MOZ_LIKELY(ch < 0 || ch == ')')) {
  1074. Advance();
  1075. aToken.mType = eCSSToken_URL;
  1076. if (ch < 0) {
  1077. AddEOFCharacters(eEOFCharacters_CloseParen);
  1078. }
  1079. } else {
  1080. mSeenBadToken = true;
  1081. aToken.mType = eCSSToken_Bad_URL;
  1082. if (!hasString) {
  1083. // Consume until before the next right parenthesis, which follows
  1084. // how <bad-url-token> is consumed in CSS Syntax 3 spec.
  1085. // Note that, we only do this when "url(" is not followed by a
  1086. // string, because in the spec, "url(" followed by a string is
  1087. // handled as a url function rather than a <url-token>, so the
  1088. // rest of content before ")" should be consumed in balance,
  1089. // which will be done by the parser.
  1090. // The closing ")" is not consumed here. It is left to the parser
  1091. // so that the parser can handle both cases.
  1092. do {
  1093. if (IsVertSpace(ch)) {
  1094. AdvanceLine();
  1095. } else {
  1096. Advance();
  1097. }
  1098. ch = Peek();
  1099. } while (ch >= 0 && ch != ')');
  1100. }
  1101. }
  1102. }
  1103. /**
  1104. * Primary scanner entry point. Consume one token and fill in
  1105. * |aToken| accordingly. Will skip over any number of comments first,
  1106. * and will also skip over rather than return whitespace and comment
  1107. * tokens, depending on the value of |aSkip|.
  1108. *
  1109. * Returns true if it successfully consumed a token, false if EOF has
  1110. * been reached. Will always advance the current read position by at
  1111. * least one character unless called when already at EOF.
  1112. */
  1113. bool
  1114. nsCSSScanner::Next(nsCSSToken& aToken, nsCSSScannerExclude aSkip)
  1115. {
  1116. int32_t ch;
  1117. // do this here so we don't have to do it in dozens of other places
  1118. aToken.mIdent.Truncate();
  1119. aToken.mType = eCSSToken_Symbol;
  1120. for (;;) {
  1121. // Consume any number of comments, and possibly also whitespace tokens,
  1122. // in between other tokens.
  1123. mTokenOffset = mOffset;
  1124. mTokenLineOffset = mLineOffset;
  1125. mTokenLineNumber = mLineNumber;
  1126. ch = Peek();
  1127. if (IsWhitespace(ch)) {
  1128. SkipWhitespace();
  1129. if (aSkip != eCSSScannerExclude_WhitespaceAndComments) {
  1130. aToken.mType = eCSSToken_Whitespace;
  1131. return true;
  1132. }
  1133. continue; // start again at the beginning
  1134. }
  1135. if (ch == '/' && !IsSVGMode() && Peek(1) == '*') {
  1136. SkipComment();
  1137. if (aSkip == eCSSScannerExclude_None) {
  1138. aToken.mType = eCSSToken_Comment;
  1139. return true;
  1140. }
  1141. continue; // start again at the beginning
  1142. }
  1143. break;
  1144. }
  1145. // EOF
  1146. if (ch < 0) {
  1147. return false;
  1148. }
  1149. // 'u' could be UNICODE-RANGE or an identifier-family token
  1150. if (ch == 'u' || ch == 'U') {
  1151. int32_t c2 = Peek(1);
  1152. int32_t c3 = Peek(2);
  1153. if (c2 == '+' && (IsHexDigit(c3) || c3 == '?')) {
  1154. return ScanURange(aToken);
  1155. }
  1156. return ScanIdent(aToken);
  1157. }
  1158. // identifier family
  1159. if (IsIdentStart(ch)) {
  1160. return ScanIdent(aToken);
  1161. }
  1162. // number family
  1163. if (IsDigit(ch)) {
  1164. return ScanNumber(aToken);
  1165. }
  1166. if (ch == '.' && IsDigit(Peek(1))) {
  1167. return ScanNumber(aToken);
  1168. }
  1169. if (ch == '+') {
  1170. int32_t c2 = Peek(1);
  1171. if (IsDigit(c2) || (c2 == '.' && IsDigit(Peek(2)))) {
  1172. return ScanNumber(aToken);
  1173. }
  1174. }
  1175. // '-' can start an identifier-family token, a number-family token,
  1176. // or an HTML-comment
  1177. if (ch == '-') {
  1178. int32_t c2 = Peek(1);
  1179. int32_t c3 = Peek(2);
  1180. if (IsIdentStart(c2) || (c2 == '-' && c3 != '>')) {
  1181. return ScanIdent(aToken);
  1182. }
  1183. if (IsDigit(c2) || (c2 == '.' && IsDigit(c3))) {
  1184. return ScanNumber(aToken);
  1185. }
  1186. if (c2 == '-' && c3 == '>') {
  1187. Advance(3);
  1188. aToken.mType = eCSSToken_HTMLComment;
  1189. aToken.mIdent.AssignLiteral("-->");
  1190. return true;
  1191. }
  1192. }
  1193. // the other HTML-comment token
  1194. if (ch == '<' && Peek(1) == '!' && Peek(2) == '-' && Peek(3) == '-') {
  1195. Advance(4);
  1196. aToken.mType = eCSSToken_HTMLComment;
  1197. aToken.mIdent.AssignLiteral("<!--");
  1198. return true;
  1199. }
  1200. // AT_KEYWORD
  1201. if (ch == '@') {
  1202. return ScanAtKeyword(aToken);
  1203. }
  1204. // HASH
  1205. if (ch == '#') {
  1206. return ScanHash(aToken);
  1207. }
  1208. // STRING
  1209. if (ch == '"' || ch == '\'') {
  1210. return ScanString(aToken);
  1211. }
  1212. // Match operators: ~= |= ^= $= *=
  1213. nsCSSTokenType opType = MatchOperatorType(ch);
  1214. if (opType != eCSSToken_Symbol && Peek(1) == '=') {
  1215. aToken.mType = opType;
  1216. Advance(2);
  1217. return true;
  1218. }
  1219. // Otherwise, a symbol (DELIM).
  1220. aToken.mSymbol = ch;
  1221. Advance();
  1222. return true;
  1223. }
  1224. /* nsCSSGridTemplateAreaScanner methods. */
  1225. nsCSSGridTemplateAreaScanner::nsCSSGridTemplateAreaScanner(const nsAString& aBuffer)
  1226. : mBuffer(aBuffer.BeginReading())
  1227. , mOffset(0)
  1228. , mCount(aBuffer.Length())
  1229. {
  1230. }
  1231. bool
  1232. nsCSSGridTemplateAreaScanner::Next(nsCSSGridTemplateAreaToken& aTokenResult)
  1233. {
  1234. int32_t ch;
  1235. // Skip whitespace
  1236. do {
  1237. if (mOffset >= mCount) {
  1238. return false;
  1239. }
  1240. ch = mBuffer[mOffset];
  1241. mOffset++;
  1242. } while (IsWhitespace(ch));
  1243. if (IsOpenCharClass(ch, IS_IDCHAR)) {
  1244. // Named cell token
  1245. uint32_t start = mOffset - 1; // offset of |ch|
  1246. while (mOffset < mCount && IsOpenCharClass(mBuffer[mOffset], IS_IDCHAR)) {
  1247. mOffset++;
  1248. }
  1249. aTokenResult.mName.Assign(&mBuffer[start], mOffset - start);
  1250. aTokenResult.isTrash = false;
  1251. } else if (ch == '.') {
  1252. // Null cell token
  1253. // Skip any other '.'
  1254. while (mOffset < mCount && mBuffer[mOffset] == '.') {
  1255. mOffset++;
  1256. }
  1257. aTokenResult.mName.Truncate();
  1258. aTokenResult.isTrash = false;
  1259. } else {
  1260. // Trash token
  1261. aTokenResult.isTrash = true;
  1262. }
  1263. return true;
  1264. }