// LiteralParser.cpp
  1. /*
  2. * Copyright (C) 2009 Apple Inc. All rights reserved.
  3. * Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be)
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
  15. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  16. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
  18. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  19. * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  20. * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  21. * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  22. * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
#include "config.h"
#include "LiteralParser.h"

#include "ButterflyInlines.h"
#include "CopiedSpaceInlines.h"
#include "JSArray.h"
#include "JSString.h"
#include "Lexer.h"
#include "ObjectConstructor.h"
#include "Operations.h"
#include "StrongInlines.h"
#include <limits>
#include <wtf/ASCIICType.h>
#include <wtf/dtoa.h>
#include <wtf/text/StringBuilder.h>

  39. namespace JSC {
  40. template <typename CharType>
  41. static inline bool isJSONWhiteSpace(const CharType& c)
  42. {
  43. // The JSON RFC 4627 defines a list of allowed characters to be considered
  44. // insignificant white space: http://www.ietf.org/rfc/rfc4627.txt (2. JSON Grammar).
  45. return c == ' ' || c == 0x9 || c == 0xA || c == 0xD;
  46. }
  47. template <typename CharType>
  48. bool LiteralParser<CharType>::tryJSONPParse(Vector<JSONPData>& results, bool needsFullSourceInfo)
  49. {
  50. if (m_lexer.next() != TokIdentifier)
  51. return false;
  52. do {
  53. Vector<JSONPPathEntry> path;
  54. // Unguarded next to start off the lexer
  55. Identifier name = Identifier(&m_exec->vm(), m_lexer.currentToken().start, m_lexer.currentToken().end - m_lexer.currentToken().start);
  56. JSONPPathEntry entry;
  57. if (name == m_exec->vm().propertyNames->varKeyword) {
  58. if (m_lexer.next() != TokIdentifier)
  59. return false;
  60. entry.m_type = JSONPPathEntryTypeDeclare;
  61. entry.m_pathEntryName = Identifier(&m_exec->vm(), m_lexer.currentToken().start, m_lexer.currentToken().end - m_lexer.currentToken().start);
  62. path.append(entry);
  63. } else {
  64. entry.m_type = JSONPPathEntryTypeDot;
  65. entry.m_pathEntryName = Identifier(&m_exec->vm(), m_lexer.currentToken().start, m_lexer.currentToken().end - m_lexer.currentToken().start);
  66. path.append(entry);
  67. }
  68. if (m_exec->vm().keywords->isKeyword(entry.m_pathEntryName))
  69. return false;
  70. TokenType tokenType = m_lexer.next();
  71. if (entry.m_type == JSONPPathEntryTypeDeclare && tokenType != TokAssign)
  72. return false;
  73. while (tokenType != TokAssign) {
  74. switch (tokenType) {
  75. case TokLBracket: {
  76. entry.m_type = JSONPPathEntryTypeLookup;
  77. if (m_lexer.next() != TokNumber)
  78. return false;
  79. double doubleIndex = m_lexer.currentToken().numberToken;
  80. int index = (int)doubleIndex;
  81. if (index != doubleIndex || index < 0)
  82. return false;
  83. entry.m_pathIndex = index;
  84. if (m_lexer.next() != TokRBracket)
  85. return false;
  86. break;
  87. }
  88. case TokDot: {
  89. entry.m_type = JSONPPathEntryTypeDot;
  90. if (m_lexer.next() != TokIdentifier)
  91. return false;
  92. entry.m_pathEntryName = Identifier(&m_exec->vm(), m_lexer.currentToken().start, m_lexer.currentToken().end - m_lexer.currentToken().start);
  93. break;
  94. }
  95. case TokLParen: {
  96. if (path.last().m_type != JSONPPathEntryTypeDot || needsFullSourceInfo)
  97. return false;
  98. path.last().m_type = JSONPPathEntryTypeCall;
  99. entry = path.last();
  100. goto startJSON;
  101. }
  102. default:
  103. return false;
  104. }
  105. path.append(entry);
  106. tokenType = m_lexer.next();
  107. }
  108. startJSON:
  109. m_lexer.next();
  110. results.append(JSONPData());
  111. results.last().m_value.set(m_exec->vm(), parse(StartParseExpression));
  112. if (!results.last().m_value)
  113. return false;
  114. results.last().m_path.swap(path);
  115. if (entry.m_type == JSONPPathEntryTypeCall) {
  116. if (m_lexer.currentToken().type != TokRParen)
  117. return false;
  118. m_lexer.next();
  119. }
  120. if (m_lexer.currentToken().type != TokSemi)
  121. break;
  122. m_lexer.next();
  123. } while (m_lexer.currentToken().type == TokIdentifier);
  124. return m_lexer.currentToken().type == TokEnd;
  125. }
  126. template <typename CharType>
  127. ALWAYS_INLINE const Identifier LiteralParser<CharType>::makeIdentifier(const LChar* characters, size_t length)
  128. {
  129. if (!length)
  130. return m_exec->vm().propertyNames->emptyIdentifier;
  131. if (characters[0] >= MaximumCachableCharacter)
  132. return Identifier(&m_exec->vm(), characters, length);
  133. if (length == 1) {
  134. if (!m_shortIdentifiers[characters[0]].isNull())
  135. return m_shortIdentifiers[characters[0]];
  136. m_shortIdentifiers[characters[0]] = Identifier(&m_exec->vm(), characters, length);
  137. return m_shortIdentifiers[characters[0]];
  138. }
  139. if (!m_recentIdentifiers[characters[0]].isNull() && Identifier::equal(m_recentIdentifiers[characters[0]].impl(), characters, length))
  140. return m_recentIdentifiers[characters[0]];
  141. m_recentIdentifiers[characters[0]] = Identifier(&m_exec->vm(), characters, length);
  142. return m_recentIdentifiers[characters[0]];
  143. }
  144. template <typename CharType>
  145. ALWAYS_INLINE const Identifier LiteralParser<CharType>::makeIdentifier(const UChar* characters, size_t length)
  146. {
  147. if (!length)
  148. return m_exec->vm().propertyNames->emptyIdentifier;
  149. if (characters[0] >= MaximumCachableCharacter)
  150. return Identifier(&m_exec->vm(), characters, length);
  151. if (length == 1) {
  152. if (!m_shortIdentifiers[characters[0]].isNull())
  153. return m_shortIdentifiers[characters[0]];
  154. m_shortIdentifiers[characters[0]] = Identifier(&m_exec->vm(), characters, length);
  155. return m_shortIdentifiers[characters[0]];
  156. }
  157. if (!m_recentIdentifiers[characters[0]].isNull() && Identifier::equal(m_recentIdentifiers[characters[0]].impl(), characters, length))
  158. return m_recentIdentifiers[characters[0]];
  159. m_recentIdentifiers[characters[0]] = Identifier(&m_exec->vm(), characters, length);
  160. return m_recentIdentifiers[characters[0]];
  161. }
  162. template <typename CharType>
  163. template <ParserMode mode> TokenType LiteralParser<CharType>::Lexer::lex(LiteralParserToken<CharType>& token)
  164. {
  165. while (m_ptr < m_end && isJSONWhiteSpace(*m_ptr))
  166. ++m_ptr;
  167. ASSERT(m_ptr <= m_end);
  168. if (m_ptr >= m_end) {
  169. token.type = TokEnd;
  170. token.start = token.end = m_ptr;
  171. return TokEnd;
  172. }
  173. token.type = TokError;
  174. token.start = m_ptr;
  175. switch (*m_ptr) {
  176. case '[':
  177. token.type = TokLBracket;
  178. token.end = ++m_ptr;
  179. return TokLBracket;
  180. case ']':
  181. token.type = TokRBracket;
  182. token.end = ++m_ptr;
  183. return TokRBracket;
  184. case '(':
  185. token.type = TokLParen;
  186. token.end = ++m_ptr;
  187. return TokLParen;
  188. case ')':
  189. token.type = TokRParen;
  190. token.end = ++m_ptr;
  191. return TokRParen;
  192. case '{':
  193. token.type = TokLBrace;
  194. token.end = ++m_ptr;
  195. return TokLBrace;
  196. case '}':
  197. token.type = TokRBrace;
  198. token.end = ++m_ptr;
  199. return TokRBrace;
  200. case ',':
  201. token.type = TokComma;
  202. token.end = ++m_ptr;
  203. return TokComma;
  204. case ':':
  205. token.type = TokColon;
  206. token.end = ++m_ptr;
  207. return TokColon;
  208. case '"':
  209. return lexString<mode, '"'>(token);
  210. case 't':
  211. if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
  212. m_ptr += 4;
  213. token.type = TokTrue;
  214. token.end = m_ptr;
  215. return TokTrue;
  216. }
  217. break;
  218. case 'f':
  219. if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
  220. m_ptr += 5;
  221. token.type = TokFalse;
  222. token.end = m_ptr;
  223. return TokFalse;
  224. }
  225. break;
  226. case 'n':
  227. if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
  228. m_ptr += 4;
  229. token.type = TokNull;
  230. token.end = m_ptr;
  231. return TokNull;
  232. }
  233. break;
  234. case '-':
  235. case '0':
  236. case '1':
  237. case '2':
  238. case '3':
  239. case '4':
  240. case '5':
  241. case '6':
  242. case '7':
  243. case '8':
  244. case '9':
  245. return lexNumber(token);
  246. }
  247. if (m_ptr < m_end) {
  248. if (*m_ptr == '.') {
  249. token.type = TokDot;
  250. token.end = ++m_ptr;
  251. return TokDot;
  252. }
  253. if (*m_ptr == '=') {
  254. token.type = TokAssign;
  255. token.end = ++m_ptr;
  256. return TokAssign;
  257. }
  258. if (*m_ptr == ';') {
  259. token.type = TokSemi;
  260. token.end = ++m_ptr;
  261. return TokAssign;
  262. }
  263. if (isASCIIAlpha(*m_ptr) || *m_ptr == '_' || *m_ptr == '$')
  264. return lexIdentifier(token);
  265. if (*m_ptr == '\'') {
  266. if (mode == StrictJSON) {
  267. m_lexErrorMessage = ASCIILiteral("Single quotes (\') are not allowed in JSON");
  268. return TokError;
  269. }
  270. return lexString<mode, '\''>(token);
  271. }
  272. }
  273. m_lexErrorMessage = String::format("Unrecognized token '%c'", *m_ptr).impl();
  274. return TokError;
  275. }
  276. template <>
  277. ALWAYS_INLINE TokenType LiteralParser<LChar>::Lexer::lexIdentifier(LiteralParserToken<LChar>& token)
  278. {
  279. while (m_ptr < m_end && (isASCIIAlphanumeric(*m_ptr) || *m_ptr == '_' || *m_ptr == '$'))
  280. m_ptr++;
  281. token.stringIs8Bit = 1;
  282. token.stringToken8 = token.start;
  283. token.stringLength = m_ptr - token.start;
  284. token.type = TokIdentifier;
  285. token.end = m_ptr;
  286. return TokIdentifier;
  287. }
  288. template <>
  289. ALWAYS_INLINE TokenType LiteralParser<UChar>::Lexer::lexIdentifier(LiteralParserToken<UChar>& token)
  290. {
  291. while (m_ptr < m_end && (isASCIIAlphanumeric(*m_ptr) || *m_ptr == '_' || *m_ptr == '$' || *m_ptr == 0x200C || *m_ptr == 0x200D))
  292. m_ptr++;
  293. token.stringIs8Bit = 0;
  294. token.stringToken16 = token.start;
  295. token.stringLength = m_ptr - token.start;
  296. token.type = TokIdentifier;
  297. token.end = m_ptr;
  298. return TokIdentifier;
  299. }
  300. template <typename CharType>
  301. TokenType LiteralParser<CharType>::Lexer::next()
  302. {
  303. if (m_mode == NonStrictJSON)
  304. return lex<NonStrictJSON>(m_currentToken);
  305. if (m_mode == JSONP)
  306. return lex<JSONP>(m_currentToken);
  307. return lex<StrictJSON>(m_currentToken);
  308. }
  309. template <>
  310. ALWAYS_INLINE void setParserTokenString<LChar>(LiteralParserToken<LChar>& token, const LChar* string)
  311. {
  312. token.stringIs8Bit = 1;
  313. token.stringToken8 = string;
  314. }
  315. template <>
  316. ALWAYS_INLINE void setParserTokenString<UChar>(LiteralParserToken<UChar>& token, const UChar* string)
  317. {
  318. token.stringIs8Bit = 0;
  319. token.stringToken16 = string;
  320. }
  321. template <ParserMode mode, typename CharType, LChar terminator> static inline bool isSafeStringCharacter(LChar c)
  322. {
  323. return (c >= ' ' && c != '\\' && c != terminator) || (c == '\t' && mode != StrictJSON);
  324. }
  325. template <ParserMode mode, typename CharType, UChar terminator> static inline bool isSafeStringCharacter(UChar c)
  326. {
  327. return (c >= ' ' && (mode == StrictJSON || c <= 0xff) && c != '\\' && c != terminator) || (c == '\t' && mode != StrictJSON);
  328. }
  329. template <typename CharType>
  330. template <ParserMode mode, char terminator> ALWAYS_INLINE TokenType LiteralParser<CharType>::Lexer::lexString(LiteralParserToken<CharType>& token)
  331. {
  332. ++m_ptr;
  333. const CharType* runStart = m_ptr;
  334. StringBuilder builder;
  335. do {
  336. runStart = m_ptr;
  337. while (m_ptr < m_end && isSafeStringCharacter<mode, CharType, terminator>(*m_ptr))
  338. ++m_ptr;
  339. if (builder.length())
  340. builder.append(runStart, m_ptr - runStart);
  341. if ((mode != NonStrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
  342. if (builder.isEmpty() && runStart < m_ptr)
  343. builder.append(runStart, m_ptr - runStart);
  344. ++m_ptr;
  345. if (m_ptr >= m_end) {
  346. m_lexErrorMessage = ASCIILiteral("Unterminated string");
  347. return TokError;
  348. }
  349. switch (*m_ptr) {
  350. case '"':
  351. builder.append('"');
  352. m_ptr++;
  353. break;
  354. case '\\':
  355. builder.append('\\');
  356. m_ptr++;
  357. break;
  358. case '/':
  359. builder.append('/');
  360. m_ptr++;
  361. break;
  362. case 'b':
  363. builder.append('\b');
  364. m_ptr++;
  365. break;
  366. case 'f':
  367. builder.append('\f');
  368. m_ptr++;
  369. break;
  370. case 'n':
  371. builder.append('\n');
  372. m_ptr++;
  373. break;
  374. case 'r':
  375. builder.append('\r');
  376. m_ptr++;
  377. break;
  378. case 't':
  379. builder.append('\t');
  380. m_ptr++;
  381. break;
  382. case 'u':
  383. if ((m_end - m_ptr) < 5) {
  384. m_lexErrorMessage = ASCIILiteral("\\u must be followed by 4 hex digits");
  385. return TokError;
  386. } // uNNNN == 5 characters
  387. for (int i = 1; i < 5; i++) {
  388. if (!isASCIIHexDigit(m_ptr[i])) {
  389. m_lexErrorMessage = String::format("\"\\%s\" is not a valid unicode escape", String(m_ptr, 5).ascii().data()).impl();
  390. return TokError;
  391. }
  392. }
  393. builder.append(JSC::Lexer<CharType>::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
  394. m_ptr += 5;
  395. break;
  396. default:
  397. if (*m_ptr == '\'' && mode != StrictJSON) {
  398. builder.append('\'');
  399. m_ptr++;
  400. break;
  401. }
  402. m_lexErrorMessage = String::format("Invalid escape character %c", *m_ptr).impl();
  403. return TokError;
  404. }
  405. }
  406. } while ((mode != NonStrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != terminator);
  407. if (m_ptr >= m_end || *m_ptr != terminator) {
  408. m_lexErrorMessage = ASCIILiteral("Unterminated string");
  409. return TokError;
  410. }
  411. if (builder.isEmpty()) {
  412. token.stringBuffer = String();
  413. setParserTokenString<CharType>(token, runStart);
  414. token.stringLength = m_ptr - runStart;
  415. } else {
  416. token.stringBuffer = builder.toString();
  417. if (token.stringBuffer.is8Bit()) {
  418. token.stringIs8Bit = 1;
  419. token.stringToken8 = token.stringBuffer.characters8();
  420. } else {
  421. token.stringIs8Bit = 0;
  422. token.stringToken16 = token.stringBuffer.characters16();
  423. }
  424. token.stringLength = token.stringBuffer.length();
  425. }
  426. token.type = TokString;
  427. token.end = ++m_ptr;
  428. return TokString;
  429. }
  430. template <typename CharType>
  431. TokenType LiteralParser<CharType>::Lexer::lexNumber(LiteralParserToken<CharType>& token)
  432. {
  433. // ES5 and json.org define numbers as
  434. // number
  435. // int
  436. // int frac? exp?
  437. //
  438. // int
  439. // -? 0
  440. // -? digit1-9 digits?
  441. //
  442. // digits
  443. // digit digits?
  444. //
  445. // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?
  446. if (m_ptr < m_end && *m_ptr == '-') // -?
  447. ++m_ptr;
  448. // (0 | [1-9][0-9]*)
  449. if (m_ptr < m_end && *m_ptr == '0') // 0
  450. ++m_ptr;
  451. else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9]
  452. ++m_ptr;
  453. // [0-9]*
  454. while (m_ptr < m_end && isASCIIDigit(*m_ptr))
  455. ++m_ptr;
  456. } else {
  457. m_lexErrorMessage = ASCIILiteral("Invalid number");
  458. return TokError;
  459. }
  460. // ('.' [0-9]+)?
  461. if (m_ptr < m_end && *m_ptr == '.') {
  462. ++m_ptr;
  463. // [0-9]+
  464. if (m_ptr >= m_end || !isASCIIDigit(*m_ptr)) {
  465. m_lexErrorMessage = ASCIILiteral("Invalid digits after decimal point");
  466. return TokError;
  467. }
  468. ++m_ptr;
  469. while (m_ptr < m_end && isASCIIDigit(*m_ptr))
  470. ++m_ptr;
  471. } else if (m_ptr < m_end && (*m_ptr != 'e' && *m_ptr != 'E') && (m_ptr - token.start) < 10) {
  472. int result = 0;
  473. token.type = TokNumber;
  474. token.end = m_ptr;
  475. const CharType* digit = token.start;
  476. int negative = 1;
  477. if (*digit == '-') {
  478. negative = -1;
  479. digit++;
  480. }
  481. while (digit < m_ptr)
  482. result = result * 10 + (*digit++) - '0';
  483. result *= negative;
  484. token.numberToken = result;
  485. return TokNumber;
  486. }
  487. // ([eE][+-]? [0-9]+)?
  488. if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE]
  489. ++m_ptr;
  490. // [-+]?
  491. if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+'))
  492. ++m_ptr;
  493. // [0-9]+
  494. if (m_ptr >= m_end || !isASCIIDigit(*m_ptr)) {
  495. m_lexErrorMessage = ASCIILiteral("Exponent symbols should be followed by an optional '+' or '-' and then by at least one number");
  496. return TokError;
  497. }
  498. ++m_ptr;
  499. while (m_ptr < m_end && isASCIIDigit(*m_ptr))
  500. ++m_ptr;
  501. }
  502. token.type = TokNumber;
  503. token.end = m_ptr;
  504. size_t parsedLength;
  505. token.numberToken = parseDouble(token.start, token.end - token.start, parsedLength);
  506. return TokNumber;
  507. }
  508. template <typename CharType>
  509. JSValue LiteralParser<CharType>::parse(ParserState initialState)
  510. {
  511. ParserState state = initialState;
  512. MarkedArgumentBuffer objectStack;
  513. JSValue lastValue;
  514. Vector<ParserState, 16, UnsafeVectorOverflow> stateStack;
  515. Vector<Identifier, 16, UnsafeVectorOverflow> identifierStack;
  516. while (1) {
  517. switch(state) {
  518. startParseArray:
  519. case StartParseArray: {
  520. JSArray* array = constructEmptyArray(m_exec, 0);
  521. objectStack.append(array);
  522. // fallthrough
  523. }
  524. doParseArrayStartExpression:
  525. case DoParseArrayStartExpression: {
  526. TokenType lastToken = m_lexer.currentToken().type;
  527. if (m_lexer.next() == TokRBracket) {
  528. if (lastToken == TokComma) {
  529. m_parseErrorMessage = ASCIILiteral("Unexpected comma at the end of array expression");
  530. return JSValue();
  531. }
  532. m_lexer.next();
  533. lastValue = objectStack.last();
  534. objectStack.removeLast();
  535. break;
  536. }
  537. stateStack.append(DoParseArrayEndExpression);
  538. goto startParseExpression;
  539. }
  540. case DoParseArrayEndExpression: {
  541. JSArray* array = asArray(objectStack.last());
  542. array->putDirectIndex(m_exec, array->length(), lastValue);
  543. if (m_lexer.currentToken().type == TokComma)
  544. goto doParseArrayStartExpression;
  545. if (m_lexer.currentToken().type != TokRBracket) {
  546. m_parseErrorMessage = ASCIILiteral("Expected ']'");
  547. return JSValue();
  548. }
  549. m_lexer.next();
  550. lastValue = objectStack.last();
  551. objectStack.removeLast();
  552. break;
  553. }
  554. startParseObject:
  555. case StartParseObject: {
  556. JSObject* object = constructEmptyObject(m_exec);
  557. objectStack.append(object);
  558. TokenType type = m_lexer.next();
  559. if (type == TokString || (m_mode != StrictJSON && type == TokIdentifier)) {
  560. LiteralParserToken<CharType> identifierToken = m_lexer.currentToken();
  561. // Check for colon
  562. if (m_lexer.next() != TokColon) {
  563. m_parseErrorMessage = ASCIILiteral("Expected ':' before value in object property definition");
  564. return JSValue();
  565. }
  566. m_lexer.next();
  567. if (identifierToken.stringIs8Bit)
  568. identifierStack.append(makeIdentifier(identifierToken.stringToken8, identifierToken.stringLength));
  569. else
  570. identifierStack.append(makeIdentifier(identifierToken.stringToken16, identifierToken.stringLength));
  571. stateStack.append(DoParseObjectEndExpression);
  572. goto startParseExpression;
  573. }
  574. if (type != TokRBrace) {
  575. m_parseErrorMessage = ASCIILiteral("Expected '}'");
  576. return JSValue();
  577. }
  578. m_lexer.next();
  579. lastValue = objectStack.last();
  580. objectStack.removeLast();
  581. break;
  582. }
  583. doParseObjectStartExpression:
  584. case DoParseObjectStartExpression: {
  585. TokenType type = m_lexer.next();
  586. if (type != TokString && (m_mode == StrictJSON || type != TokIdentifier)) {
  587. m_parseErrorMessage = ASCIILiteral("Property name must be a string literal");
  588. return JSValue();
  589. }
  590. LiteralParserToken<CharType> identifierToken = m_lexer.currentToken();
  591. // Check for colon
  592. if (m_lexer.next() != TokColon) {
  593. m_parseErrorMessage = ASCIILiteral("Expected ':'");
  594. return JSValue();
  595. }
  596. m_lexer.next();
  597. if (identifierToken.stringIs8Bit)
  598. identifierStack.append(makeIdentifier(identifierToken.stringToken8, identifierToken.stringLength));
  599. else
  600. identifierStack.append(makeIdentifier(identifierToken.stringToken16, identifierToken.stringLength));
  601. stateStack.append(DoParseObjectEndExpression);
  602. goto startParseExpression;
  603. }
  604. case DoParseObjectEndExpression:
  605. {
  606. JSObject* object = asObject(objectStack.last());
  607. PropertyName ident = identifierStack.last();
  608. unsigned i = ident.asIndex();
  609. if (i != PropertyName::NotAnIndex)
  610. object->putDirectIndex(m_exec, i, lastValue);
  611. else
  612. object->putDirect(m_exec->vm(), ident, lastValue);
  613. identifierStack.removeLast();
  614. if (m_lexer.currentToken().type == TokComma)
  615. goto doParseObjectStartExpression;
  616. if (m_lexer.currentToken().type != TokRBrace) {
  617. m_parseErrorMessage = ASCIILiteral("Expected '}'");
  618. return JSValue();
  619. }
  620. m_lexer.next();
  621. lastValue = objectStack.last();
  622. objectStack.removeLast();
  623. break;
  624. }
  625. startParseExpression:
  626. case StartParseExpression: {
  627. switch (m_lexer.currentToken().type) {
  628. case TokLBracket:
  629. goto startParseArray;
  630. case TokLBrace:
  631. goto startParseObject;
  632. case TokString: {
  633. LiteralParserToken<CharType> stringToken = m_lexer.currentToken();
  634. m_lexer.next();
  635. if (stringToken.stringIs8Bit)
  636. lastValue = jsString(m_exec, makeIdentifier(stringToken.stringToken8, stringToken.stringLength).string());
  637. else
  638. lastValue = jsString(m_exec, makeIdentifier(stringToken.stringToken16, stringToken.stringLength).string());
  639. break;
  640. }
  641. case TokNumber: {
  642. LiteralParserToken<CharType> numberToken = m_lexer.currentToken();
  643. m_lexer.next();
  644. lastValue = jsNumber(numberToken.numberToken);
  645. break;
  646. }
  647. case TokNull:
  648. m_lexer.next();
  649. lastValue = jsNull();
  650. break;
  651. case TokTrue:
  652. m_lexer.next();
  653. lastValue = jsBoolean(true);
  654. break;
  655. case TokFalse:
  656. m_lexer.next();
  657. lastValue = jsBoolean(false);
  658. break;
  659. case TokRBracket:
  660. m_parseErrorMessage = ASCIILiteral("Unexpected token ']'");
  661. return JSValue();
  662. case TokRBrace:
  663. m_parseErrorMessage = ASCIILiteral("Unexpected token '}'");
  664. return JSValue();
  665. case TokIdentifier: {
  666. const LiteralParserToken<CharType>& token = m_lexer.currentToken();
  667. if (token.stringIs8Bit)
  668. m_parseErrorMessage = String::format("Unexpected identifier \"%s\"", String(m_lexer.currentToken().stringToken8, m_lexer.currentToken().stringLength).ascii().data()).impl();
  669. else
  670. m_parseErrorMessage = String::format("Unexpected identifier \"%s\"", String(m_lexer.currentToken().stringToken16, m_lexer.currentToken().stringLength).ascii().data()).impl();
  671. return JSValue();
  672. }
  673. case TokColon:
  674. m_parseErrorMessage = ASCIILiteral("Unexpected token ':'");
  675. return JSValue();
  676. case TokLParen:
  677. m_parseErrorMessage = ASCIILiteral("Unexpected token '('");
  678. return JSValue();
  679. case TokRParen:
  680. m_parseErrorMessage = ASCIILiteral("Unexpected token ')'");
  681. return JSValue();
  682. case TokComma:
  683. m_parseErrorMessage = ASCIILiteral("Unexpected token ','");
  684. return JSValue();
  685. case TokDot:
  686. m_parseErrorMessage = ASCIILiteral("Unexpected token '.'");
  687. return JSValue();
  688. case TokAssign:
  689. m_parseErrorMessage = ASCIILiteral("Unexpected token '='");
  690. return JSValue();
  691. case TokSemi:
  692. m_parseErrorMessage = ASCIILiteral("Unexpected token ';'");
  693. return JSValue();
  694. case TokEnd:
  695. m_parseErrorMessage = ASCIILiteral("Unexpected EOF");
  696. return JSValue();
  697. case TokError:
  698. default:
  699. // Error
  700. m_parseErrorMessage = ASCIILiteral("Could not parse value expression");
  701. return JSValue();
  702. }
  703. break;
  704. }
  705. case StartParseStatement: {
  706. switch (m_lexer.currentToken().type) {
  707. case TokLBracket:
  708. case TokNumber:
  709. case TokString:
  710. goto startParseExpression;
  711. case TokLParen: {
  712. m_lexer.next();
  713. stateStack.append(StartParseStatementEndStatement);
  714. goto startParseExpression;
  715. }
  716. case TokRBracket:
  717. m_parseErrorMessage = ASCIILiteral("Unexpected token ']'");
  718. return JSValue();
  719. case TokLBrace:
  720. m_parseErrorMessage = ASCIILiteral("Unexpected token '{'");
  721. return JSValue();
  722. case TokRBrace:
  723. m_parseErrorMessage = ASCIILiteral("Unexpected token '}'");
  724. return JSValue();
  725. case TokIdentifier:
  726. m_parseErrorMessage = ASCIILiteral("Unexpected identifier");
  727. return JSValue();
  728. case TokColon:
  729. m_parseErrorMessage = ASCIILiteral("Unexpected token ':'");
  730. return JSValue();
  731. case TokRParen:
  732. m_parseErrorMessage = ASCIILiteral("Unexpected token ')'");
  733. return JSValue();
  734. case TokComma:
  735. m_parseErrorMessage = ASCIILiteral("Unexpected token ','");
  736. return JSValue();
  737. case TokTrue:
  738. m_parseErrorMessage = ASCIILiteral("Unexpected token 'true'");
  739. return JSValue();
  740. case TokFalse:
  741. m_parseErrorMessage = ASCIILiteral("Unexpected token 'false'");
  742. return JSValue();
  743. case TokNull:
  744. m_parseErrorMessage = ASCIILiteral("Unexpected token 'null'");
  745. return JSValue();
  746. case TokEnd:
  747. m_parseErrorMessage = ASCIILiteral("Unexpected EOF");
  748. return JSValue();
  749. case TokDot:
  750. m_parseErrorMessage = ASCIILiteral("Unexpected token '.'");
  751. return JSValue();
  752. case TokAssign:
  753. m_parseErrorMessage = ASCIILiteral("Unexpected token '='");
  754. return JSValue();
  755. case TokSemi:
  756. m_parseErrorMessage = ASCIILiteral("Unexpected token ';'");
  757. return JSValue();
  758. case TokError:
  759. default:
  760. m_parseErrorMessage = ASCIILiteral("Could not parse statement");
  761. return JSValue();
  762. }
  763. }
  764. case StartParseStatementEndStatement: {
  765. ASSERT(stateStack.isEmpty());
  766. if (m_lexer.currentToken().type != TokRParen)
  767. return JSValue();
  768. if (m_lexer.next() == TokEnd)
  769. return lastValue;
  770. m_parseErrorMessage = ASCIILiteral("Unexpected content at end of JSON literal");
  771. return JSValue();
  772. }
  773. default:
  774. RELEASE_ASSERT_NOT_REACHED();
  775. }
  776. if (stateStack.isEmpty())
  777. return lastValue;
  778. state = stateStack.last();
  779. stateStack.removeLast();
  780. continue;
  781. }
  782. }
  783. // Instantiate the two flavors of LiteralParser we need instead of putting most of this file in LiteralParser.h
  784. template class LiteralParser<LChar>;
  785. template class LiteralParser<UChar>;
  786. }