parse-js.js 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248
  1. /***********************************************************************
  2. A JavaScript tokenizer / parser / beautifier / compressor.
  3. This version is suitable for Node.js. With minimal changes (the
  4. exports stuff) it should work on any JS platform.
  5. This file contains the tokenizer/parser. It is a port to JavaScript
  6. of parse-js [1], a JavaScript parser library written in Common Lisp
  7. by Marijn Haverbeke. Thank you Marijn!
  8. [1] http://marijn.haverbeke.nl/parse-js/
  9. Exported functions:
  10. - tokenizer(code) -- returns a function. Call the returned
  11. function to fetch the next token.
  12. - parse(code) -- returns an AST of the given JavaScript code.
  13. -------------------------------- (C) ---------------------------------
  14. Author: Mihai Bazon
  15. <mihai.bazon@gmail.com>
  16. http://mihai.bazon.net/blog
  17. Distributed under the BSD license:
  18. Copyright 2010 (c) Mihai Bazon <mihai.bazon@gmail.com>
  19. Based on parse-js (http://marijn.haverbeke.nl/parse-js/).
  20. Redistribution and use in source and binary forms, with or without
  21. modification, are permitted provided that the following conditions
  22. are met:
  23. * Redistributions of source code must retain the above
  24. copyright notice, this list of conditions and the following
  25. disclaimer.
  26. * Redistributions in binary form must reproduce the above
  27. copyright notice, this list of conditions and the following
  28. disclaimer in the documentation and/or other materials
  29. provided with the distribution.
  30. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER “AS IS” AND ANY
  31. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  32. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  33. PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE
  34. LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
  35. OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  36. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  37. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  38. THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
  39. TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
  40. THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  41. SUCH DAMAGE.
  42. ***********************************************************************/
  43. /* -----[ Tokenizer (constants) ]----- */
  /* Words that read_word() never reports as plain "name" tokens. */
  var KEYWORDS = array_to_hash((
      "break case catch const continue default delete do else finally for " +
      "function if in instanceof new return switch throw try typeof var void " +
      "while with"
  ).split(" "));

  /* Reserved words; within the visible code this table is only exported. */
  var RESERVED_WORDS = array_to_hash((
      "abstract boolean byte char class debugger double enum export extends " +
      "final float goto implements import int interface long native package " +
      "private protected public short static super synchronized throws " +
      "transient volatile"
  ).split(" "));

  /* Keywords after which a "/" starts a regular expression, not a division. */
  var KEYWORDS_BEFORE_EXPRESSION = array_to_hash(
      "return new delete throw else case".split(" ")
  );

  /* Literal-like words. */
  var KEYWORDS_ATOM = array_to_hash("false null true undefined".split(" "));

  /* Characters that can begin an operator token. */
  var OPERATOR_CHARS = array_to_hash(characters("+-*&%=<>!?|~^"));

  /* Numeric literal shapes accepted by parse_js_number(). */
  var RE_HEX_NUMBER = /^0x[0-9a-f]+$/i;
  var RE_OCT_NUMBER = /^0[0-7]+$/;
  var RE_DEC_NUMBER = /^\d*\.?\d*(?:e[+-]?\d*(?:\d\.?|\.?\d)\d*)?$/i;

  /* Every operator spelling, both word operators and punctuation operators. */
  var OPERATORS = array_to_hash((
      "in instanceof typeof new void delete " +
      "++ -- + - ! ~ & | ^ * / % >> << >>> " +
      "< > <= >= == === != !== ? " +
      "= += -= /= *= %= >>= <<= >>>= |= ^= &= " +
      "&& ||"
  ).split(" "));

  /* Characters skipped between tokens. */
  var WHITESPACE_CHARS = array_to_hash(characters(" \n\r\t"));

  /* Punctuation after which a "/" starts a regular expression. */
  var PUNC_BEFORE_EXPRESSION = array_to_hash(characters("[{}(,.;:"));

  /* Single-character punctuation tokens. */
  var PUNC_CHARS = array_to_hash(characters("[]{}(),;:"));

  /* Flag letters accepted after a regular-expression literal. */
  var REGEXP_MODIFIERS = array_to_hash(characters("gmsiy"));
  172. /* -----[ Tokenizer ]----- */
  /**
   * Tells whether the first character of `ch` is an ASCII digit or an
   * ASCII letter.
   *
   * @param {string} ch - string whose first character is examined
   * @returns {boolean}
   */
  function is_alphanumeric_char(ch) {
      var code = ch.charCodeAt(0);
      if (code >= 48 && code <= 57) return true;    // "0".."9"
      if (code >= 65 && code <= 90) return true;    // "A".."Z"
      return code >= 97 && code <= 122;             // "a".."z"
  };
  /**
   * Tells whether `ch` may appear in an identifier: an ASCII letter or
   * digit, "$" or "_".
   *
   * @param {string} ch - a single character
   * @returns {boolean}
   */
  function is_identifier_char(ch) {
      if (is_alphanumeric_char(ch)) return true;
      return ch == "$" || ch == "_";
  };
  /**
   * Tells whether the first character of `ch` is an ASCII decimal digit.
   *
   * @param {string} ch - string whose first character is examined
   * @returns {boolean}
   */
  function is_digit(ch) {
      var code = ch.charCodeAt(0);
      return 48 <= code && code <= 57;    // "0".."9"
  };
  /**
   * Converts the source text of a numeric literal to a number.
   *
   * @param {string} num - literal text (hexadecimal, octal or decimal)
   * @returns {number|undefined} the numeric value; undefined when the text
   *          matches none of the three literal patterns
   */
  function parse_js_number(num) {
      if (RE_HEX_NUMBER.test(num))
          return parseInt(num.substr(2), 16);    // drop the "0x" prefix
      if (RE_OCT_NUMBER.test(num))
          return parseInt(num.substr(1), 8);     // drop the leading "0"
      if (RE_DEC_NUMBER.test(num))
          return parseFloat(num);
      // no pattern matched: fall through and return undefined
  };
  /**
   * Error raised by the tokenizer and the parser.
   *
   * @param {string} message - description of the problem
   * @param {number} line - line of the offending token
   * @param {number} col - column of the offending token
   * @param {number} pos - character offset of the offending token
   */
  function JS_Parse_Error(message, line, col, pos) {
      this.message = message;
      this.line = line;
      this.col = col;
      this.pos = pos;
      // Take the stack from a real Error carrying our message, so the
      // trace is headed by the actual problem rather than by an unrelated
      // provoked exception.
      this.stack = new Error(message).stack;
  };

  /**
   * Formats the error as "<message> (line: L, col: C, pos: P)", followed by
   * a blank line and the captured stack.
   *
   * @returns {string}
   */
  JS_Parse_Error.prototype.toString = function() {
      return this.message + " (line: " + this.line + ", col: " + this.col + ", pos: " + this.pos + ")" + "\n\n" + this.stack;
  };
  /**
   * Always throws: builds a JS_Parse_Error from the arguments and raises it.
   *
   * @param {string} message - description of the problem
   * @param {number} line - line to report
   * @param {number} col - column to report
   * @param {number} pos - character offset to report
   * @throws {JS_Parse_Error}
   */
  function js_error(message, line, col, pos) {
      var error = new JS_Parse_Error(message, line, col, pos);
      throw error;
  };
  /**
   * Tests a token against a type and, optionally, a value.
   *
   * @param {Object} token - token with `type` and `value` properties
   * @param {string} type - required token type
   * @param {*} [val] - required value; null/undefined matches any value
   * @returns {boolean}
   */
  function is_token(token, type, val) {
      if (token.type != type) return false;
      return val == null || token.value == val;
  };
  /* Sentinel thrown inside the tokenizer when the input ends in the middle
     of a token; with_eof_error() turns it into a parse error. */
  var EX_EOF = {};

  /**
   * Creates a tokenizer for a piece of JavaScript source.
   *
   * Line terminators (\r\n, \r, \u2028, \u2029) are normalised to "\n" and
   * a leading byte-order mark is removed before scanning.
   *
   * Each token is an object with the properties `type`, `value`, `line`,
   * `col`, `pos` (where the token starts) and `nlb` (true when a newline
   * separates it from the previous token).  Non-comment tokens also carry
   * `comments_before`, the list of comment tokens skipped just before them.
   *
   * @param {string} $TEXT - the source code
   * @returns {function} next_token(force_regexp): returns the next token;
   *          pass a truthy `force_regexp` to read a regular expression
   *          starting at the current position.  The function also has a
   *          `context(nc)` method that returns the internal state object
   *          (after replacing it with `nc` when `nc` is given).
   * @throws {JS_Parse_Error} (while fetching tokens) on malformed input
   */
  function tokenizer($TEXT) {

      var S = {
          text            : $TEXT.replace(/\r\n?|[\n\u2028\u2029]/g, "\n").replace(/^\uFEFF/, ''),
          pos             : 0,
          tokpos          : 0,
          line            : 0,
          tokline         : 0,
          col             : 0,
          tokcol          : 0,
          newline_before  : false,
          regex_allowed   : false,
          comments_before : []
      };

      /* Current character without consuming it ("" at end of input). */
      function peek() { return S.text.charAt(S.pos); };

      /* Consumes and returns one character, keeping line/col up to date.
         With signal_eof, throws EX_EOF when the input is exhausted. */
      function next(signal_eof) {
          var ch = S.text.charAt(S.pos++);
          if (signal_eof && !ch)
              throw EX_EOF;
          if (ch == "\n") {
              S.newline_before = true;
              ++S.line;
              S.col = 0;
          } else {
              ++S.col;
          }
          return ch;
      };

      /* True at end of input.  peek() is a closure function, not a member
         of the state object S. */
      function eof() {
          return !peek();
      };

      /* Index of `what` at or after the current position, or -1; with
         signal_eof a miss throws EX_EOF instead. */
      function find(what, signal_eof) {
          var pos = S.text.indexOf(what, S.pos);
          if (signal_eof && pos == -1) throw EX_EOF;
          return pos;
      };

      /* Remembers where the token about to be read begins. */
      function start_token() {
          S.tokline = S.line;
          S.tokcol = S.col;
          S.tokpos = S.pos;
      };

      /* Builds a token and updates the regexp-vs-division state. */
      function token(type, value, is_comment) {
          S.regex_allowed = ((type == "operator" && !HOP(UNARY_POSTFIX, value)) ||
                             (type == "keyword" && HOP(KEYWORDS_BEFORE_EXPRESSION, value)) ||
                             (type == "punc" && HOP(PUNC_BEFORE_EXPRESSION, value)));
          var ret = {
              type  : type,
              value : value,
              line  : S.tokline,
              col   : S.tokcol,
              pos   : S.tokpos,
              nlb   : S.newline_before
          };
          if (!is_comment) {
              ret.comments_before = S.comments_before;
              S.comments_before = [];
              // A newline in front of a skipped comment also separates this
              // token from the previous one; the flag was reset when the
              // comment token was built, so recover it from the comments.
              for (var i = 0; i < ret.comments_before.length; ++i)
                  ret.nlb = ret.nlb || ret.comments_before[i].nlb;
          }
          S.newline_before = false;
          return ret;
      };

      function skip_whitespace() {
          while (HOP(WHITESPACE_CHARS, peek()))
              next();
      };

      /* Consumes characters while pred(ch, index) is truthy and returns
         the consumed text. */
      function read_while(pred) {
          var ret = "", ch = peek(), i = 0;
          while (ch && pred(ch, i++)) {
              ret += next();
              ch = peek();
          }
          return ret;
      };

      /* Raises a parse error located at the start of the current token. */
      function parse_error(err) {
          js_error(err, S.tokline, S.tokcol, S.tokpos);
      };

      /* Reads a numeric literal; `prefix` is "." when the caller already
         consumed a leading dot. */
      function read_num(prefix) {
          var has_e = false, after_e = false, has_x = false, has_dot = prefix == ".";
          var num = read_while(function(ch, i){
              if (ch == "x" || ch == "X") {
                  if (has_x) return false;
                  return has_x = true;
              }
              if (!has_x && (ch == "E" || ch == "e")) {
                  if (has_e) return false;
                  return has_e = after_e = true;
              }
              if (ch == "-") {
                  if (after_e || (i == 0 && !prefix)) return true;
                  return false;
              }
              if (ch == "+") return after_e;
              after_e = false;
              if (ch == ".") {
                  if (!has_dot)
                      return has_dot = true;
                  return false;
              }
              return is_alphanumeric_char(ch);
          });
          if (prefix)
              num = prefix + num;
          var valid = parse_js_number(num);
          if (!isNaN(valid)) {
              return token("num", valid);
          } else {
              parse_error("Invalid syntax: " + num);
          }
      };

      /* Reads the character(s) after a backslash in a string literal. */
      function read_escaped_char() {
          var ch = next(true);
          switch (ch) {
            case "n" : return "\n";
            case "r" : return "\r";
            case "t" : return "\t";
            case "b" : return "\b";
            case "v" : return "\v";
            case "f" : return "\f";
            case "0" : return "\0";
            case "x" : return String.fromCharCode(hex_bytes(2));
            case "u" : return String.fromCharCode(hex_bytes(4));
            default  : return ch;
          }
      };

      /* Reads n hexadecimal digits and returns their value. */
      function hex_bytes(n) {
          var num = 0;
          for (; n > 0; --n) {
              var digit = parseInt(next(true), 16);
              if (isNaN(digit))
                  parse_error("Invalid hex-character pattern in string");
              num = (num << 4) | digit;
          }
          return num;
      };

      function read_string() {
          return with_eof_error("Unterminated string constant", function(){
              var quote = next(), ret = "";
              for (;;) {
                  var ch = next(true);
                  if (ch == "\\") ch = read_escaped_char();
                  else if (ch == quote) break;
                  ret += ch;
              }
              return token("string", ret);
          });
      };

      /* Reads a "//" comment; the first "/" was consumed by handle_slash().
         The terminating newline is left in the input. */
      function read_line_comment() {
          next();
          var i = find("\n"), ret;
          if (i == -1) {
              ret = S.text.substr(S.pos);
              S.pos = S.text.length;
          } else {
              ret = S.text.substring(S.pos, i);
              S.pos = i;
          }
          // S.pos was moved directly, so advance the column by hand.
          S.col += ret.length;
          return token("comment1", ret, true);
      };

      /* Reads a block comment; the "/" was consumed by handle_slash(). */
      function read_multiline_comment() {
          next();
          return with_eof_error("Unterminated multiline comment", function(){
              var i = find("*/", true),
                  text = S.text.substring(S.pos, i),
                  tok = token("comment2", text, true),
                  lines = text.split("\n");
              S.pos = i + 2;
              S.line += lines.length - 1;
              // S.pos was moved directly, so bring the column up to date:
              // either restart after the comment's last newline or continue
              // on the current line ("+ 2" accounts for the closing "*/").
              if (lines.length > 1)
                  S.col = lines[lines.length - 1].length + 2;
              else
                  S.col += text.length + 2;
              S.newline_before = lines.length > 1;
              return tok;
          });
      };

      /* Reads a regular expression; the opening "/" is already consumed. */
      function read_regexp() {
          return with_eof_error("Unterminated regular expression", function(){
              var prev_backslash = false, regexp = "", ch, in_class = false;
              while ((ch = next(true))) if (prev_backslash) {
                  regexp += "\\" + ch;
                  prev_backslash = false;
              } else if (ch == "[") {
                  in_class = true;
                  regexp += ch;
              } else if (ch == "]" && in_class) {
                  in_class = false;
                  regexp += ch;
              } else if (ch == "/" && !in_class) {
                  break;
              } else if (ch == "\\") {
                  prev_backslash = true;
              } else {
                  regexp += ch;
              }
              var mods = read_while(function(ch){
                  return HOP(REGEXP_MODIFIERS, ch);
              });
              return token("regexp", [ regexp, mods ]);
          });
      };

      /* Reads the longest operator that starts with `prefix` (or with the
         next character when no prefix is given). */
      function read_operator(prefix) {
          function grow(op) {
              if (!peek()) return op;
              var bigger = op + peek();
              if (HOP(OPERATORS, bigger)) {
                  next();
                  return grow(bigger);
              } else {
                  return op;
              }
          };
          return token("operator", grow(prefix || next()));
      };

      /* "/" may start a comment, a regular expression or an operator. */
      function handle_slash() {
          next();
          var regex_allowed = S.regex_allowed;
          switch (peek()) {
            case "/":
              S.comments_before.push(read_line_comment());
              // comments must not change whether a regexp may follow
              S.regex_allowed = regex_allowed;
              return next_token();
            case "*":
              S.comments_before.push(read_multiline_comment());
              S.regex_allowed = regex_allowed;
              return next_token();
          }
          return S.regex_allowed ? read_regexp() : read_operator("/");
      };

      /* "." is either the start of a number or the member-access dot. */
      function handle_dot() {
          next();
          return is_digit(peek())
              ? read_num(".")
              : token("punc", ".");
      };

      /* Reads an identifier-like word and classifies it. */
      function read_word() {
          var word = read_while(is_identifier_char);
          return !HOP(KEYWORDS, word)
              ? token("name", word)
              : HOP(OPERATORS, word)
              ? token("operator", word)
              : HOP(KEYWORDS_ATOM, word)
              ? token("atom", word)
              : token("keyword", word);
      };

      /* Runs cont(); an EX_EOF escaping from it becomes a parse error with
         the message eof_error.  Other exceptions are rethrown. */
      function with_eof_error(eof_error, cont) {
          try {
              return cont();
          } catch(ex) {
              if (ex === EX_EOF) parse_error(eof_error);
              else throw ex;
          }
      };

      function next_token(force_regexp) {
          if (force_regexp)
              return read_regexp();
          skip_whitespace();
          start_token();
          var ch = peek();
          if (!ch) return token("eof");
          if (is_digit(ch)) return read_num();
          if (ch == '"' || ch == "'") return read_string();
          if (HOP(PUNC_CHARS, ch)) return token("punc", next());
          if (ch == ".") return handle_dot();
          if (ch == "/") return handle_slash();
          if (HOP(OPERATOR_CHARS, ch)) return read_operator();
          if (is_identifier_char(ch)) return read_word();
          parse_error("Unexpected character '" + ch + "'");
      };

      /* Returns the state object, replacing it first when nc is given. */
      next_token.context = function(nc) {
          if (nc) S = nc;
          return S;
      };

      return next_token;

  };
  483. /* -----[ Parser (constants) ]----- */
  /* Operators accepted in front of an operand. */
  var UNARY_PREFIX = array_to_hash(
      "typeof void delete -- ++ ! ~ - +".split(" ")
  );

  /* Operators accepted after an operand. */
  var UNARY_POSTFIX = array_to_hash("-- ++".split(" "));
  /* Maps each assignment operator to the binary operator it applies
     ("+=" -> "+"); plain "=" maps to true. */
  var ASSIGNMENT = (function(compound){
      var table = { "=": true };
      for (var i = 0; i < compound.length; ++i) {
          var op = compound[i];
          table[op] = op.slice(0, -1);    // strip the trailing "="
      }
      return table;
  })(["+=", "-=", "/=", "*=", "%=", ">>=", "<<=", ">>>=", "|=", "^=", "&="]);
  /* Binary operator precedence: operators on the same row share a level,
     levels are numbered from 1 (first row) upwards, and a larger number
     binds tighter. */
  var PRECEDENCE = (function(levels){
      var table = {};
      levels.forEach(function(ops, index){
          ops.forEach(function(op){
              table[op] = index + 1;
          });
      });
      return table;
  })([
      ["||"],
      ["&&"],
      ["|"],
      ["^"],
      ["&"],
      ["==", "===", "!=", "!=="],
      ["<", ">", "<=", ">=", "in", "instanceof"],
      [">>", "<<", ">>>"],
      ["+", "-"],
      ["*", "/", "%"]
  ]);
  /* Statement kinds that may carry a label when parsing in strict mode. */
  var STATEMENTS_WITH_LABELS = array_to_hash("for do while switch".split(" "));

  /* Token types that form a complete expression atom on their own. */
  var ATOMIC_START_TOKEN = array_to_hash("atom num string regexp name".split(" "));
  532. /* -----[ Parser ]----- */
  /**
   * A node tag that also records the tokens delimiting the node.  It
   * converts to its plain name when used as a string.
   *
   * @param {string} str - the node name
   * @param {Object} start - first token of the node
   * @param {Object} end - last token of the node
   */
  function NodeWithToken(str, start, end) {
      this.name  = str;
      this.start = start;
      this.end   = end;
  };

  /** @returns {string} the node name */
  NodeWithToken.prototype.toString = function() {
      return this.name;
  };
  /**
   * Parses JavaScript into an AST made of nested arrays whose first
   * element names the node type.
   *
   * @param {string|function} $TEXT - source code, or a tokenizer function
   *        (anything that is not a string is used as the token source)
   * @param {boolean} [strict_mode] - when truthy: no automatic semicolon
   *        insertion, no trailing comma in array/object literals, and only
   *        for/do/while/switch statements may be labelled
   * @param {boolean} [embed_tokens] - when truthy, the type tag of every
   *        statement node is replaced by a NodeWithToken recording the
   *        statement's first and last token
   * @returns {Array} [ "toplevel", [ statement, ... ] ]
   * @throws {JS_Parse_Error} on a syntax error
   */
  function parse($TEXT, strict_mode, embed_tokens) {

      var S = {
          input       : typeof $TEXT == "string" ? tokenizer($TEXT, true) : $TEXT,
          token       : null,
          prev        : null,
          peeked      : null,
          in_function : 0,
          in_loop     : 0,
          labels      : []
      };

      S.token = next();

      /* Tests the current token. */
      function is(type, value) {
          return is_token(S.token, type, value);
      };

      /* One-token lookahead; the token is kept until next() consumes it. */
      function peek() { return S.peeked || (S.peeked = S.input()); };

      /* Advances to the following token and returns it. */
      function next() {
          S.prev = S.token;
          if (S.peeked) {
              S.token = S.peeked;
              S.peeked = null;
          } else {
              S.token = S.input();
          }
          return S.token;
      };

      function prev() {
          return S.prev;
      };

      /* Raises a parse error; location parts that are not supplied default
         to the start of the tokenizer's most recent token. */
      function croak(msg, line, col, pos) {
          var ctx = S.input.context();
          js_error(msg,
                   line != null ? line : ctx.tokline,
                   col != null ? col : ctx.tokcol,
                   pos != null ? pos : ctx.tokpos);
      };

      /* Raises a parse error located at `token`.  The token's own pos is
         forwarded together with its line and col so that all three parts
         of the reported location describe the same token. */
      function token_error(token, msg) {
          croak(msg, token.line, token.col, token.pos);
      };

      function unexpected(token) {
          if (token == null)
              token = S.token;
          token_error(token, "Unexpected token: " + token.type + " (" + token.value + ")");
      };

      /* Consumes the current token if it matches, otherwise fails. */
      function expect_token(type, val) {
          if (is(type, val)) {
              return next();
          }
          token_error(S.token, "Unexpected token " + S.token.type + ", expected " + type);
      };

      function expect(punc) { return expect_token("punc", punc); };

      /* A semicolon may be omitted before a newline, "}" or end of input,
         unless strict_mode is on. */
      function can_insert_semicolon() {
          return !strict_mode && (
              S.token.nlb || is("eof") || is("punc", "}")
          );
      };

      function semicolon() {
          if (is("punc", ";")) next();
          else if (!can_insert_semicolon()) unexpected();
      };

      /* Builds an AST node: simply the array of the arguments. */
      function as() {
          return slice(arguments);
      };

      function parenthesised() {
          expect("(");
          var ex = expression();
          expect(")");
          return ex;
      };

      function add_tokens(str, start, end) {
          return new NodeWithToken(str, start, end);
      };

      var statement = embed_tokens ? function() {
          var start = S.token;
          var stmt = $statement();
          stmt[0] = add_tokens(stmt[0], start, prev());
          return stmt;
      } : $statement;

      function $statement() {
          if (is("operator", "/")) {
              // a statement cannot begin with a division, so re-read the
              // input after the "/" as a regular expression
              S.peeked = null;
              S.token = S.input(true); // force regexp
          }
          switch (S.token.type) {
            case "num":
            case "string":
            case "regexp":
            case "operator":
            case "atom":
              return simple_statement();

            case "name":
              return is_token(peek(), "punc", ":")
                  ? labeled_statement(prog1(S.token.value, next, next))
                  : simple_statement();

            case "punc":
              switch (S.token.value) {
                case "{":
                  return as("block", block_());
                case "[":
                case "(":
                  return simple_statement();
                case ";":
                  next();
                  return as("block");
                default:
                  unexpected();    // throws, so the fall-through below is never taken
              }

            case "keyword":
              switch (prog1(S.token.value, next)) {
                case "break":
                  return break_cont("break");

                case "continue":
                  return break_cont("continue");

                case "debugger":
                  semicolon();
                  return as("debugger");

                case "do":
                  return (function(body){
                      expect_token("keyword", "while");
                      return as("do", prog1(parenthesised, semicolon), body);
                  })(in_loop(statement));

                case "for":
                  return for_();

                case "function":
                  return function_(true);

                case "if":
                  return if_();

                case "return":
                  if (S.in_function == 0)
                      croak("'return' outside of function");
                  return as("return",
                            is("punc", ";")
                            ? (next(), null)
                            : can_insert_semicolon()
                            ? null
                            : prog1(expression, semicolon));

                case "switch":
                  return as("switch", parenthesised(), switch_block_());

                case "throw":
                  return as("throw", prog1(expression, semicolon));

                case "try":
                  return try_();

                case "var":
                  return prog1(var_, semicolon);

                case "const":
                  return prog1(const_, semicolon);

                case "while":
                  return as("while", parenthesised(), in_loop(statement));

                case "with":
                  return as("with", parenthesised(), statement());

                default:
                  unexpected();
              }
          }
      };

      function labeled_statement(label) {
          S.labels.push(label);
          var start = S.token, stat = statement();
          if (strict_mode && !HOP(STATEMENTS_WITH_LABELS, stat[0]))
              unexpected(start);
          S.labels.pop();
          return as("label", label, stat);
      };

      function simple_statement() {
          return as("stat", prog1(expression, semicolon));
      };

      /* Parses the rest of a break/continue statement. */
      function break_cont(type) {
          var name = is("name") ? S.token.value : null;
          if (name != null) {
              next();
              if (!member(name, S.labels))
                  croak("Label " + name + " without matching loop or statement");
          }
          else if (S.in_loop == 0)
              croak(type + " not inside a loop or switch");
          semicolon();
          return as(type, name);
      };

      function for_() {
          expect("(");
          var has_var = is("keyword", "var");
          if (has_var)
              next();
          if (is("name") && is_token(peek(), "operator", "in")) {
              // for (i in foo)
              var name = S.token.value;
              next(); next();
              var obj = expression();
              expect(")");
              return as("for-in", has_var, name, obj, in_loop(statement));
          } else {
              // classic for
              var init = is("punc", ";") ? null : has_var ? var_() : expression();
              expect(";");
              var test = is("punc", ";") ? null : expression();
              expect(";");
              var step = is("punc", ")") ? null : expression();
              expect(")");
              return as("for", init, test, step, in_loop(statement));
          }
      };

      /* Parses a function after the "function" keyword; in_statement
         selects a declaration ("defun", name required) over an expression. */
      function function_(in_statement) {
          var name = is("name") ? prog1(S.token.value, next) : null;
          if (in_statement && !name)
              unexpected();
          expect("(");
          return as(in_statement ? "defun" : "function",
                    name,
                    // arguments
                    (function(first, a){
                        while (!is("punc", ")")) {
                            if (first) first = false; else expect(",");
                            if (!is("name")) unexpected();
                            a.push(S.token.value);
                            next();
                        }
                        next();
                        return a;
                    })(true, []),
                    // body
                    (function(){
                        ++S.in_function;
                        // break/continue may not reach loops outside the function
                        var loop = S.in_loop;
                        S.in_loop = 0;
                        var a = block_();
                        --S.in_function;
                        S.in_loop = loop;
                        return a;
                    })());
      };

      function if_() {
          var cond = parenthesised(), body = statement(), belse;
          if (is("keyword", "else")) {
              next();
              belse = statement();
          }
          return as("if", cond, body, belse);
      };

      /* Parses "{ ... }" and returns the array of statements. */
      function block_() {
          expect("{");
          var a = [];
          while (!is("punc", "}")) {
              if (is("eof")) unexpected();
              a.push(statement());
          }
          next();
          return a;
      };

      /* Parses the body of a switch; returns [ [ case-expr|null, stmts ], ... ]
         where null marks the default branch. */
      var switch_block_ = curry(in_loop, function(){
          expect("{");
          var a = [], cur = null;
          while (!is("punc", "}")) {
              if (is("eof")) unexpected();
              if (is("keyword", "case")) {
                  next();
                  cur = [];
                  a.push([ expression(), cur ]);
                  expect(":");
              }
              else if (is("keyword", "default")) {
                  next();
                  expect(":");
                  cur = [];
                  a.push([ null, cur ]);
              }
              else {
                  if (!cur) unexpected();
                  cur.push(statement());
              }
          }
          next();
          return a;
      });

      function try_() {
          var body = block_(), bcatch, bfinally;
          if (is("keyword", "catch")) {
              next();
              expect("(");
              if (!is("name"))
                  croak("Name expected");
              var name = S.token.value;
              next();
              expect(")");
              bcatch = [ name, block_() ];
          }
          if (is("keyword", "finally")) {
              next();
              bfinally = block_();
          }
          if (!bcatch && !bfinally)
              croak("Missing catch/finally blocks");
          return as("try", body, bcatch, bfinally);
      };

      /* Parses "a, b = 1, ..." into [ [ name ], [ name, value ], ... ]. */
      function vardefs() {
          var a = [];
          for (;;) {
              if (!is("name"))
                  unexpected();
              var name = S.token.value;
              next();
              if (is("operator", "=")) {
                  next();
                  a.push([ name, expression(false) ]);
              } else {
                  a.push([ name ]);
              }
              if (!is("punc", ","))
                  break;
              next();
          }
          return a;
      };

      function var_() {
          return as("var", vardefs());
      };

      function const_() {
          return as("const", vardefs());
      };

      function new_() {
          var newexp = expr_atom(false), args;
          if (is("punc", "(")) {
              next();
              args = expr_list(")");
          } else {
              args = [];
          }
          return subscripts(as("new", newexp, args), true);
      };

      function expr_atom(allow_calls) {
          if (is("operator", "new")) {
              next();
              return new_();
          }
          if (is("operator") && HOP(UNARY_PREFIX, S.token.value)) {
              return make_unary("unary-prefix",
                                prog1(S.token.value, next),
                                expr_atom(allow_calls));
          }
          if (is("punc")) {
              switch (S.token.value) {
                case "(":
                  next();
                  return subscripts(prog1(expression, curry(expect, ")")), allow_calls);
                case "[":
                  next();
                  return subscripts(array_(), allow_calls);
                case "{":
                  next();
                  return subscripts(object_(), allow_calls);
              }
              unexpected();
          }
          if (is("keyword", "function")) {
              next();
              return subscripts(function_(false), allow_calls);
          }
          if (HOP(ATOMIC_START_TOKEN, S.token.type)) {
              var atom = S.token.type == "regexp"
                  ? as("regexp", S.token.value[0], S.token.value[1])
                  : as(S.token.type, S.token.value);
              return subscripts(prog1(atom, next), allow_calls);
          }
          unexpected();
      };

      /* Parses comma-separated expressions up to and including `closing`;
         with allow_empty, a missing element becomes [ "atom", "undefined" ]. */
      function expr_list(closing, allow_trailing_comma, allow_empty) {
          var first = true, a = [];
          while (!is("punc", closing)) {
              if (first) first = false; else expect(",");
              if (allow_trailing_comma && is("punc", closing)) break;
              if (is("punc", ",") && allow_empty) {
                  a.push([ "atom", "undefined" ]);
              } else {
                  a.push(expression(false));
              }
          }
          next();
          return a;
      };

      function array_() {
          return as("array", expr_list("]", !strict_mode, true));
      };

      function object_() {
          var first = true, a = [];
          while (!is("punc", "}")) {
              if (first) first = false; else expect(",");
              if (!strict_mode && is("punc", "}"))
                  // allow trailing comma
                  break;
              var type = S.token.type;
              var name = as_property_name();
              if (type == "name" && (name == "get" || name == "set") && !is("punc", ":")) {
                  // accessor: [ property name, function, "get" | "set" ]
                  a.push([ as_name(), function_(false), name ]);
              } else {
                  expect(":");
                  a.push([ name, expression(false) ]);
              }
          }
          next();
          return as("object", a);
      };

      function as_property_name() {
          switch (S.token.type) {
            case "num":
            case "string":
              return prog1(S.token.value, next);
          }
          return as_name();
      };

      /* Consumes a token usable as a property name and returns its value. */
      function as_name() {
          switch (S.token.type) {
            case "name":
            case "operator":
            case "keyword":
            case "atom":
              return prog1(S.token.value, next);
            default:
              unexpected();
          }
      };

      /* Parses what may follow an expression: ".name", "[expr]" and, when
         allow_calls is truthy, call arguments and postfix ++/--. */
      function subscripts(expr, allow_calls) {
          if (is("punc", ".")) {
              next();
              return subscripts(as("dot", expr, as_name()), allow_calls);
          }
          if (is("punc", "[")) {
              next();
              return subscripts(as("sub", expr, prog1(expression, curry(expect, "]"))), allow_calls);
          }
          if (allow_calls && is("punc", "(")) {
              next();
              return subscripts(as("call", expr, expr_list(")")), true);
          }
          if (allow_calls && is("operator") && HOP(UNARY_POSTFIX, S.token.value)) {
              return prog1(curry(make_unary, "unary-postfix", S.token.value, expr),
                           next);
          }
          return expr;
      };

      function make_unary(tag, op, expr) {
          if ((op == "++" || op == "--") && !is_assignable(expr))
              croak("Invalid use of " + op + " operator");
          return as(tag, op, expr);
      };

      /* Precedence climbing: extends `left` with binary operators that
         bind tighter than min_prec. */
      function expr_op(left, min_prec) {
          var op = is("operator") ? S.token.value : null;
          var prec = op != null ? PRECEDENCE[op] : null;
          if (prec != null && prec > min_prec) {
              next();
              var right = expr_op(expr_atom(true), prec);
              return expr_op(as("binary", op, left, right), min_prec);
          }
          return left;
      };

      function expr_ops() {
          return expr_op(expr_atom(true), 0);
      };

      function maybe_conditional() {
          var expr = expr_ops();
          if (is("operator", "?")) {
              next();
              var yes = expression(false);
              expect(":");
              return as("conditional", expr, yes, expression(false));
          }
          return expr;
      };

      /* Truthy for nodes that can be assigned to; undefined otherwise. */
      function is_assignable(expr) {
          switch (expr[0]) {
            case "dot":
            case "sub":
              return true;
            case "name":
              return expr[1] != "this";
          }
      };

      function maybe_assign() {
          var left = maybe_conditional(), val = S.token.value;
          if (is("operator") && HOP(ASSIGNMENT, val)) {
              if (is_assignable(left)) {
                  next();
                  return as("assign", ASSIGNMENT[val], left, maybe_assign());
              }
              croak("Invalid assignment");
          }
          return left;
      };

      /* Parses an expression; the comma operator is accepted unless
         `commas` is passed explicitly as a falsy value. */
      function expression(commas) {
          if (arguments.length == 0)
              commas = true;
          var expr = maybe_assign();
          if (commas && is("punc", ",")) {
              next();
              return as("seq", expr, expression());
          }
          return expr;
      };

      /* Runs cont() with the loop depth raised by one. */
      function in_loop(cont) {
          try {
              ++S.in_loop;
              return cont();
          } finally {
              --S.in_loop;
          }
      };

      return as("toplevel", (function(a){
          while (!is("eof"))
              a.push(statement());
          return a;
      })([]));

  };
  1048. /* -----[ Utilities ]----- */
  /**
   * Partial application: returns a function that calls `f` with the extra
   * arguments given here followed by its own arguments, forwarding `this`.
   *
   * @param {function} f - function to wrap
   * @returns {function}
   */
  function curry(f) {
      var bound = slice(arguments, 1);
      return function() {
          var all = bound.concat(slice(arguments));
          return f.apply(this, all);
      };
  };
  /**
   * Produces a value first, then runs the remaining arguments as
   * zero-argument functions, in order.
   *
   * @param {*} ret - the value to return, or a function called (before the
   *        others) to obtain it
   * @returns {*} the value obtained from `ret`
   */
  function prog1(ret) {
      var value = ret instanceof Function ? ret() : ret;
      for (var i = 1; i < arguments.length; ++i) {
          arguments[i]();
      }
      return value;
  };
  /**
   * Builds a lookup table whose keys are the elements of `a`, each mapped
   * to true.
   *
   * @param {Array} a - the keys
   * @returns {Object}
   */
  function array_to_hash(a) {
      var hash = {};
      var i = 0;
      while (i < a.length) {
          hash[a[i]] = true;
          i++;
      }
      return hash;
  };
  /**
   * Copies an array-like object into a real array.
   *
   * @param {Object} a - array or array-like (e.g. `arguments`)
   * @param {number} [start] - index to start from; null/undefined means 0
   * @returns {Array}
   */
  function slice(a, start) {
      if (start == null) start = 0;
      return Array.prototype.slice.call(a, start);
  };
  /**
   * Splits a string into an array of its individual characters.
   *
   * @param {string} str
   * @returns {string[]}
   */
  function characters(str) {
      var chars = str.split("");
      return chars;
  };
  /**
   * Tells whether `name` occurs in `array`, compared with ===.
   *
   * @param {*} name - value to look for
   * @param {Array} array - values to search
   * @returns {boolean}
   */
  function member(name, array) {
      for (var i = 0; i < array.length; ++i) {
          if (array[i] === name) return true;
      }
      return false;
  };
  /**
   * Own-property test that keeps working when `obj` defines (or lacks) its
   * own hasOwnProperty.
   *
   * @param {Object} obj - object to inspect
   * @param {string} prop - property name
   * @returns {boolean}
   */
  function HOP(obj, prop) {
      var hasOwn = Object.prototype.hasOwnProperty;
      return hasOwn.call(obj, prop);
  };
  1081. /* -----[ Exports ]----- */
  // entry points
  exports.tokenizer            = tokenizer;
  exports.parse                = parse;

  // general-purpose helpers
  exports.slice                = slice;
  exports.curry                = curry;
  exports.member               = member;
  exports.array_to_hash        = array_to_hash;

  // lookup tables
  exports.PRECEDENCE           = PRECEDENCE;
  exports.KEYWORDS_ATOM        = KEYWORDS_ATOM;
  exports.RESERVED_WORDS       = RESERVED_WORDS;
  exports.KEYWORDS             = KEYWORDS;
  exports.ATOMIC_START_TOKEN   = ATOMIC_START_TOKEN;
  exports.OPERATORS            = OPERATORS;

  // character classification
  exports.is_alphanumeric_char = is_alphanumeric_char;
  exports.is_identifier_char   = is_identifier_char;