SourceJavaScriptTokenizer.re2js 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211
  1. /*
  2. * Copyright (C) 2009 Google Inc. All rights reserved.
  3. *
  4. * Redistribution and use in source and binary forms, with or without
  5. * modification, are permitted provided that the following conditions are
  6. * met:
  7. *
  8. * * Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * * Redistributions in binary form must reproduce the above
  11. * copyright notice, this list of conditions and the following disclaimer
  12. * in the documentation and/or other materials provided with the
  13. * distribution.
  14. * * Neither the name of Google Inc. nor the names of its
  15. * contributors may be used to endorse or promote products derived from
  16. * this software without specific prior written permission.
  17. *
  18. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29. */
  30. /*
  31. * Generate js file as follows:
  32. re2c -isc Source/WebCore/inspector/front-end/SourceJavaScriptTokenizer.re2js \
  33. | sed 's|^yy\([^:]*\)*\:|case \1:|' \
  34. | sed 's|[*]cursor[+][+]|this._charAt(cursor++)|' \
  35. | sed 's|[*][+][+]cursor|this._charAt(++cursor)|' \
  36. | sed 's|[*]cursor|this._charAt(cursor)|' \
  37. | sed 's|yych = \*\([^;]*\)|yych = this._charAt\1|' \
  38. | sed 's|goto case \([^;]*\)|{ gotoCase = \1; continue; }|' \
  39. | sed 's|yych <= \(0x[0-9a-fA-F]*\)|yych \<\= String.fromCharCode(\1)|' \
  40. | sed 's|unsigned\ int|var|' \
  41. | sed 's|var\ yych|case 1: var yych|' > Source/WebCore/inspector/front-end/SourceJavaScriptTokenizer.js
  42. */
  43. /**
  44. * @constructor
  45. * @extends {WebInspector.SourceTokenizer}
  46. */
  47. WebInspector.SourceJavaScriptTokenizer = function()
  48. {
  49. WebInspector.SourceTokenizer.call(this);
  50. this._lexConditions = {
  51. DIV: 0,
  52. NODIV: 1,
  53. COMMENT: 2,
  54. DSTRING: 3,
  55. SSTRING: 4,
  56. REGEX: 5
  57. };
  58. this.case_DIV = 1000;
  59. this.case_NODIV = 1001;
  60. this.case_COMMENT = 1002;
  61. this.case_DSTRING = 1003;
  62. this.case_SSTRING = 1004;
  63. this.case_REGEX = 1005;
  64. this.condition = this.createInitialCondition();
  65. }
  66. WebInspector.SourceJavaScriptTokenizer.Keywords = [
  67. "null", "true", "false", "break", "case", "catch", "const", "default", "finally", "for",
  68. "instanceof", "new", "var", "continue", "function", "return", "void", "delete", "if",
  69. "this", "do", "while", "else", "in", "switch", "throw", "try", "typeof", "debugger",
  70. "class", "enum", "export", "extends", "import", "super", "get", "set", "with"
  71. ].keySet();
  72. WebInspector.SourceJavaScriptTokenizer.GlobalObjectValueProperties = {
  73. "NaN": "javascript-nan",
  74. "undefined": "javascript-undef",
  75. "Infinity": "javascript-inf"
  76. };
  // Prototype of the JavaScript tokenizer. This file is a re2c template: the
  // body of the switch in nextToken is generated from the rules in the block
  // comment inside it, using the command pipeline given at the top of the file.
  77. WebInspector.SourceJavaScriptTokenizer.prototype = {
      /**
       * Builds the condition object a tokenizer starts with. The constructor
       * stores the result in this.condition.
       *
       * @return {Object} New object whose lexCondition is the NODIV condition id.
       */
  78. createInitialCondition: function()
  79. {
  80. return { lexCondition: this._lexConditions.NODIV };
  81. },
      /**
       * Scans one token starting at cursor, stores its type in this.tokenType
       * and returns the cursor position reached by the matching rule. Rule
       * actions read the token text from this._line.
       *
       * Token types assigned by the rules: "javascript-comment", "whitespace",
       * "javascript-string", "javascript-regexp", "javascript-number",
       * "javascript-keyword", "javascript-ident", "javascript-nan",
       * "javascript-undef", "javascript-inf", "block-start", "block-end",
       * "brace-start", "brace-end", or null for other punctuation, division
       * and characters no other rule matches.
       *
       * Lex conditions: only NODIV has the regex rules and only DIV has the
       * division rule, so the condition decides how a "/" is read. Numbers,
       * identifiers and ")" switch to DIV; punctuation and division switch to
       * NODIV. COMMENT, DSTRING, SSTRING and REGEX are entered by the
       * corresponding "Start" rules (which end in a line break or a trailing
       * backslash) and left by the matching "End" rules, which switch to NODIV.
       *
       * @param {number} cursor Position at which scanning starts.
       * @return {number} Value of cursor after the matched token.
       */
  82. nextToken: function(cursor)
  83. {
      // Start of the token; the identifier and punctuation actions use it to
      // read the token text out of this._line.
  84. var cursorOnEnter = cursor;
      // State label the switch dispatches on. It starts at 1 because the sed
      // post-processing labels the generated yych declaration as case 1; the
      // same pipeline rewrites every generated goto into an assignment to
      // gotoCase followed by continue.
  85. var gotoCase = 1;
      // NOTE(review): not referenced anywhere in this template; presumably the
      // backtracking marker used by re2c-generated code - confirm against the
      // generated file.
  86. var YYMARKER;
      // Loop plus switch stands in for goto, which JavaScript does not have.
  87. while (1) {
  88. switch (gotoCase)
  89. // Following comment is replaced with generated state machine.
  90. /*!re2c
  91. re2c:define:YYCTYPE = "var";
  92. re2c:define:YYCURSOR = cursor;
  93. re2c:define:YYGETCONDITION = "this.getLexCondition";
  94. re2c:define:YYSETCONDITION = "this.setLexCondition";
  95. re2c:condprefix = "case this.case_";
  96. re2c:condenumprefix = "this._lexConditions.";
  97. re2c:yyfill:enable = 0;
  98. re2c:labelprefix = "case ";
  99. re2c:indent:top = 2;
  100. re2c:indent:string = " ";
  101. LineComment = "//" [^\r\n]*;
  102. CommentContent = ([^*\r\n] | ("*"+[^/*]))*;
  103. Comment = "/*" CommentContent "*"+ "/";
  104. CommentStart = "/*" CommentContent [\r\n];
  105. CommentEnd = CommentContent "*"+ "/";
  106. DecimalDigit = [0-9];
  107. NonZeroDigit = [1-9];
  108. OctalDigit = [0-7];
  109. HexDigit = [0-9a-fA-F];
  110. SignedInteger = ("+"|"-")? DecimalDigit+;
  111. ExponentPart = ("e" | "E") SignedInteger;
  112. DecimalIntegerLiteral = "0" | NonZeroDigit DecimalDigit*;
  113. DecimalLiteral = DecimalIntegerLiteral "." DecimalDigit* ExponentPart? | "." DecimalDigit+ ExponentPart? | DecimalIntegerLiteral ExponentPart?;
  114. HexIntegerLiteral = "0" ("x"|"X") HexDigit+;
  115. OctalIntegerLiteral = "0" OctalDigit+;
  116. NumericLiteral = DecimalLiteral | HexIntegerLiteral | OctalIntegerLiteral;
  117. Punctuation = [\!\%\&\(\*\+\,\-\.\:\;\<\=\>\?\[\]\^\{\|\}\~] | "!=" | "!==" | "%=" | "&&" | "&=" | "*=" | "++" | "+=" | "--" | "-=" | "<<" | "<<=" | "<=" | "==" | "===" | ">=" | ">>" | ">>=" | ">>>" | ">>>=" | "^=" | "|=" | "||";
  118. Division = "/" | "/=";
  119. RightParen = ")";
  120. Letter = [a-zA-Z\x80-\xFF];
  121. UnicodeEscapeSequence = "\\u" HexDigit HexDigit HexDigit HexDigit;
  122. IdentifierStart = Letter | "_" | "$" | UnicodeEscapeSequence;
  123. IdentifierPart = IdentifierStart | DecimalDigit;
  124. Identifier = IdentifierStart IdentifierPart *;
  125. Spaces = " "+;
  126. DoubleStringContent = ([^\r\n\"\\] | UnicodeEscapeSequence | "\\" ['"\\bfnrtv])*;
  127. SingleStringContent = ([^\r\n\'\\] | UnicodeEscapeSequence | "\\" ['"\\bfnrtv])*;
  128. StringLiteral = "\"" DoubleStringContent "\"" | "'" SingleStringContent "'";
  129. DoubleStringStart = "\"" DoubleStringContent "\\" [\r\n];
  130. DoubleStringEnd = DoubleStringContent "\"";
  131. SingleStringStart = "'" SingleStringContent "\\" [\r\n];
  132. SingleStringEnd = SingleStringContent "'";
  133. BackslashSequence = "\\" [^\r\n];
  134. RegexSet = "[" ([^\r\n*\\/] | BackslashSequence)* "]";
  135. RegexFirstChar = [^\r\n*\\/\[\]] | BackslashSequence | RegexSet;
  136. RegexChar = [^\r\n\\/\[\]] | BackslashSequence | RegexSet;
  137. RegexContent = RegexChar*;
  138. Regex = "/" RegexFirstChar RegexContent "/" [igm]*;
  139. RegexStart = "/" RegexFirstChar RegexContent "\\";
  140. RegexEnd = RegexContent "/" [igm]*;
  141. <DIV,NODIV> LineComment { this.tokenType = "javascript-comment"; return cursor; }
  142. <DIV,NODIV> Comment { this.tokenType = "javascript-comment"; return cursor; }
  143. <DIV,NODIV> CommentStart => COMMENT { this.tokenType = "javascript-comment"; return cursor; }
  144. <COMMENT> CommentContent => COMMENT { this.tokenType = "javascript-comment"; return cursor; }
  145. <COMMENT> CommentEnd => NODIV { this.tokenType = "javascript-comment"; return cursor; }
  146. <DIV,NODIV> Spaces {this.tokenType = "whitespace"; return cursor; }
  147. <DIV,NODIV> StringLiteral { this.tokenType = "javascript-string"; return cursor; }
  148. <DIV,NODIV> DoubleStringStart => DSTRING { this.tokenType = "javascript-string"; return cursor; }
  149. <DSTRING> DoubleStringContent => DSTRING { this.tokenType = "javascript-string"; return cursor; }
  150. <DSTRING> DoubleStringEnd => NODIV { this.tokenType = "javascript-string"; return cursor; }
  151. <DIV,NODIV> SingleStringStart => SSTRING { this.tokenType = "javascript-string"; return cursor; }
  152. <SSTRING> SingleStringContent => SSTRING { this.tokenType = "javascript-string"; return cursor; }
  153. <SSTRING> SingleStringEnd => NODIV { this.tokenType = "javascript-string"; return cursor; }
  154. <NODIV> Regex { this.tokenType = "javascript-regexp"; return cursor; }
  155. <NODIV> RegexStart => REGEX { this.tokenType = "javascript-regexp"; return cursor; }
  156. <REGEX> RegexContent => REGEX { this.tokenType = "javascript-regexp"; return cursor; }
  157. <REGEX> RegexEnd => NODIV { this.tokenType = "javascript-regexp"; return cursor; }
  158. <DIV,NODIV> NumericLiteral => DIV { this.tokenType = "javascript-number"; return cursor; }
  159. <DIV,NODIV> Identifier => DIV
  160. {
  161. var token = this._line.substring(cursorOnEnter, cursor);
  162. if (WebInspector.SourceJavaScriptTokenizer.GlobalObjectValueProperties.hasOwnProperty(token))
  163. this.tokenType = WebInspector.SourceJavaScriptTokenizer.GlobalObjectValueProperties[token];
  164. else if (WebInspector.SourceJavaScriptTokenizer.Keywords[token] === true && token !== "__proto__")
  165. this.tokenType = "javascript-keyword";
  166. else
  167. this.tokenType = "javascript-ident";
  168. return cursor;
  169. }
  170. <DIV,NODIV> RightParen => DIV { this.tokenType = "brace-end"; return cursor; }
  171. <DIV,NODIV> Punctuation => NODIV
  172. {
  173. var token = this._line.charAt(cursorOnEnter);
  174. if (token === "{")
  175. this.tokenType = "block-start";
  176. else if (token === "}")
  177. this.tokenType = "block-end";
  178. else if (token === "(")
  179. this.tokenType = "brace-start";
  180. else this.tokenType = null;
  181. return cursor;
  182. }
  183. <DIV> Division => NODIV { this.tokenType = null; return cursor; }
  184. <*> [^] { this.tokenType = null; return cursor; }
  185. */
  186. }
  187. },
      // Makes WebInspector.SourceTokenizer.prototype the prototype of this
      // object, so its members are inherited by tokenizer instances.
  188. __proto__: WebInspector.SourceTokenizer.prototype
  189. }