gdscript_tokenizer.h 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. /**************************************************************************/
  2. /* gdscript_tokenizer.h */
  3. /**************************************************************************/
  4. /* This file is part of: */
  5. /* GODOT ENGINE */
  6. /* https://godotengine.org */
  7. /**************************************************************************/
  8. /* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
  9. /* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
  10. /* */
  11. /* Permission is hereby granted, free of charge, to any person obtaining */
  12. /* a copy of this software and associated documentation files (the */
  13. /* "Software"), to deal in the Software without restriction, including */
  14. /* without limitation the rights to use, copy, modify, merge, publish, */
  15. /* distribute, sublicense, and/or sell copies of the Software, and to */
  16. /* permit persons to whom the Software is furnished to do so, subject to */
  17. /* the following conditions: */
  18. /* */
  19. /* The above copyright notice and this permission notice shall be */
  20. /* included in all copies or substantial portions of the Software. */
  21. /* */
  22. /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
  23. /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
  24. /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
  25. /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
  26. /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
  27. /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
  28. /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
  29. /**************************************************************************/
  30. #ifndef GDSCRIPT_TOKENIZER_H
  31. #define GDSCRIPT_TOKENIZER_H
  32. #include "core/templates/hash_map.h"
  33. #include "core/templates/hash_set.h"
  34. #include "core/templates/list.h"
  35. #include "core/templates/vector.h"
  36. #include "core/variant/variant.h"
  37. #ifdef MINGW_ENABLED
  38. #undef CONST
  39. #undef IN
  40. #undef VOID
  41. #endif
  42. class GDScriptTokenizer {
  43. public:
  44. enum CursorPlace {
  45. CURSOR_NONE,
  46. CURSOR_BEGINNING,
  47. CURSOR_MIDDLE,
  48. CURSOR_END,
  49. };
  50. struct Token {
  51. enum Type {
  52. EMPTY,
  53. // Basic
  54. ANNOTATION,
  55. IDENTIFIER,
  56. LITERAL,
  57. // Comparison
  58. LESS,
  59. LESS_EQUAL,
  60. GREATER,
  61. GREATER_EQUAL,
  62. EQUAL_EQUAL,
  63. BANG_EQUAL,
  64. // Logical
  65. AND,
  66. OR,
  67. NOT,
  68. AMPERSAND_AMPERSAND,
  69. PIPE_PIPE,
  70. BANG,
  71. // Bitwise
  72. AMPERSAND,
  73. PIPE,
  74. TILDE,
  75. CARET,
  76. LESS_LESS,
  77. GREATER_GREATER,
  78. // Math
  79. PLUS,
  80. MINUS,
  81. STAR,
  82. STAR_STAR,
  83. SLASH,
  84. PERCENT,
  85. // Assignment
  86. EQUAL,
  87. PLUS_EQUAL,
  88. MINUS_EQUAL,
  89. STAR_EQUAL,
  90. STAR_STAR_EQUAL,
  91. SLASH_EQUAL,
  92. PERCENT_EQUAL,
  93. LESS_LESS_EQUAL,
  94. GREATER_GREATER_EQUAL,
  95. AMPERSAND_EQUAL,
  96. PIPE_EQUAL,
  97. CARET_EQUAL,
  98. // Control flow
  99. IF,
  100. ELIF,
  101. ELSE,
  102. FOR,
  103. WHILE,
  104. BREAK,
  105. CONTINUE,
  106. PASS,
  107. RETURN,
  108. MATCH,
  109. WHEN,
  110. // Keywords
  111. AS,
  112. ASSERT,
  113. AWAIT,
  114. BREAKPOINT,
  115. CLASS,
  116. CLASS_NAME,
  117. CONST,
  118. ENUM,
  119. EXTENDS,
  120. FUNC,
  121. IN,
  122. IS,
  123. NAMESPACE,
  124. PRELOAD,
  125. SELF,
  126. SIGNAL,
  127. STATIC,
  128. SUPER,
  129. TRAIT,
  130. VAR,
  131. VOID,
  132. YIELD,
  133. // Punctuation
  134. BRACKET_OPEN,
  135. BRACKET_CLOSE,
  136. BRACE_OPEN,
  137. BRACE_CLOSE,
  138. PARENTHESIS_OPEN,
  139. PARENTHESIS_CLOSE,
  140. COMMA,
  141. SEMICOLON,
  142. PERIOD,
  143. PERIOD_PERIOD,
  144. COLON,
  145. DOLLAR,
  146. FORWARD_ARROW,
  147. UNDERSCORE,
  148. // Whitespace
  149. NEWLINE,
  150. INDENT,
  151. DEDENT,
  152. // Constants
  153. CONST_PI,
  154. CONST_TAU,
  155. CONST_INF,
  156. CONST_NAN,
  157. // Error message improvement
  158. VCS_CONFLICT_MARKER,
  159. BACKTICK,
  160. QUESTION_MARK,
  161. // Special
  162. ERROR,
  163. TK_EOF, // "EOF" is reserved
  164. TK_MAX
  165. };
  166. Type type = EMPTY;
  167. Variant literal;
  168. int start_line = 0, end_line = 0, start_column = 0, end_column = 0;
  169. int leftmost_column = 0, rightmost_column = 0; // Column span for multiline tokens.
  170. int cursor_position = -1;
  171. CursorPlace cursor_place = CURSOR_NONE;
  172. String source;
  173. const char *get_name() const;
  174. bool can_precede_bin_op() const;
  175. bool is_identifier() const;
  176. bool is_node_name() const;
  177. StringName get_identifier() const { return source; }
  178. Token(Type p_type) {
  179. type = p_type;
  180. }
  181. Token() {
  182. }
  183. };
  184. #ifdef TOOLS_ENABLED
  185. struct CommentData {
  186. String comment;
  187. // true: Comment starts at beginning of line or after indentation.
  188. // false: Inline comment (starts after some code).
  189. bool new_line = false;
  190. CommentData() {}
  191. CommentData(const String &p_comment, bool p_new_line) {
  192. comment = p_comment;
  193. new_line = p_new_line;
  194. }
  195. };
  196. const HashMap<int, CommentData> &get_comments() const {
  197. return comments;
  198. }
  199. #endif // TOOLS_ENABLED
  200. private:
  201. String source;
  202. const char32_t *_source = nullptr;
  203. const char32_t *_current = nullptr;
  204. int line = -1, column = -1;
  205. int cursor_line = -1, cursor_column = -1;
  206. int tab_size = 4;
  207. // Keep track of multichar tokens.
  208. const char32_t *_start = nullptr;
  209. int start_line = 0, start_column = 0;
  210. int leftmost_column = 0, rightmost_column = 0;
  211. // Info cache.
  212. bool line_continuation = false; // Whether this line is a continuation of the previous, like when using '\'.
  213. bool multiline_mode = false;
  214. List<Token> error_stack;
  215. bool pending_newline = false;
  216. Token last_token;
  217. Token last_newline;
  218. int pending_indents = 0;
  219. List<int> indent_stack;
  220. List<List<int>> indent_stack_stack; // For lambdas, which require manipulating the indentation point.
  221. List<char32_t> paren_stack;
  222. char32_t indent_char = '\0';
  223. int position = 0;
  224. int length = 0;
  225. #ifdef DEBUG_ENABLED
  226. Vector<String> keyword_list;
  227. #endif // DEBUG_ENABLED
  228. #ifdef TOOLS_ENABLED
  229. HashMap<int, CommentData> comments;
  230. #endif // TOOLS_ENABLED
  231. _FORCE_INLINE_ bool _is_at_end() { return position >= length; }
  232. _FORCE_INLINE_ char32_t _peek(int p_offset = 0) { return position + p_offset >= 0 && position + p_offset < length ? _current[p_offset] : '\0'; }
  233. int indent_level() const { return indent_stack.size(); }
  234. bool has_error() const { return !error_stack.is_empty(); }
  235. Token pop_error();
  236. char32_t _advance();
  237. String _get_indent_char_name(char32_t ch);
  238. void _skip_whitespace();
  239. void check_indent();
  240. #ifdef DEBUG_ENABLED
  241. void make_keyword_list();
  242. #endif // DEBUG_ENABLED
  243. Token make_error(const String &p_message);
  244. void push_error(const String &p_message);
  245. void push_error(const Token &p_error);
  246. Token make_paren_error(char32_t p_paren);
  247. Token make_token(Token::Type p_type);
  248. Token make_literal(const Variant &p_literal);
  249. Token make_identifier(const StringName &p_identifier);
  250. Token check_vcs_marker(char32_t p_test, Token::Type p_double_type);
  251. void push_paren(char32_t p_char);
  252. bool pop_paren(char32_t p_expected);
  253. void newline(bool p_make_token);
  254. Token number();
  255. Token potential_identifier();
  256. Token string();
  257. Token annotation();
  258. public:
  259. Token scan();
  260. void set_source_code(const String &p_source_code);
  261. int get_cursor_line() const;
  262. int get_cursor_column() const;
  263. void set_cursor_position(int p_line, int p_column);
  264. void set_multiline_mode(bool p_state);
  265. bool is_past_cursor() const;
  266. static String get_token_name(Token::Type p_token_type);
  267. void push_expression_indented_block(); // For lambdas, or blocks inside expressions.
  268. void pop_expression_indented_block(); // For lambdas, or blocks inside expressions.
  269. GDScriptTokenizer();
  270. };
  271. #endif // GDSCRIPT_TOKENIZER_H