gdscript_tokenizer.cpp 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530
  1. /**************************************************************************/
  2. /* gdscript_tokenizer.cpp */
  3. /**************************************************************************/
  4. /* This file is part of: */
  5. /* GODOT ENGINE */
  6. /* https://godotengine.org */
  7. /**************************************************************************/
  8. /* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
  9. /* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
  10. /* */
  11. /* Permission is hereby granted, free of charge, to any person obtaining */
  12. /* a copy of this software and associated documentation files (the */
  13. /* "Software"), to deal in the Software without restriction, including */
  14. /* without limitation the rights to use, copy, modify, merge, publish, */
  15. /* distribute, sublicense, and/or sell copies of the Software, and to */
  16. /* permit persons to whom the Software is furnished to do so, subject to */
  17. /* the following conditions: */
  18. /* */
  19. /* The above copyright notice and this permission notice shall be */
  20. /* included in all copies or substantial portions of the Software. */
  21. /* */
  22. /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
  23. /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
  24. /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
  25. /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
  26. /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
  27. /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
  28. /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
  29. /**************************************************************************/
  30. #include "gdscript_tokenizer.h"
  31. #include "core/io/marshalls.h"
  32. #include "core/map.h"
  33. #include "core/print_string.h"
  34. #include "gdscript_functions.h"
  // Shared identifier lookup table (text -> integer id). It is allocated by
  // GDScriptTokenizer::initialize() and released by GDScriptTokenizer::terminate().
  35. OAHashMap<String, int> *GDScriptTokenizer::token_hashtable = nullptr;
  // Display names handed out by get_token_name(), which indexes this array
  // directly with a Token value. The array is sized TK_MAX and is purely
  // positional, so entries must stay in the same order as the Token enum.
  // NOTE(review): the Token enum is declared outside this file - confirm the
  // order below still matches it whenever a token is added or moved.
  36. const char *GDScriptTokenizer::token_names[TK_MAX] = {
  37. "Empty",
  38. "Identifier",
  39. "Constant",
  40. "Self",
  41. "Built-In Type",
  42. "Built-In Func",
  43. "In",
  44. "'=='",
  45. "'!='",
  46. "'<'",
  47. "'<='",
  48. "'>'",
  49. "'>='",
  50. "'and'",
  51. "'or'",
  52. "'not'",
  53. "'+'",
  54. "'-'",
  55. "'*'",
  56. "'/'",
  57. "'%'",
  58. "'<<'",
  59. "'>>'",
  60. "'='",
  61. "'+='",
  62. "'-='",
  63. "'*='",
  64. "'/='",
  65. "'%='",
  66. "'<<='",
  67. "'>>='",
  68. "'&='",
  69. "'|='",
  70. "'^='",
  71. "'&'",
  72. "'|'",
  73. "'^'",
  74. "'~'",
  // Increment/decrement tokens are disabled; the tokenizer never emits them.
  75. //"Plus Plus",
  76. //"Minus Minus",
  77. "if",
  78. "elif",
  79. "else",
  80. "for",
  81. "while",
  82. "break",
  83. "continue",
  84. "pass",
  85. "return",
  86. "match",
  87. "func",
  88. "class",
  89. "class_name",
  90. "extends",
  91. "is",
  92. "onready",
  93. "tool",
  94. "static",
  95. "export",
  96. "setget",
  97. "const",
  98. "var",
  99. "as",
  100. "void",
  101. "enum",
  102. "preload",
  103. "assert",
  104. "yield",
  105. "signal",
  106. "breakpoint",
  // NOTE(review): _keyword_list spells the TK_PR_REMOTE keyword "remote", while
  // this slot (presumably TK_PR_REMOTE) displays "rpc" - confirm this is intended.
  107. "rpc",
  108. "sync",
  109. "master",
  110. "puppet",
  111. "slave",
  112. "remotesync",
  113. "mastersync",
  114. "puppetsync",
  115. "'['",
  116. "']'",
  117. "'{'",
  118. "'}'",
  119. "'('",
  120. "')'",
  121. "','",
  122. "';'",
  123. "'.'",
  124. "'?'",
  125. "':'",
  126. "'$'",
  127. "'->'",
  128. "'\\n'",
  129. "PI",
  130. "TAU",
  131. "_",
  132. "INF",
  133. "NAN",
  134. "Error",
  135. "EOF",
  136. "Cursor"
  137. };
  // One row of _type_list: a built-in Variant type paired with the name it is
  // written with in script source. A null `text` marks the end of the list.
  138. struct _bit {
  139. Variant::Type type;
  140. const char *text;
  141. };
  // Built-in type names recognised by the tokenizer.
  //
  // Order matters: initialize() stores TOKEN_HASH_TABLE_TYPE_START + index for
  // each entry, and _parse_identifier() turns that id back into an index into
  // this array. get_token_literal() also scans it to map a type back to text.
  // The final { VARIANT_MAX, nullptr } row is the terminator every loop over
  // this list stops on.
  142. //built in types
  143. static const _bit _type_list[] = {
  144. //types
  145. { Variant::BOOL, "bool" },
  146. { Variant::INT, "int" },
  147. { Variant::REAL, "float" },
  148. { Variant::STRING, "String" },
  149. { Variant::VECTOR2, "Vector2" },
  150. { Variant::RECT2, "Rect2" },
  151. { Variant::TRANSFORM2D, "Transform2D" },
  152. { Variant::VECTOR3, "Vector3" },
  153. { Variant::AABB, "AABB" },
  154. { Variant::PLANE, "Plane" },
  155. { Variant::QUAT, "Quat" },
  156. { Variant::BASIS, "Basis" },
  157. { Variant::TRANSFORM, "Transform" },
  158. { Variant::COLOR, "Color" },
  159. { Variant::_RID, "RID" },
  160. { Variant::OBJECT, "Object" },
  161. { Variant::NODE_PATH, "NodePath" },
  162. { Variant::DICTIONARY, "Dictionary" },
  163. { Variant::ARRAY, "Array" },
  164. { Variant::POOL_BYTE_ARRAY, "PoolByteArray" },
  165. { Variant::POOL_INT_ARRAY, "PoolIntArray" },
  166. { Variant::POOL_REAL_ARRAY, "PoolRealArray" },
  167. { Variant::POOL_STRING_ARRAY, "PoolStringArray" },
  168. { Variant::POOL_VECTOR2_ARRAY, "PoolVector2Array" },
  169. { Variant::POOL_VECTOR3_ARRAY, "PoolVector3Array" },
  170. { Variant::POOL_COLOR_ARRAY, "PoolColorArray" },
  // Terminator: loops over this list stop at the first null `text`.
  171. { Variant::VARIANT_MAX, nullptr },
  172. };
  // One row of _keyword_list: a tokenizer Token paired with the reserved word
  // that produces it. A null `text` marks the end of the list.
  173. struct _kws {
  174. GDScriptTokenizer::Token token;
  175. const char *text;
  176. };
  // Reserved words and the token each one maps to.
  //
  // Order matters: initialize() stores TOKEN_HASH_TABLE_KEYWORD_START + index
  // for each entry, and _parse_identifier() turns that id back into an index
  // into this array. get_token_literal() scans it to map a token back to text.
  // The final { TK_ERROR, nullptr } row is the terminator every loop over this
  // list stops on.
  177. static const _kws _keyword_list[] = {
  178. //ops
  179. { GDScriptTokenizer::TK_OP_IN, "in" },
  180. { GDScriptTokenizer::TK_OP_NOT, "not" },
  181. { GDScriptTokenizer::TK_OP_OR, "or" },
  182. { GDScriptTokenizer::TK_OP_AND, "and" },
  183. //func
  184. { GDScriptTokenizer::TK_PR_FUNCTION, "func" },
  185. { GDScriptTokenizer::TK_PR_CLASS, "class" },
  186. { GDScriptTokenizer::TK_PR_CLASS_NAME, "class_name" },
  187. { GDScriptTokenizer::TK_PR_EXTENDS, "extends" },
  188. { GDScriptTokenizer::TK_PR_IS, "is" },
  189. { GDScriptTokenizer::TK_PR_ONREADY, "onready" },
  190. { GDScriptTokenizer::TK_PR_TOOL, "tool" },
  191. { GDScriptTokenizer::TK_PR_STATIC, "static" },
  192. { GDScriptTokenizer::TK_PR_EXPORT, "export" },
  193. { GDScriptTokenizer::TK_PR_SETGET, "setget" },
  194. { GDScriptTokenizer::TK_PR_VAR, "var" },
  195. { GDScriptTokenizer::TK_PR_AS, "as" },
  196. { GDScriptTokenizer::TK_PR_VOID, "void" },
  197. { GDScriptTokenizer::TK_PR_PRELOAD, "preload" },
  198. { GDScriptTokenizer::TK_PR_ASSERT, "assert" },
  199. { GDScriptTokenizer::TK_PR_YIELD, "yield" },
  200. { GDScriptTokenizer::TK_PR_SIGNAL, "signal" },
  201. { GDScriptTokenizer::TK_PR_BREAKPOINT, "breakpoint" },
  202. { GDScriptTokenizer::TK_PR_REMOTE, "remote" },
  203. { GDScriptTokenizer::TK_PR_MASTER, "master" },
  204. { GDScriptTokenizer::TK_PR_SLAVE, "slave" },
  205. { GDScriptTokenizer::TK_PR_PUPPET, "puppet" },
  206. { GDScriptTokenizer::TK_PR_SYNC, "sync" },
  207. { GDScriptTokenizer::TK_PR_REMOTESYNC, "remotesync" },
  208. { GDScriptTokenizer::TK_PR_MASTERSYNC, "mastersync" },
  209. { GDScriptTokenizer::TK_PR_PUPPETSYNC, "puppetsync" },
  210. { GDScriptTokenizer::TK_PR_CONST, "const" },
  211. { GDScriptTokenizer::TK_PR_ENUM, "enum" },
  212. //controlflow
  213. { GDScriptTokenizer::TK_CF_IF, "if" },
  214. { GDScriptTokenizer::TK_CF_ELIF, "elif" },
  215. { GDScriptTokenizer::TK_CF_ELSE, "else" },
  216. { GDScriptTokenizer::TK_CF_FOR, "for" },
  217. { GDScriptTokenizer::TK_CF_WHILE, "while" },
  218. { GDScriptTokenizer::TK_CF_BREAK, "break" },
  219. { GDScriptTokenizer::TK_CF_CONTINUE, "continue" },
  220. { GDScriptTokenizer::TK_CF_RETURN, "return" },
  221. { GDScriptTokenizer::TK_CF_MATCH, "match" },
  222. { GDScriptTokenizer::TK_CF_PASS, "pass" },
  // Remaining reserved words: self, named constants and the match wildcard.
  223. { GDScriptTokenizer::TK_SELF, "self" },
  224. { GDScriptTokenizer::TK_CONST_PI, "PI" },
  225. { GDScriptTokenizer::TK_CONST_TAU, "TAU" },
  226. { GDScriptTokenizer::TK_WILDCARD, "_" },
  227. { GDScriptTokenizer::TK_CONST_INF, "INF" },
  228. { GDScriptTokenizer::TK_CONST_NAN, "NAN" },
  // Terminator: loops over this list stop at the first null `text`.
  229. { GDScriptTokenizer::TK_ERROR, nullptr }
  230. };
  231. // Prepare the hash table for parsing as a one off at startup.
  232. void GDScriptTokenizer::initialize() {
  233. token_hashtable = memnew((OAHashMap<String, int>));
  234. token_hashtable->insert("null", 0);
  235. token_hashtable->insert("true", 1);
  236. token_hashtable->insert("false", 2);
  237. // _type_list
  238. int id = TOKEN_HASH_TABLE_TYPE_START;
  239. int idx = 0;
  240. while (_type_list[idx].text) {
  241. token_hashtable->insert(_type_list[idx].text, id++);
  242. idx++;
  243. }
  244. // built in funcs
  245. id = TOKEN_HASH_TABLE_BUILTIN_START;
  246. for (int j = 0; j < GDScriptFunctions::FUNC_MAX; j++) {
  247. token_hashtable->insert(GDScriptFunctions::get_func_name(GDScriptFunctions::Function(j)), id++);
  248. }
  249. // keywords
  250. id = TOKEN_HASH_TABLE_KEYWORD_START;
  251. idx = 0;
  252. while (_keyword_list[idx].text) {
  253. token_hashtable->insert(_keyword_list[idx].text, id++);
  254. idx++;
  255. }
  256. }
  257. void GDScriptTokenizer::terminate() {
  258. if (token_hashtable) {
  259. memdelete(token_hashtable);
  260. token_hashtable = nullptr;
  261. }
  262. }
  263. // return whether found
  264. bool GDScriptTokenizerText::_parse_identifier(const String &p_str) {
  265. // N.B. GDScriptTokenizer::initialize() must have been called before using this function,
  266. // else token_hashtable will be NULL.
  267. const int *found = token_hashtable->lookup_ptr(p_str);
  268. if (found) {
  269. int id = *found;
  270. if (id < TOKEN_HASH_TABLE_TYPE_START) {
  271. switch (id) {
  272. case 0: {
  273. _make_constant(Variant());
  274. } break;
  275. case 1: {
  276. _make_constant(true);
  277. } break;
  278. case 2: {
  279. _make_constant(false);
  280. } break;
  281. default: {
  282. DEV_ASSERT(0);
  283. } break;
  284. }
  285. return true;
  286. } else {
  287. // type list
  288. if (id < TOKEN_HASH_TABLE_BUILTIN_START) {
  289. int idx = id - TOKEN_HASH_TABLE_TYPE_START;
  290. _make_type(_type_list[idx].type);
  291. return true;
  292. }
  293. // built in func
  294. if (id < TOKEN_HASH_TABLE_KEYWORD_START) {
  295. int idx = id - TOKEN_HASH_TABLE_BUILTIN_START;
  296. _make_built_in_func(GDScriptFunctions::Function(idx));
  297. return true;
  298. }
  299. // keyword
  300. int idx = id - TOKEN_HASH_TABLE_KEYWORD_START;
  301. _make_token(_keyword_list[idx].token);
  302. return true;
  303. }
  304. return true;
  305. }
  306. // not found
  307. return false;
  308. }
  309. const char *GDScriptTokenizer::get_token_name(Token p_token) {
  310. ERR_FAIL_INDEX_V(p_token, TK_MAX, "<error>");
  311. return token_names[p_token];
  312. }
  313. bool GDScriptTokenizer::is_token_literal(int p_offset, bool variable_safe) const {
  314. switch (get_token(p_offset)) {
  315. // Can always be literal:
  316. case TK_IDENTIFIER:
  317. case TK_PR_ONREADY:
  318. case TK_PR_TOOL:
  319. case TK_PR_STATIC:
  320. case TK_PR_EXPORT:
  321. case TK_PR_SETGET:
  322. case TK_PR_SIGNAL:
  323. case TK_PR_REMOTE:
  324. case TK_PR_MASTER:
  325. case TK_PR_PUPPET:
  326. case TK_PR_SYNC:
  327. case TK_PR_REMOTESYNC:
  328. case TK_PR_MASTERSYNC:
  329. case TK_PR_PUPPETSYNC:
  330. return true;
  331. // Literal for non-variables only:
  332. case TK_BUILT_IN_TYPE:
  333. case TK_BUILT_IN_FUNC:
  334. case TK_OP_IN:
  335. //case TK_OP_NOT:
  336. //case TK_OP_OR:
  337. //case TK_OP_AND:
  338. case TK_PR_CLASS:
  339. case TK_PR_CONST:
  340. case TK_PR_ENUM:
  341. case TK_PR_PRELOAD:
  342. case TK_PR_FUNCTION:
  343. case TK_PR_EXTENDS:
  344. case TK_PR_ASSERT:
  345. case TK_PR_YIELD:
  346. case TK_PR_VAR:
  347. case TK_CF_IF:
  348. case TK_CF_ELIF:
  349. case TK_CF_ELSE:
  350. case TK_CF_FOR:
  351. case TK_CF_WHILE:
  352. case TK_CF_BREAK:
  353. case TK_CF_CONTINUE:
  354. case TK_CF_RETURN:
  355. case TK_CF_MATCH:
  356. case TK_CF_PASS:
  357. case TK_SELF:
  358. case TK_CONST_PI:
  359. case TK_CONST_TAU:
  360. case TK_WILDCARD:
  361. case TK_CONST_INF:
  362. case TK_CONST_NAN:
  363. case TK_ERROR:
  364. return !variable_safe;
  365. case TK_CONSTANT: {
  366. switch (get_token_constant(p_offset).get_type()) {
  367. case Variant::NIL:
  368. case Variant::BOOL:
  369. return true;
  370. default:
  371. return false;
  372. }
  373. }
  374. default:
  375. return false;
  376. }
  377. }
  378. StringName GDScriptTokenizer::get_token_literal(int p_offset) const {
  379. Token token = get_token(p_offset);
  380. switch (token) {
  381. case TK_IDENTIFIER:
  382. return get_token_identifier(p_offset);
  383. case TK_BUILT_IN_TYPE: {
  384. Variant::Type type = get_token_type(p_offset);
  385. int idx = 0;
  386. while (_type_list[idx].text) {
  387. if (type == _type_list[idx].type) {
  388. return _type_list[idx].text;
  389. }
  390. idx++;
  391. }
  392. } break; // Shouldn't get here, stuff happens
  393. case TK_BUILT_IN_FUNC:
  394. return GDScriptFunctions::get_func_name(get_token_built_in_func(p_offset));
  395. case TK_CONSTANT: {
  396. const Variant value = get_token_constant(p_offset);
  397. switch (value.get_type()) {
  398. case Variant::NIL:
  399. return "null";
  400. case Variant::BOOL:
  401. return value ? "true" : "false";
  402. default: {
  403. }
  404. }
  405. }
  406. case TK_OP_AND:
  407. case TK_OP_OR:
  408. break; // Don't get into default, since they can be non-literal
  409. default: {
  410. int idx = 0;
  411. while (_keyword_list[idx].text) {
  412. if (token == _keyword_list[idx].token) {
  413. return _keyword_list[idx].text;
  414. }
  415. idx++;
  416. }
  417. }
  418. }
  419. ERR_FAIL_V_MSG("", "Failed to get token literal.");
  420. }
  421. static bool _is_text_char(CharType c) {
  422. return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_';
  423. }
  424. static bool _is_number(CharType c) {
  425. return (c >= '0' && c <= '9');
  426. }
  427. static bool _is_hex(CharType c) {
  428. return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
  429. }
  430. static bool _is_bin(CharType c) {
  431. return (c == '0' || c == '1');
  432. }
  433. void GDScriptTokenizerText::_make_token(Token p_type) {
  434. TokenData &tk = tk_rb[tk_rb_pos];
  435. tk.type = p_type;
  436. tk.line = line;
  437. tk.col = column;
  438. tk_rb_pos = (tk_rb_pos + 1) % TK_RB_SIZE;
  439. }
  440. void GDScriptTokenizerText::_make_identifier(const StringName &p_identifier) {
  441. TokenData &tk = tk_rb[tk_rb_pos];
  442. tk.type = TK_IDENTIFIER;
  443. tk.identifier = p_identifier;
  444. tk.line = line;
  445. tk.col = column;
  446. tk_rb_pos = (tk_rb_pos + 1) % TK_RB_SIZE;
  447. }
  448. void GDScriptTokenizerText::_make_built_in_func(GDScriptFunctions::Function p_func) {
  449. TokenData &tk = tk_rb[tk_rb_pos];
  450. tk.type = TK_BUILT_IN_FUNC;
  451. tk.func = p_func;
  452. tk.line = line;
  453. tk.col = column;
  454. tk_rb_pos = (tk_rb_pos + 1) % TK_RB_SIZE;
  455. }
  456. void GDScriptTokenizerText::_make_constant(const Variant &p_constant) {
  457. TokenData &tk = tk_rb[tk_rb_pos];
  458. tk.type = TK_CONSTANT;
  459. tk.constant = p_constant;
  460. tk.line = line;
  461. tk.col = column;
  462. tk_rb_pos = (tk_rb_pos + 1) % TK_RB_SIZE;
  463. }
  464. void GDScriptTokenizerText::_make_type(const Variant::Type &p_type) {
  465. TokenData &tk = tk_rb[tk_rb_pos];
  466. tk.type = TK_BUILT_IN_TYPE;
  467. tk.vtype = p_type;
  468. tk.line = line;
  469. tk.col = column;
  470. tk_rb_pos = (tk_rb_pos + 1) % TK_RB_SIZE;
  471. }
  472. void GDScriptTokenizerText::_make_error(const String &p_error) {
  473. error_flag = true;
  474. last_error = p_error;
  475. TokenData &tk = tk_rb[tk_rb_pos];
  476. tk.type = TK_ERROR;
  477. tk.constant = p_error;
  478. tk.line = line;
  479. tk.col = column;
  480. tk_rb_pos = (tk_rb_pos + 1) % TK_RB_SIZE;
  481. }
  482. void GDScriptTokenizerText::_make_newline(int p_indentation, int p_tabs) {
  483. TokenData &tk = tk_rb[tk_rb_pos];
  484. tk.type = TK_NEWLINE;
  485. tk.constant = Vector2(p_indentation, p_tabs);
  486. tk.line = line;
  487. tk.col = column;
  488. tk_rb_pos = (tk_rb_pos + 1) % TK_RB_SIZE;
  489. }
  490. void GDScriptTokenizerText::_advance() {
  491. if (error_flag) {
  492. //parser broke
  493. _make_error(last_error);
  494. return;
  495. }
  496. if (code_pos >= len) {
  497. _make_token(TK_EOF);
  498. return;
  499. }
  500. #define GETCHAR(m_ofs) ((m_ofs + code_pos) >= len ? 0 : _code[m_ofs + code_pos])
  501. #define INCPOS(m_amount) \
  502. { \
  503. code_pos += m_amount; \
  504. column += m_amount; \
  505. }
  506. while (true) {
  507. bool is_node_path = false;
  508. StringMode string_mode = STRING_DOUBLE_QUOTE;
  509. switch (GETCHAR(0)) {
  510. case 0:
  511. _make_token(TK_EOF);
  512. break;
  513. case '\\':
  514. INCPOS(1);
  515. if (GETCHAR(0) == '\r') {
  516. INCPOS(1);
  517. }
  518. if (GETCHAR(0) != '\n') {
  519. _make_error("Expected newline after '\\'.");
  520. return;
  521. }
  522. INCPOS(1);
  523. line++;
  524. while (GETCHAR(0) == ' ' || GETCHAR(0) == '\t') {
  525. INCPOS(1);
  526. }
  527. continue;
  528. case '\t':
  529. case '\r':
  530. case ' ':
  531. INCPOS(1);
  532. continue;
  533. case '#': { // line comment skip
  534. #ifdef DEBUG_ENABLED
  535. String comment;
  536. #endif // DEBUG_ENABLED
  537. while (GETCHAR(0) != '\n') {
  538. #ifdef DEBUG_ENABLED
  539. comment += GETCHAR(0);
  540. #endif // DEBUG_ENABLED
  541. code_pos++;
  542. if (GETCHAR(0) == 0) { //end of file
  543. //_make_error("Unterminated Comment");
  544. _make_token(TK_EOF);
  545. return;
  546. }
  547. }
  548. #ifdef DEBUG_ENABLED
  549. String comment_content = comment.trim_prefix("#").trim_prefix(" ");
  550. if (comment_content.begins_with("warning-ignore:")) {
  551. String code = comment_content.get_slice(":", 1);
  552. warning_skips.push_back(Pair<int, String>(line, code.strip_edges().to_lower()));
  553. } else if (comment_content.begins_with("warning-ignore-all:")) {
  554. String code = comment_content.get_slice(":", 1);
  555. warning_global_skips.insert(code.strip_edges().to_lower());
  556. } else if (comment_content.strip_edges() == "warnings-disable") {
  557. ignore_warnings = true;
  558. }
  559. #endif // DEBUG_ENABLED
  560. FALLTHROUGH;
  561. }
  562. case '\n': {
  563. line++;
  564. INCPOS(1);
  565. bool used_spaces = false;
  566. int tabs = 0;
  567. column = 1;
  568. int i = 0;
  569. while (true) {
  570. if (GETCHAR(i) == ' ') {
  571. i++;
  572. used_spaces = true;
  573. } else if (GETCHAR(i) == '\t') {
  574. if (used_spaces) {
  575. _make_error("Spaces used before tabs on a line");
  576. return;
  577. }
  578. i++;
  579. tabs++;
  580. } else {
  581. break; // not indentation anymore
  582. }
  583. }
  584. _make_newline(i, tabs);
  585. return;
  586. }
  587. case '/': {
  588. switch (GETCHAR(1)) {
  589. case '=': { // diveq
  590. _make_token(TK_OP_ASSIGN_DIV);
  591. INCPOS(1);
  592. } break;
  593. default:
  594. _make_token(TK_OP_DIV);
  595. }
  596. } break;
  597. case '=': {
  598. if (GETCHAR(1) == '=') {
  599. _make_token(TK_OP_EQUAL);
  600. INCPOS(1);
  601. } else {
  602. _make_token(TK_OP_ASSIGN);
  603. }
  604. } break;
  605. case '<': {
  606. if (GETCHAR(1) == '=') {
  607. _make_token(TK_OP_LESS_EQUAL);
  608. INCPOS(1);
  609. } else if (GETCHAR(1) == '<') {
  610. if (GETCHAR(2) == '=') {
  611. _make_token(TK_OP_ASSIGN_SHIFT_LEFT);
  612. INCPOS(1);
  613. } else {
  614. _make_token(TK_OP_SHIFT_LEFT);
  615. }
  616. INCPOS(1);
  617. } else {
  618. _make_token(TK_OP_LESS);
  619. }
  620. } break;
  621. case '>': {
  622. if (GETCHAR(1) == '=') {
  623. _make_token(TK_OP_GREATER_EQUAL);
  624. INCPOS(1);
  625. } else if (GETCHAR(1) == '>') {
  626. if (GETCHAR(2) == '=') {
  627. _make_token(TK_OP_ASSIGN_SHIFT_RIGHT);
  628. INCPOS(1);
  629. } else {
  630. _make_token(TK_OP_SHIFT_RIGHT);
  631. }
  632. INCPOS(1);
  633. } else {
  634. _make_token(TK_OP_GREATER);
  635. }
  636. } break;
  637. case '!': {
  638. if (GETCHAR(1) == '=') {
  639. _make_token(TK_OP_NOT_EQUAL);
  640. INCPOS(1);
  641. } else {
  642. _make_token(TK_OP_NOT);
  643. }
  644. } break;
  645. //case '"' //string - no strings in shader
  646. //case '\'' //string - no strings in shader
  647. case '{':
  648. _make_token(TK_CURLY_BRACKET_OPEN);
  649. break;
  650. case '}':
  651. _make_token(TK_CURLY_BRACKET_CLOSE);
  652. break;
  653. case '[':
  654. _make_token(TK_BRACKET_OPEN);
  655. break;
  656. case ']':
  657. _make_token(TK_BRACKET_CLOSE);
  658. break;
  659. case '(':
  660. _make_token(TK_PARENTHESIS_OPEN);
  661. break;
  662. case ')':
  663. _make_token(TK_PARENTHESIS_CLOSE);
  664. break;
  665. case ',':
  666. _make_token(TK_COMMA);
  667. break;
  668. case ';':
  669. _make_token(TK_SEMICOLON);
  670. break;
  671. case '?':
  672. _make_token(TK_QUESTION_MARK);
  673. break;
  674. case ':':
  675. _make_token(TK_COLON); //for methods maybe but now useless.
  676. break;
  677. case '$':
  678. _make_token(TK_DOLLAR); //for the get_node() shortener
  679. break;
  680. case '^': {
  681. if (GETCHAR(1) == '=') {
  682. _make_token(TK_OP_ASSIGN_BIT_XOR);
  683. INCPOS(1);
  684. } else {
  685. _make_token(TK_OP_BIT_XOR);
  686. }
  687. } break;
  688. case '~':
  689. _make_token(TK_OP_BIT_INVERT);
  690. break;
  691. case '&': {
  692. if (GETCHAR(1) == '&') {
  693. _make_token(TK_OP_AND);
  694. INCPOS(1);
  695. } else if (GETCHAR(1) == '=') {
  696. _make_token(TK_OP_ASSIGN_BIT_AND);
  697. INCPOS(1);
  698. } else {
  699. _make_token(TK_OP_BIT_AND);
  700. }
  701. } break;
  702. case '|': {
  703. if (GETCHAR(1) == '|') {
  704. _make_token(TK_OP_OR);
  705. INCPOS(1);
  706. } else if (GETCHAR(1) == '=') {
  707. _make_token(TK_OP_ASSIGN_BIT_OR);
  708. INCPOS(1);
  709. } else {
  710. _make_token(TK_OP_BIT_OR);
  711. }
  712. } break;
  713. case '*': {
  714. if (GETCHAR(1) == '=') {
  715. _make_token(TK_OP_ASSIGN_MUL);
  716. INCPOS(1);
  717. } else {
  718. _make_token(TK_OP_MUL);
  719. }
  720. } break;
  721. case '+': {
  722. if (GETCHAR(1) == '=') {
  723. _make_token(TK_OP_ASSIGN_ADD);
  724. INCPOS(1);
  725. /*
  726. } else if (GETCHAR(1)=='+') {
  727. _make_token(TK_OP_PLUS_PLUS);
  728. INCPOS(1);
  729. */
  730. } else {
  731. _make_token(TK_OP_ADD);
  732. }
  733. } break;
  734. case '-': {
  735. if (GETCHAR(1) == '=') {
  736. _make_token(TK_OP_ASSIGN_SUB);
  737. INCPOS(1);
  738. } else if (GETCHAR(1) == '>') {
  739. _make_token(TK_FORWARD_ARROW);
  740. INCPOS(1);
  741. } else {
  742. _make_token(TK_OP_SUB);
  743. }
  744. } break;
  745. case '%': {
  746. if (GETCHAR(1) == '=') {
  747. _make_token(TK_OP_ASSIGN_MOD);
  748. INCPOS(1);
  749. } else {
  750. _make_token(TK_OP_MOD);
  751. }
  752. } break;
  753. case '@':
  754. if (CharType(GETCHAR(1)) != '"' && CharType(GETCHAR(1)) != '\'') {
  755. _make_error("Unexpected '@'");
  756. return;
  757. }
  758. INCPOS(1);
  759. is_node_path = true;
  760. FALLTHROUGH;
  761. case '\'':
  762. case '"': {
  763. if (GETCHAR(0) == '\'') {
  764. string_mode = STRING_SINGLE_QUOTE;
  765. }
  766. int i = 1;
  767. if (string_mode == STRING_DOUBLE_QUOTE && GETCHAR(i) == '"' && GETCHAR(i + 1) == '"') {
  768. i += 2;
  769. string_mode = STRING_MULTILINE;
  770. }
  771. String str;
  772. while (true) {
  773. if (CharType(GETCHAR(i)) == 0) {
  774. _make_error("Unterminated String");
  775. return;
  776. } else if (string_mode == STRING_DOUBLE_QUOTE && CharType(GETCHAR(i)) == '"') {
  777. break;
  778. } else if (string_mode == STRING_SINGLE_QUOTE && CharType(GETCHAR(i)) == '\'') {
  779. break;
  780. } else if (string_mode == STRING_MULTILINE && CharType(GETCHAR(i)) == '\"' && CharType(GETCHAR(i + 1)) == '\"' && CharType(GETCHAR(i + 2)) == '\"') {
  781. i += 2;
  782. break;
  783. } else if (string_mode != STRING_MULTILINE && CharType(GETCHAR(i)) == '\n') {
  784. _make_error("Unexpected EOL at String.");
  785. return;
  786. } else if (CharType(GETCHAR(i)) == 0xFFFF) {
  787. //string ends here, next will be TK
  788. i--;
  789. break;
  790. } else if (CharType(GETCHAR(i)) == '\\') {
  791. //escaped characters...
  792. i++;
  793. CharType next = GETCHAR(i);
  794. if (next == 0) {
  795. _make_error("Unterminated String");
  796. return;
  797. }
  798. CharType res = 0;
  799. switch (next) {
  800. case 'a':
  801. res = 7;
  802. break;
  803. case 'b':
  804. res = 8;
  805. break;
  806. case 't':
  807. res = 9;
  808. break;
  809. case 'n':
  810. res = 10;
  811. break;
  812. case 'v':
  813. res = 11;
  814. break;
  815. case 'f':
  816. res = 12;
  817. break;
  818. case 'r':
  819. res = 13;
  820. break;
  821. case '\'':
  822. res = '\'';
  823. break;
  824. case '\"':
  825. res = '\"';
  826. break;
  827. case '\\':
  828. res = '\\';
  829. break;
  830. case '/':
  831. res = '/';
  832. break; //wtf
  833. case 'u': {
  834. //hexnumbarh - oct is deprecated
  835. i += 1;
  836. for (int j = 0; j < 4; j++) {
  837. CharType c = GETCHAR(i + j);
  838. if (c == 0) {
  839. _make_error("Unterminated String");
  840. return;
  841. }
  842. CharType v = 0;
  843. if (c >= '0' && c <= '9') {
  844. v = c - '0';
  845. } else if (c >= 'a' && c <= 'f') {
  846. v = c - 'a';
  847. v += 10;
  848. } else if (c >= 'A' && c <= 'F') {
  849. v = c - 'A';
  850. v += 10;
  851. } else {
  852. _make_error("Malformed hex constant in string");
  853. return;
  854. }
  855. res <<= 4;
  856. res |= v;
  857. }
  858. i += 3;
  859. } break;
  860. default: {
  861. _make_error("Invalid escape sequence");
  862. return;
  863. } break;
  864. }
  865. str += res;
  866. } else {
  867. if (CharType(GETCHAR(i)) == '\n') {
  868. line++;
  869. column = 1;
  870. }
  871. str += CharType(GETCHAR(i));
  872. }
  873. i++;
  874. }
  875. INCPOS(i);
  876. if (is_node_path) {
  877. _make_constant(NodePath(str));
  878. } else {
  879. _make_constant(str);
  880. }
  881. } break;
  882. case 0xFFFF: {
  883. _make_token(TK_CURSOR);
  884. } break;
  885. default: {
  886. if (_is_number(GETCHAR(0)) || (GETCHAR(0) == '.' && _is_number(GETCHAR(1)))) {
  887. // parse number
  888. bool period_found = false;
  889. bool exponent_found = false;
  890. bool hexa_found = false;
  891. bool bin_found = false;
  892. bool sign_found = false;
  893. String str;
  894. int i = 0;
  895. while (true) {
  896. if (GETCHAR(i) == '.') {
  897. if (period_found || exponent_found) {
  898. _make_error("Invalid numeric constant at '.'");
  899. return;
  900. } else if (bin_found) {
  901. _make_error("Invalid binary constant at '.'");
  902. return;
  903. } else if (hexa_found) {
  904. _make_error("Invalid hexadecimal constant at '.'");
  905. return;
  906. }
  907. period_found = true;
  908. } else if (GETCHAR(i) == 'x') {
  909. if (hexa_found || bin_found || str.length() != 1 || !((i == 1 && str[0] == '0') || (i == 2 && str[1] == '0' && str[0] == '-'))) {
  910. _make_error("Invalid numeric constant at 'x'");
  911. return;
  912. }
  913. hexa_found = true;
  914. } else if (hexa_found && _is_hex(GETCHAR(i))) {
  915. } else if (!hexa_found && GETCHAR(i) == 'b') {
  916. if (bin_found || str.length() != 1 || !((i == 1 && str[0] == '0') || (i == 2 && str[1] == '0' && str[0] == '-'))) {
  917. _make_error("Invalid numeric constant at 'b'");
  918. return;
  919. }
  920. bin_found = true;
  921. } else if (!hexa_found && GETCHAR(i) == 'e') {
  922. if (exponent_found || bin_found) {
  923. _make_error("Invalid numeric constant at 'e'");
  924. return;
  925. }
  926. exponent_found = true;
  927. } else if (_is_number(GETCHAR(i))) {
  928. //all ok
  929. } else if (bin_found && _is_bin(GETCHAR(i))) {
  930. } else if ((GETCHAR(i) == '-' || GETCHAR(i) == '+') && exponent_found) {
  931. if (sign_found) {
  932. _make_error("Invalid numeric constant at '-'");
  933. return;
  934. }
  935. sign_found = true;
  936. } else if (GETCHAR(i) == '_') {
  937. i++;
  938. continue; // Included for readability, shouldn't be a part of the string
  939. } else {
  940. break;
  941. }
  942. str += CharType(GETCHAR(i));
  943. i++;
  944. }
  945. if (!(_is_number(str[str.length() - 1]) || (hexa_found && _is_hex(str[str.length() - 1])))) {
  946. _make_error("Invalid numeric constant: " + str);
  947. return;
  948. }
  949. INCPOS(i);
  950. if (hexa_found) {
  951. int64_t val = str.hex_to_int64();
  952. _make_constant(val);
  953. } else if (bin_found) {
  954. int64_t val = str.bin_to_int64();
  955. _make_constant(val);
  956. } else if (period_found || exponent_found) {
  957. double val = str.to_double();
  958. _make_constant(val);
  959. } else {
  960. int64_t val = str.to_int64();
  961. _make_constant(val);
  962. }
  963. return;
  964. }
  965. if (GETCHAR(0) == '.') {
  966. //parse period
  967. _make_token(TK_PERIOD);
  968. break;
  969. }
  970. if (_is_text_char(GETCHAR(0))) {
  971. // parse identifier
  972. String str;
  973. str += CharType(GETCHAR(0));
  974. int i = 1;
  975. while (_is_text_char(GETCHAR(i))) {
  976. str += CharType(GETCHAR(i));
  977. i++;
  978. }
  979. // Detect preset keywords / functions using hashtable.
  980. bool found = _parse_identifier(str);
  981. if (!found) {
  982. _make_identifier(str);
  983. }
  984. INCPOS(str.length());
  985. return;
  986. }
  987. _make_error("Unknown character");
  988. return;
  989. } break;
  990. }
  991. INCPOS(1);
  992. break;
  993. }
  994. }
  995. void GDScriptTokenizerText::set_code(const String &p_code) {
  996. code = p_code;
  997. len = p_code.length();
  998. if (len) {
  999. _code = &code[0];
  1000. } else {
  1001. _code = nullptr;
  1002. }
  1003. code_pos = 0;
  1004. line = 1; //it is stand-ar-ized that lines begin in 1 in code..
  1005. column = 1; //the same holds for columns
  1006. tk_rb_pos = 0;
  1007. error_flag = false;
  1008. #ifdef DEBUG_ENABLED
  1009. ignore_warnings = false;
  1010. #endif // DEBUG_ENABLED
  1011. last_error = "";
  1012. for (int i = 0; i < MAX_LOOKAHEAD + 1; i++) {
  1013. _advance();
  1014. }
  1015. }
  1016. GDScriptTokenizerText::Token GDScriptTokenizerText::get_token(int p_offset) const {
  1017. ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, TK_ERROR);
  1018. ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, TK_ERROR);
  1019. int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
  1020. return tk_rb[ofs].type;
  1021. }
  1022. int GDScriptTokenizerText::get_token_line(int p_offset) const {
  1023. ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, -1);
  1024. ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, -1);
  1025. int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
  1026. return tk_rb[ofs].line;
  1027. }
  1028. int GDScriptTokenizerText::get_token_column(int p_offset) const {
  1029. ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, -1);
  1030. ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, -1);
  1031. int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
  1032. return tk_rb[ofs].col;
  1033. }
  1034. const Variant &GDScriptTokenizerText::get_token_constant(int p_offset) const {
  1035. ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, tk_rb[0].constant);
  1036. ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, tk_rb[0].constant);
  1037. int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
  1038. ERR_FAIL_COND_V(tk_rb[ofs].type != TK_CONSTANT, tk_rb[0].constant);
  1039. return tk_rb[ofs].constant;
  1040. }
  1041. StringName GDScriptTokenizerText::get_token_identifier(int p_offset) const {
  1042. ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, StringName());
  1043. ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, StringName());
  1044. int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
  1045. ERR_FAIL_COND_V(tk_rb[ofs].type != TK_IDENTIFIER, StringName());
  1046. return tk_rb[ofs].identifier;
  1047. }
  1048. GDScriptFunctions::Function GDScriptTokenizerText::get_token_built_in_func(int p_offset) const {
  1049. ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, GDScriptFunctions::FUNC_MAX);
  1050. ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, GDScriptFunctions::FUNC_MAX);
  1051. int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
  1052. ERR_FAIL_COND_V(tk_rb[ofs].type != TK_BUILT_IN_FUNC, GDScriptFunctions::FUNC_MAX);
  1053. return tk_rb[ofs].func;
  1054. }
  1055. Variant::Type GDScriptTokenizerText::get_token_type(int p_offset) const {
  1056. ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, Variant::NIL);
  1057. ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, Variant::NIL);
  1058. int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
  1059. ERR_FAIL_COND_V(tk_rb[ofs].type != TK_BUILT_IN_TYPE, Variant::NIL);
  1060. return tk_rb[ofs].vtype;
  1061. }
  1062. int GDScriptTokenizerText::get_token_line_indent(int p_offset) const {
  1063. ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, 0);
  1064. ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, 0);
  1065. int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
  1066. ERR_FAIL_COND_V(tk_rb[ofs].type != TK_NEWLINE, 0);
  1067. return tk_rb[ofs].constant.operator Vector2().x;
  1068. }
  1069. int GDScriptTokenizerText::get_token_line_tab_indent(int p_offset) const {
  1070. ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, 0);
  1071. ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, 0);
  1072. int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
  1073. ERR_FAIL_COND_V(tk_rb[ofs].type != TK_NEWLINE, 0);
  1074. return tk_rb[ofs].constant.operator Vector2().y;
  1075. }
  1076. String GDScriptTokenizerText::get_token_error(int p_offset) const {
  1077. ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, String());
  1078. ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, String());
  1079. int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
  1080. ERR_FAIL_COND_V(tk_rb[ofs].type != TK_ERROR, String());
  1081. return tk_rb[ofs].constant;
  1082. }
  1083. void GDScriptTokenizerText::advance(int p_amount) {
  1084. ERR_FAIL_COND(p_amount <= 0);
  1085. for (int i = 0; i < p_amount; i++) {
  1086. _advance();
  1087. }
  1088. }
  1089. //////////////////////////////////////////////////////////////////////////////////////////////////////
  1090. #define BYTECODE_VERSION 13
  1091. Error GDScriptTokenizerBuffer::set_code_buffer(const Vector<uint8_t> &p_buffer) {
  1092. const uint8_t *buf = p_buffer.ptr();
  1093. int total_len = p_buffer.size();
  1094. ERR_FAIL_COND_V(p_buffer.size() < 24 || p_buffer[0] != 'G' || p_buffer[1] != 'D' || p_buffer[2] != 'S' || p_buffer[3] != 'C', ERR_INVALID_DATA);
  1095. int version = decode_uint32(&buf[4]);
  1096. ERR_FAIL_COND_V_MSG(version > BYTECODE_VERSION, ERR_INVALID_DATA, "Bytecode is too recent! Please use a newer engine version.");
  1097. int identifier_count = decode_uint32(&buf[8]);
  1098. int constant_count = decode_uint32(&buf[12]);
  1099. int line_count = decode_uint32(&buf[16]);
  1100. int token_count = decode_uint32(&buf[20]);
  1101. const uint8_t *b = &buf[24];
  1102. total_len -= 24;
  1103. identifiers.resize(identifier_count);
  1104. for (int i = 0; i < identifier_count; i++) {
  1105. int len = decode_uint32(b);
  1106. ERR_FAIL_COND_V(len > total_len, ERR_INVALID_DATA);
  1107. b += 4;
  1108. Vector<uint8_t> cs;
  1109. cs.resize(len);
  1110. for (int j = 0; j < len; j++) {
  1111. cs.write[j] = b[j] ^ 0xb6;
  1112. }
  1113. cs.write[cs.size() - 1] = 0;
  1114. String s;
  1115. s.parse_utf8((const char *)cs.ptr());
  1116. b += len;
  1117. total_len -= len + 4;
  1118. identifiers.write[i] = s;
  1119. }
  1120. constants.resize(constant_count);
  1121. for (int i = 0; i < constant_count; i++) {
  1122. Variant v;
  1123. int len;
  1124. // An object cannot be constant, never decode objects
  1125. Error err = decode_variant(v, b, total_len, &len, false);
  1126. if (err) {
  1127. return err;
  1128. }
  1129. b += len;
  1130. total_len -= len;
  1131. constants.write[i] = v;
  1132. }
  1133. ERR_FAIL_COND_V(line_count * 8 > total_len, ERR_INVALID_DATA);
  1134. for (int i = 0; i < line_count; i++) {
  1135. uint32_t token = decode_uint32(b);
  1136. b += 4;
  1137. uint32_t linecol = decode_uint32(b);
  1138. b += 4;
  1139. lines.insert(token, linecol);
  1140. total_len -= 8;
  1141. }
  1142. tokens.resize(token_count);
  1143. for (int i = 0; i < token_count; i++) {
  1144. ERR_FAIL_COND_V(total_len < 1, ERR_INVALID_DATA);
  1145. if ((*b) & TOKEN_BYTE_MASK) { //little endian always
  1146. ERR_FAIL_COND_V(total_len < 4, ERR_INVALID_DATA);
  1147. tokens.write[i] = decode_uint32(b) & ~TOKEN_BYTE_MASK;
  1148. b += 4;
  1149. } else {
  1150. tokens.write[i] = *b;
  1151. b += 1;
  1152. total_len--;
  1153. }
  1154. }
  1155. token = 0;
  1156. return OK;
  1157. }
  1158. Vector<uint8_t> GDScriptTokenizerBuffer::parse_code_string(const String &p_code) {
  1159. Vector<uint8_t> buf;
  1160. Map<StringName, int> identifier_map;
  1161. HashMap<Variant, int, VariantHasher, VariantComparator> constant_map;
  1162. Map<uint32_t, int> line_map;
  1163. Vector<uint32_t> token_array;
  1164. GDScriptTokenizerText tt;
  1165. tt.set_code(p_code);
  1166. int line = -1;
  1167. while (true) {
  1168. if (tt.get_token_line() != line) {
  1169. line = tt.get_token_line();
  1170. line_map[line] = token_array.size();
  1171. }
  1172. uint32_t token = tt.get_token();
  1173. switch (tt.get_token()) {
  1174. case TK_IDENTIFIER: {
  1175. StringName id = tt.get_token_identifier();
  1176. if (!identifier_map.has(id)) {
  1177. int idx = identifier_map.size();
  1178. identifier_map[id] = idx;
  1179. }
  1180. token |= identifier_map[id] << TOKEN_BITS;
  1181. } break;
  1182. case TK_CONSTANT: {
  1183. const Variant &c = tt.get_token_constant();
  1184. if (!constant_map.has(c)) {
  1185. int idx = constant_map.size();
  1186. constant_map[c] = idx;
  1187. }
  1188. token |= constant_map[c] << TOKEN_BITS;
  1189. } break;
  1190. case TK_BUILT_IN_TYPE: {
  1191. token |= tt.get_token_type() << TOKEN_BITS;
  1192. } break;
  1193. case TK_BUILT_IN_FUNC: {
  1194. token |= tt.get_token_built_in_func() << TOKEN_BITS;
  1195. } break;
  1196. case TK_NEWLINE: {
  1197. token |= tt.get_token_line_indent() << TOKEN_BITS;
  1198. } break;
  1199. case TK_ERROR: {
  1200. ERR_FAIL_V(Vector<uint8_t>());
  1201. } break;
  1202. default: {
  1203. }
  1204. };
  1205. token_array.push_back(token);
  1206. if (tt.get_token() == TK_EOF) {
  1207. break;
  1208. }
  1209. tt.advance();
  1210. }
  1211. //reverse maps
  1212. Map<int, StringName> rev_identifier_map;
  1213. for (Map<StringName, int>::Element *E = identifier_map.front(); E; E = E->next()) {
  1214. rev_identifier_map[E->get()] = E->key();
  1215. }
  1216. Map<int, Variant> rev_constant_map;
  1217. const Variant *K = nullptr;
  1218. while ((K = constant_map.next(K))) {
  1219. rev_constant_map[constant_map[*K]] = *K;
  1220. }
  1221. Map<int, uint32_t> rev_line_map;
  1222. for (Map<uint32_t, int>::Element *E = line_map.front(); E; E = E->next()) {
  1223. rev_line_map[E->get()] = E->key();
  1224. }
  1225. //save header
  1226. buf.resize(24);
  1227. buf.write[0] = 'G';
  1228. buf.write[1] = 'D';
  1229. buf.write[2] = 'S';
  1230. buf.write[3] = 'C';
  1231. encode_uint32(BYTECODE_VERSION, &buf.write[4]);
  1232. encode_uint32(identifier_map.size(), &buf.write[8]);
  1233. encode_uint32(constant_map.size(), &buf.write[12]);
  1234. encode_uint32(line_map.size(), &buf.write[16]);
  1235. encode_uint32(token_array.size(), &buf.write[20]);
  1236. //save identifiers
  1237. for (Map<int, StringName>::Element *E = rev_identifier_map.front(); E; E = E->next()) {
  1238. CharString cs = String(E->get()).utf8();
  1239. int len = cs.length() + 1;
  1240. int extra = 4 - (len % 4);
  1241. if (extra == 4) {
  1242. extra = 0;
  1243. }
  1244. uint8_t ibuf[4];
  1245. encode_uint32(len + extra, ibuf);
  1246. for (int i = 0; i < 4; i++) {
  1247. buf.push_back(ibuf[i]);
  1248. }
  1249. for (int i = 0; i < len; i++) {
  1250. buf.push_back(cs[i] ^ 0xb6);
  1251. }
  1252. for (int i = 0; i < extra; i++) {
  1253. buf.push_back(0 ^ 0xb6);
  1254. }
  1255. }
  1256. for (Map<int, Variant>::Element *E = rev_constant_map.front(); E; E = E->next()) {
  1257. int len;
  1258. // Objects cannot be constant, never encode objects
  1259. Error err = encode_variant(E->get(), nullptr, len, false);
  1260. ERR_FAIL_COND_V_MSG(err != OK, Vector<uint8_t>(), "Error when trying to encode Variant.");
  1261. int pos = buf.size();
  1262. buf.resize(pos + len);
  1263. encode_variant(E->get(), &buf.write[pos], len, false);
  1264. }
  1265. for (Map<int, uint32_t>::Element *E = rev_line_map.front(); E; E = E->next()) {
  1266. uint8_t ibuf[8];
  1267. encode_uint32(E->key(), &ibuf[0]);
  1268. encode_uint32(E->get(), &ibuf[4]);
  1269. for (int i = 0; i < 8; i++) {
  1270. buf.push_back(ibuf[i]);
  1271. }
  1272. }
  1273. for (int i = 0; i < token_array.size(); i++) {
  1274. uint32_t token = token_array[i];
  1275. if (token & ~TOKEN_MASK) {
  1276. uint8_t buf4[4];
  1277. encode_uint32(token_array[i] | TOKEN_BYTE_MASK, &buf4[0]);
  1278. for (int j = 0; j < 4; j++) {
  1279. buf.push_back(buf4[j]);
  1280. }
  1281. } else {
  1282. buf.push_back(token);
  1283. }
  1284. }
  1285. return buf;
  1286. }
  1287. GDScriptTokenizerBuffer::Token GDScriptTokenizerBuffer::get_token(int p_offset) const {
  1288. int offset = token + p_offset;
  1289. if (offset < 0 || offset >= tokens.size()) {
  1290. return TK_EOF;
  1291. }
  1292. return GDScriptTokenizerBuffer::Token(tokens[offset] & TOKEN_MASK);
  1293. }
  1294. StringName GDScriptTokenizerBuffer::get_token_identifier(int p_offset) const {
  1295. int offset = token + p_offset;
  1296. ERR_FAIL_INDEX_V(offset, tokens.size(), StringName());
  1297. uint32_t identifier = tokens[offset] >> TOKEN_BITS;
  1298. ERR_FAIL_UNSIGNED_INDEX_V(identifier, (uint32_t)identifiers.size(), StringName());
  1299. return identifiers[identifier];
  1300. }
  1301. GDScriptFunctions::Function GDScriptTokenizerBuffer::get_token_built_in_func(int p_offset) const {
  1302. int offset = token + p_offset;
  1303. ERR_FAIL_INDEX_V(offset, tokens.size(), GDScriptFunctions::FUNC_MAX);
  1304. return GDScriptFunctions::Function(tokens[offset] >> TOKEN_BITS);
  1305. }
  1306. Variant::Type GDScriptTokenizerBuffer::get_token_type(int p_offset) const {
  1307. int offset = token + p_offset;
  1308. ERR_FAIL_INDEX_V(offset, tokens.size(), Variant::NIL);
  1309. return Variant::Type(tokens[offset] >> TOKEN_BITS);
  1310. }
  1311. int GDScriptTokenizerBuffer::get_token_line(int p_offset) const {
  1312. int offset = token + p_offset;
  1313. int pos = lines.find_nearest(offset);
  1314. if (pos < 0) {
  1315. return -1;
  1316. }
  1317. if (pos >= lines.size()) {
  1318. pos = lines.size() - 1;
  1319. }
  1320. uint32_t l = lines.getv(pos);
  1321. return l & TOKEN_LINE_MASK;
  1322. }
  1323. int GDScriptTokenizerBuffer::get_token_column(int p_offset) const {
  1324. int offset = token + p_offset;
  1325. int pos = lines.find_nearest(offset);
  1326. if (pos < 0) {
  1327. return -1;
  1328. }
  1329. if (pos >= lines.size()) {
  1330. pos = lines.size() - 1;
  1331. }
  1332. uint32_t l = lines.getv(pos);
  1333. return l >> TOKEN_LINE_BITS;
  1334. }
  1335. int GDScriptTokenizerBuffer::get_token_line_indent(int p_offset) const {
  1336. int offset = token + p_offset;
  1337. ERR_FAIL_INDEX_V(offset, tokens.size(), 0);
  1338. return tokens[offset] >> TOKEN_BITS;
  1339. }
  1340. const Variant &GDScriptTokenizerBuffer::get_token_constant(int p_offset) const {
  1341. int offset = token + p_offset;
  1342. ERR_FAIL_INDEX_V(offset, tokens.size(), nil);
  1343. uint32_t constant = tokens[offset] >> TOKEN_BITS;
  1344. ERR_FAIL_UNSIGNED_INDEX_V(constant, (uint32_t)constants.size(), nil);
  1345. return constants[constant];
  1346. }
/**
 * Error text is not available from the buffer tokenizer: this always reports
 * a failure through ERR_FAIL_V and returns an empty String.
 *
 * @param p_offset Unused.
 */
String GDScriptTokenizerBuffer::get_token_error(int p_offset) const {
	ERR_FAIL_V(String());
}
/**
 * Moves the current token index forward by p_amount.
 *
 * @param p_amount Number of tokens to skip. The resulting index must be a
 *                 valid position in `tokens`; otherwise an error is reported
 *                 and the index is left unchanged.
 */
void GDScriptTokenizerBuffer::advance(int p_amount) {
	// Validate the destination before committing to it.
	ERR_FAIL_INDEX(p_amount + token, tokens.size());
	token += p_amount;
}
  1354. GDScriptTokenizerBuffer::GDScriptTokenizerBuffer() {
  1355. token = 0;
  1356. }