/* GCSx
** TOKENIZE.CPP
**
** Script tokenization (to feed to compiler)
*/
/*****************************************************************************
** Copyright (C) 2003-2006 Janson
**
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
#include "all.h"
- void Tokenizer::deallocRange(list<Tokenizer::Token>::iterator start, list<Tokenizer::Token>::iterator end) { start_func
- for (; start != end; ++start) {
- delete (*start).text;
- (*start).text = NULL;
- }
- }
// Maps token text to token type constants- lazily built from the static
// tokenStrings table by initTokenLookups() and shared by all Tokenizer
// instances (freed via destroyGlobals())
map<string, int>* Tokenizer::tokenLookup = NULL;
- Tokenizer::Tokenizer(const list<string>* src) : cached(), bookmarks() { start_func
- initTokenLookups();
- source = src;
- row = source->begin();
- rowNum = 0;
- col = 0;
- atNewLine = 1;
- nextCloseBrace = 0;
- if (row != source->end()) rowLen = (*row).size();
- cacheRecord = 0;
- bookmarkNew = 0;
- cacheReplay = cached.end();
- nextBookmarkName = 1000;
-
- errorCount = 0;
- warningCount = 0;
- silent = 0;
- errRow = 0;
- errCol = 0;
- errBuffer = NULL;
- }
- Tokenizer::~Tokenizer() { start_func
- deallocRange(cached.begin(), cached.end());
- delete[] errBuffer;
- }
- int Tokenizer::getBookmarkName() { start_func
- return ++nextBookmarkName;
- }
- void Tokenizer::initTokenLookups() { start_func
- if (tokenLookup == NULL) {
- tokenLookup = new map<string, int>;
- int pos = 0;
- while (tokenStrings[pos].text) {
- string token = tokenStrings[pos].text;
- tokenLookup->operator[](token) = tokenStrings[pos].type;
- ++pos;
- }
- }
- }
- void Tokenizer::destroyGlobals() { start_func
- delete tokenLookup;
- tokenLookup = NULL;
- }
- int Tokenizer::atEOF() { start_func
- if (row == source->end()) return 1;
- return 0;
- }
- char Tokenizer::getCharacter() { start_func
- tokenizerAssert(row != source->end());
- if (col < rowLen) return (*row)[col];
- return '\0';
- }
- void Tokenizer::moveNext() { start_func
- tokenizerAssert(row != source->end());
- if (++col > rowLen) nextLine();
- }
- void Tokenizer::nextLine() { start_func
- tokenizerAssert(row != source->end());
- col = 0;
- ++row;
- ++rowNum;
- if (row != source->end()) rowLen = (*row).size();
- }
- string Tokenizer::grabUntil(const char* boundaries) throw_int { start_func
- tokenizerAssert(row != source->end());
- if (col >= rowLen) throw 1;
- string::size_type pos = (*row).find_first_of(boundaries, col);
- if (pos >= string::npos) throw 1;
- int prev = col;
- col = pos;
- return (*row).substr(prev, col - prev);
- }
- string Tokenizer::grabWhile(const char* charset) { start_func
- tokenizerAssert(row != source->end());
- string::size_type pos = (*row).find_first_not_of(charset, col);
- if (pos >= string::npos) pos = rowLen;
- int prev = col;
- col = pos;
- return (*row).substr(prev, col - prev);
- }
- string Tokenizer::grabRestOfLine() { start_func
- tokenizerAssert(row != source->end());
- if (col >= rowLen) return blankString;
- int prev = col;
- col = rowLen;
- return (*row).substr(prev, col - prev);
- }
// Fetch the next token from the stream.
// Returns 1 and fills in 'type'/'token' on success; returns 0 with
// type == TOKEN_NONE at end of file. Sets errRow/errCol to the token's
// position for error reporting. While recording (a bookmark is active),
// every produced token is appended to the replay cache.
// Newline handling: blank lines are skipped, an endline is suppressed
// just before a '{', and a '}' mid-line is split into ENDLINE + '}'.
int Tokenizer::nextToken(int& type, string& token) { start_func
    // Replaying previously cached tokens (after peekToken/bookmarkReturn)?
    if (cacheReplay != cached.end()) {
        errRow = (*cacheReplay).rowN;
        errCol = (*cacheReplay).colN;
        type = (*cacheReplay).type;
        token = *((*cacheReplay).text);
        ++cacheReplay;
        // If at end of cache and not recording, clear
        if ((!cacheRecord) && (cacheReplay == cached.end())) {
            deallocRange(cached.begin(), cached.end());
            cached.clear();
            // Replay pointer is already at end
            tokenizerAssert(cacheReplay == cached.end());
        }
        return 1;
    }

    int debug = debugLevel() & DEBUG_TOKENIZE;
    // A '}' deferred by the previous call (see bottom of function)?
    if (nextCloseBrace) {
        type = nextCloseBrace;
        token = "}";
        nextCloseBrace = 0;
    }
    else {
        do {
            // EOF?
            if (atEOF()) {
                if (debug) debugWrite(DEBUG_TOKENIZE, "Token: END OF FILE");
                token = blankString;
                type = TOKEN_NONE;
                return 0;
            }

            // Clear any whitespace
            grabWhile(WHITE_SPACE);

            // Remember where this token starts, for error messages
            errRow = rowNum;
            errCol = col;

            // Peek at next character to determine what sort of token to parse
            char tokenType = getCharacter();

            switch (tokenType) {
                case '\0':
                    // End of line
                    moveNext();
                    token = blankString;
                    type = TOKEN_ENDLINE;

                    // Scan forward to see if a { coming up
                    {
                        int sCol = col;
                        list<string>::const_iterator sRow = row;
                        string::size_type pos;

                        for (;;) {
                            if (sRow == source->end()) break;
                            pos = (*sRow).find_first_not_of(WHITE_SPACE, sCol);
                            if (pos >= string::npos) {
                                // Rest of this line is blank- try the next
                                sCol = 0;
                                ++sRow;
                                continue;
                            }
                            if ((*sRow)[pos] == '{') {
                                // { is coming up- force discard of endline token
                                atNewLine = 1;
                            }
                            break;
                        }
                    }
                    break;

                case '\'':
                    // Type string (quoted with '); lowercased
                    token = blankString;
                    moveNext();
                    try {
                        token += grabUntil("\'");
                        moveNext();
                    }
                    catch (int) {
                        outputError("No type-string terminator found on same line (missing ')");
                        // RESOLUTION: treat remainder of line as type string
                        token += grabRestOfLine();
                    }
                    toLower(token);
                    type = TOKEN_STRINGTYPE;
                    break;

                case '"':
                    // String literal; backslash escapes are expanded here
                    token = blankString;
                    moveNext();
                    try {
                        for (;;) {
                            token += grabUntil("\"\\");
                            if (getCharacter() == '\\') {
                                // Escape sequences
                                moveNext();
                                switch (tokenType = getCharacter()) {
                                    case 'n':
                                        token += "\n";
                                        break;
                                    case 'r':
                                        token += "\r";
                                        break;
                                    case 't':
                                        token += "\t";
                                        break;
                                    default:
                                        outputWarning("Unrecognized escape sequence '\\%c' (to include a backslash in a string, use \\\\)", tokenType);
                                        // RESOLUTION: insert backslash and character verbatim
                                        token += "\\";
                                        // (fall through)
                                    case '\\':
                                    case '"':
                                        token += string(1, tokenType);
                                        break;
                                }
                                moveNext();
                            }
                            else {
                                // Hit the closing quote- consume it and finish
                                moveNext();
                                break;
                            }
                        }
                    }
                    catch (int) {
                        outputError("No string terminator found on same line (missing \")");
                        // RESOLUTION: treat remainder of line as string
                        token += grabRestOfLine();
                    }
                    type = TOKEN_STRING;
                    break;

                case '#':
                    // Configuration?  (only recognized at the start of a line)
                    if (atNewLine) {
                        token = grabWhile("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789");
                        toLower(token);
                        type = TOKEN_CONFIG;
                        break;
                    }

                    // (otherwise, fall through to normal tokenization)

                default:
                    if (((tokenType >= 'a') && (tokenType <= 'z')) ||
                        ((tokenType >= 'A') && (tokenType <= 'Z')) ||
                        (tokenType == '_')) {
                        // Identifier / keyword / etc
                        token = grabWhile("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789");
                        toLower(token);
                        type = TOKEN_IDENTIFIER;

                        // Lookup token in map; otherwise an identifier
                        map<string, int>::iterator found = tokenLookup->find(token);
                        if (found != tokenLookup->end()) type = (*found).second;

                        // obj_ is always reserved, not identifier
                        if (type == TOKEN_IDENTIFIER)
                            if (token.substr(0, 4) == string("obj_"))
                                type = TOKEN_RESERVED;
                    }
                    else if ((tokenType >= '0') && (tokenType <= '9')) {
                        // Number
                        token = grabWhile("0123456789");
                        // Special case- 0x hexadecimal prefix
                        if ((token.size() == 1) && (tokenType == '0') && (tolower(getCharacter()) == 'x')) {
                            token += "x";
                            moveNext();
                            string add = grabWhile("0123456789abcdefABCDEF");
                            token += add;
                            toLower(token);

                            if (add.size() == 0) {
                                outputError("Invalid hexadecimal constant '%s'", token.c_str());
                                // RESOLUTION: add a zero and continue
                                token += "0";
                            }

                            type = TOKEN_HEX;
                        }
                        // One decimal allowed
                        else if (getCharacter() == '.') {
                            token += ".";
                            moveNext();
                            string add = grabWhile("0123456789");
                            token += add;

                            if (add.size() == 0) {
                                outputWarning("Invalid decimal constant '%s' (digits must appear before and after decimal point)", token.c_str());
                                // RESOLUTION: add a zero and continue
                                token += "0";
                            }

                            type = TOKEN_DECIMAL;
                        }
                        else {
                            type = TOKEN_INTEGER;
                        }

                        // Check for invalid character sequence afterward
                        string mess = grabWhile("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789");
                        if (mess.size() != 0) {
                            outputError("Unrecognized character sequence '%s' at end of numeric constant (identifiers may not begin with a number)", mess.c_str());
                            // RESOLUTION: continue compiling, having discarded invalid characters
                        }
                    }
                    else {
                        // Operator / symbol / comment / etc
                        // Greedy match: grow the token while each longer
                        // prefix still exists in the lookup table
                        map<string, int>::iterator found;
                        token = string(1, tokenType);
                        do {
                            // Add to token until it no longer matches a symbol we want
                            moveNext();
                            token += string(1, tokenType = getCharacter());
                            found = tokenLookup->find(token);
                        } while ((tokenType != '\0') && (found != tokenLookup->end()));

                        // The last character we added was not actually discarded, remove it
                        token = token.substr(0, token.size() - 1);

                        // Determine token type
                        found = tokenLookup->find(token);
                        if (found == tokenLookup->end()) type = TOKEN_UNKNOWN;
                        else type = (*found).second;

                        if (type == TOKEN_COMMENT_LINE) {
                            // Line comment- discard the rest of the line
                            nextLine();
                            token = blankString;
                            type = TOKEN_ENDLINE;
                        }

                        else if (type == TOKEN_COMMENT_BLOCK) {
                            // Block comment- skip ahead to the matching */
                            for (;;) {
                                if (atEOF()) {
                                    outputWarning("No end-of-comment marker found (missing */ symbol)");
                                    // RESOLUTION: treat as end-of-file
                                    if (debug) debugWrite(DEBUG_TOKENIZE, "Token: END OF FILE");
                                    token = blankString;
                                    type = TOKEN_NONE;
                                    return 0;
                                }
                                tokenType = getCharacter();
                                moveNext();
                                if ((tokenType == '*') && (getCharacter() == '/')) {
                                    moveNext();
                                    break;
                                }
                            }
                        }
                    }

                    break;
            }

            // Loop if we get an endline right after a new line (skip blank lines/empty cmds)
            // Loop for comments also
        } while (((atNewLine) && (type == TOKEN_ENDLINE)) || (type == TOKEN_COMMENT_BLOCK));
    }

    // Turn into a new line if before a }
    // (the '}' itself is emitted on the next call via nextCloseBrace)
    if ((type == TOKEN_CLOSE_BRACE) && (!atNewLine)) {
        nextCloseBrace = TOKEN_CLOSE_BRACE;
        type = TOKEN_ENDLINE; // Will set atNewLine below
    }

    // At a new line for next time?
    // Hide newlines after a { or }
    if ((type == TOKEN_ENDLINE) || (type == TOKEN_CLOSE_BRACE) || (type == TOKEN_OPEN_BRACE)) atNewLine = 1;
    else atNewLine = 0;

    // Debug?
    if (debug) {
        if (type & TOKEN_KEYWORD) debugWrite(DEBUG_TOKENIZE, "Token: KEYWORD - %s", token.c_str());
        else if (type & TOKEN_OPERATOR) debugWrite(DEBUG_TOKENIZE, "Token: OPERATOR - %s", token.c_str());
        else debugWrite(DEBUG_TOKENIZE, "Token: %s - %s", debugText[type], token.c_str());
    }
    // Recording for bookmark replay?  Cache a copy of this token
    if (cacheRecord) {
        Token recorded;
        recorded.type = type;
        recorded.text = new string(token);
        recorded.rowN = errRow;
        recorded.colN = errCol;
        cached.push_back(recorded);
        if (bookmarkNew) {
            // Bookmarks stored while the cache was empty pointed at end();
            // retarget them at the first real entry just added
            for (map<int, list<Token>::iterator>::iterator pos = bookmarks.begin(); pos != bookmarks.end(); ++pos) {
                if ((*pos).second == cached.end()) --(*pos).second;
            }
            bookmarkNew = 0;
        }
        // Replay pointer is already at end
        tokenizerAssert(cacheReplay == cached.end());
    }
    return 1;
}
- void Tokenizer::skipToken() { start_func
- if (cacheReplay != cached.end()) {
- ++cacheReplay;
- // If at end of cache and not recording, clear
- if ((!cacheRecord) && (cacheReplay == cached.end())) {
- deallocRange(cached.begin(), cached.end());
- cached.clear();
- // Replay pointer is already at end
- tokenizerAssert(cacheReplay == cached.end());
- }
- }
- else {
- int type;
- string token;
- nextToken(type, token);
- }
- }
// Look at the next token without consuming it.
// Fills type/token (and errRow/errCol) exactly like nextToken, but leaves
// the replay position so the same token is delivered again by the next
// nextToken/peekToken/skipToken call. Returns 1 on success, 0 at EOF.
int Tokenizer::peekToken(int& type, string& token) { start_func
    // Already replaying- just report the current cache entry, don't advance
    if (cacheReplay != cached.end()) {
        errRow = (*cacheReplay).rowN;
        errCol = (*cacheReplay).colN;
        type = (*cacheReplay).type;
        token = *((*cacheReplay).text);
        return 1;
    }
    if (nextToken(type, token)) {
        // Don't readd to cache if already recording
        // (nextToken itself appended the token in that case)
        if (!cacheRecord) {
            Token peeked;
            peeked.type = type;
            peeked.text = new string(token);
            peeked.rowN = errRow;
            peeked.colN = errCol;
            cached.push_back(peeked);
        }
        // Replay pointer is at end- move to next-to-last so the peeked
        // token is re-delivered on the next read
        tokenizerAssert(cacheReplay == cached.end());
        --cacheReplay;
        return 1;
    }
    return 0;
}
- void Tokenizer::bookmarkStore(int name) { start_func
- bookmarks[name] = cacheReplay;
- if (cacheReplay == cached.end()) bookmarkNew = 1;
- cacheRecord = 1;
- }
- void Tokenizer::bookmarkReturn(int name) { start_func
- tokenizerAssert(bookmarks.find(name) != bookmarks.end());
- // Return to start of cache
- cacheReplay = (*(bookmarks.find(name))).second;
- }
- void Tokenizer::bookmarkCancel(int name) { start_func
- if (bookmarks.find(name) != bookmarks.end()) {
- bookmarks.erase(name);
- if (bookmarks.empty()) {
- cacheRecord = 0;
- // Clear anything in cache prior to current replay position
- deallocRange(cached.begin(), cacheReplay);
- cached.erase(cached.begin(), cacheReplay);
- tokenizerAssert(cacheReplay == cached.begin());
- }
- }
- }
- #define ERROR_BUFFER_SIZE 1024
- void Tokenizer::outputError(const char* text, ...) { start_func
- va_list arglist;
- va_start(arglist, text);
- if (!silent) {
- if (!errBuffer) errBuffer = new char[ERROR_BUFFER_SIZE];
-
- vsnprintf(errBuffer, ERROR_BUFFER_SIZE, text, arglist);
- errBuffer[ERROR_BUFFER_SIZE - 1] = 0;
-
- // @TODO: Better output (debug window during gameplay; error window during editor)
- debugWrite("ERROR row %d col %d: %s", errRow + 1, errCol + 1, errBuffer);
- }
-
- ++errorCount;
- va_end(arglist);
- }
- void Tokenizer::outputWarning(const char* text, ...) { start_func
- va_list arglist;
- va_start(arglist, text);
- if (!silent) {
- if (!errBuffer) errBuffer = new char[ERROR_BUFFER_SIZE];
-
- vsnprintf(errBuffer, ERROR_BUFFER_SIZE, text, arglist);
- errBuffer[ERROR_BUFFER_SIZE - 1] = 0;
-
- // @TODO: Better output (debug window during gameplay; error window during editor)
- debugWrite("WARNING row %d col %d: %s", errRow + 1, errCol + 1, errBuffer);
- }
- ++warningCount;
- va_end(arglist);
- }
// Enable (nonzero) or disable (zero) suppression of error/warning output;
// errorCount/warningCount still accumulate while silent
void Tokenizer::silentErrors(int newSilent) { start_func
    silent = newSilent;
}
// Zero the error/warning counters and re-enable output
void Tokenizer::resetErrors() { start_func
    errorCount = 0;
    warningCount = 0;
    silent = 0;
}