10 Коміти aa739b73ee ... 580d760b5a

Автор SHA1 Опис Дата
  Steven Oud 580d760b5a bitwise shift operators 9 роки тому
  Steven Oud 84230f026c order no longer matters in symbols struct in the lexer 9 роки тому
  Steven Oud 691ca51d72 keywords separated for symbols so identifiers get read correctly 9 роки тому
  Steven Oud e4e9112581 finished lexer rewrite and tidied parser 9 роки тому
  Steven Oud 9bbeeddd27 revised lexing of multi-char tokens 9 роки тому
  Steven Oud e602a89056 char data type and reworked line number in error messages 9 роки тому
  soud 7206f9b3d4 rpn to ast start 9 роки тому
  soud f87c915102 fixed stupid memory bug and some reformatting 9 роки тому
  Steven 566f0943ac updated `llvm-config --cflags` enables -Wwrite-strings, so make it shut up 9 роки тому
  soud e21117a235 expression parser improvement 9 роки тому
10 змінених файлів з 256 додано та 292 видалено
  1. 1 1
      Makefile
  2. 1 1
      README.md
  3. 1 0
      src/ast.h
  4. 22 9
      src/erupt.c
  5. 6 6
      src/erupt.h
  6. 126 182
      src/lexer.c
  7. 0 30
      src/lexer.h
  8. 2 5
      src/main.c
  9. 97 58
      src/parser.c
  10. 0 0
      src/parser.h

+ 1 - 1
Makefile

@@ -1,7 +1,7 @@
 CC?=gcc
 CXX?=g++
 
-CFLAGS=-Wall -Wextra -O2 -std=c11 `llvm-config --cflags` -c
+CFLAGS=-Wall -Wextra -O2 -std=c11 `llvm-config --cflags` -Wno-discarded-qualifiers -c
 LDFLAGS=`llvm-config --cxxflags --ldflags`
 
 CFILES=src/ast.c src/erupt.c src/lexer.c src/main.c src/parser.c src/token.c \

+ 1 - 1
README.md

@@ -10,7 +10,7 @@ use io;
 
 fun main
 {
-    io::writeln('Hello world');
+    io::writeln("Hello world");
 }
 ```
 

+ 1 - 0
src/ast.h

@@ -39,6 +39,7 @@ typedef enum {
         U32,
         U64,
         STR,
+        CHAR,
         BOOL,
         FLOAT,
         F32,

+ 22 - 9
src/erupt.c

@@ -69,40 +69,53 @@ void verbose_printf(const char *fmt, ...)
         va_end(arg);
 }
 
-void warning_printf(const char *m, const char *fmt, ...)
+void warning_printf(const char *m, size_t line, const char *fmt, ...)
 {
         va_list arg;
 
         va_start(arg, fmt);
 
-        fprintf(stderr, BOLD "[%s]" COLOR_RESET " %s: ",
-                        m, (BOLD YELLOW_TEXT "warning" COLOR_RESET));
+        if (line)
+                fprintf(stderr, BOLD "%s:%zu:" COLOR_RESET " %s ",
+                                m, line, (BOLD YELLOW_TEXT "warning:" COLOR_RESET));
+        else
+                fprintf(stderr, BOLD "%s:" COLOR_RESET " %s ",
+                                m, (BOLD YELLOW_TEXT "warning:" COLOR_RESET));
         vfprintf(stderr, fmt, arg);
 
         va_end(arg);
 }
 
-void error_printf(const char *m, const char *fmt, ...)
+void error_printf(const char *m, size_t line, const char *fmt, ...)
 {
         va_list arg;
 
         va_start(arg, fmt);
 
-        fprintf(stderr, BOLD "[%s]" COLOR_RESET " %s: ",
-                        m, (BOLD RED_TEXT "error" COLOR_RESET));
+        if (line)
+                fprintf(stderr, BOLD "%s:%zu:" COLOR_RESET " %s ",
+                                m, line, (BOLD RED_TEXT "error:" COLOR_RESET));
+        else
+                fprintf(stderr, BOLD "%s:" COLOR_RESET " %s ",
+                                m, (BOLD RED_TEXT "error:" COLOR_RESET));
+
         vfprintf(stderr, fmt, arg);
 
         va_end(arg);
 }
 
-void fatal_error(const char *m, const char *fmt, ...)
+void fatal_error(const char *m, size_t line, const char *fmt, ...)
 {
         va_list arg;
 
         va_start(arg, fmt);
 
-        fprintf(stderr, BOLD "[%s]" COLOR_RESET " %s: ",
-                        m, (BOLD RED_TEXT "fatal error" COLOR_RESET));
+        if (line)
+                fprintf(stderr, BOLD "%s:%zu:" COLOR_RESET " %s ",
+                                m, line, (BOLD RED_TEXT "fatal error:" COLOR_RESET));
+        else
+                fprintf(stderr, BOLD "%s:" COLOR_RESET " %s ",
+                                m, (BOLD RED_TEXT "fatal error:" COLOR_RESET));
         vfprintf(stderr, fmt, arg);
 
         va_end(arg);

+ 6 - 6
src/erupt.h

@@ -38,11 +38,11 @@
 #define ERUPT_OK 0
 #define ERUPT_ERROR -1
 
-#define erupt_error(...) error_printf("erupt", ##__VA_ARGS__)
+#define erupt_error(...) error_printf("erupt", 0, ##__VA_ARGS__)
 #define file_error(...) error_printf(TARGET_FILE, ##__VA_ARGS__)
-#define erupt_fatal_error(...) fatal_error("erupt", ##__VA_ARGS__)
+#define erupt_fatal_error(...) fatal_error("erupt", 0, ##__VA_ARGS__)
 #define file_fatal_error(...) fatal_error(TARGET_FILE, ##__VA_ARGS__)
-#define erupt_warning(...) warning_printf("erupt", ##__VA_ARGS__)
+#define erupt_warning(...) warning_printf("erupt", 0, ##__VA_ARGS__)
 #define file_warning(...) warning_printf(TARGET_FILE, ##__VA_ARGS__)
 
 /* no colors for windows */
@@ -69,8 +69,8 @@ extern char *TARGET_FILE;
 void *smalloc(size_t size);
 void set_output_name(char *filename);
 void verbose_printf(const char *fmt, ...);
-void warning_printf(const char *m, const char *fmt, ...);
-void error_printf(const char *m, const char *fmt, ...);
-void fatal_error(const char *m, const char *fmt, ...);
+void warning_printf(const char *m, size_t line, const char *fmt, ...);
+void error_printf(const char *m, size_t line, const char *fmt, ...);
+void fatal_error(const char *m, size_t line, const char *fmt, ...);
 
 #endif /* !ERUPT_H */

+ 126 - 182
src/lexer.c

@@ -27,32 +27,81 @@ static lexer_t *create_lexer(char *target_file, char *source);
 static char peek(lexer_t *lexer);
 static char eat(lexer_t *lexer);
 static void emit_token(lexer_t *lexer, tokentype type);
+static bool read_symbol(lexer_t *lexer);
 static tokentype get_keyword(lexer_t *lexer);
 static bool is_number(char c);
 static bool is_ident(char c);
 static bool is_whitespace(char c);
-static void read_string(lexer_t *lexer, char id);
+static void read_chars(lexer_t *lexer, char id);
 static void read_number(lexer_t *lexer);
 static void read_ident(lexer_t *lexer);
 static void read_comment(lexer_t *lexer);
 
-/* string => ketword_t "map" for detecting keywords */
-static keyword_t keywords[] = {
-        {FUNCTION_KEYWORD , TOKEN_FUNCTION},
-        {TRUE_KEYWORD     , TOKEN_TRUE},
-        {FALSE_KEYWORD    , TOKEN_FALSE},
-        {MUTABLE_KEYWORD  , TOKEN_MUTABLE},
-        {IMPORT_KEYWORD   , TOKEN_IMPORT},
-        {RETURN_KEYWORD   , TOKEN_RETURN},
-        {FOR_KEYWORD      , TOKEN_FOR},
-        {WHILE_KEYWORD    , TOKEN_WHILE},
-        {BREAK_KEYWORD    , TOKEN_BREAK},
-        {CONTINUE_KEYWORD , TOKEN_CONTINUE},
-        {IF_KEYWORD       , TOKEN_IF},
-        {UNLESS_KEYWORD   , TOKEN_UNLESS},
-        {ELSE_KEYWORD     , TOKEN_ELSE},
+static const keyword_t symbols[] = {
+        {"+"   , TOKEN_PLUS},
+        {"-"   , TOKEN_MIN},
+        {"*"   , TOKEN_STAR},
+        {"/"   , TOKEN_SLASH},
+        {"**"  , TOKEN_STAR_STAR},
+        {"%"   , TOKEN_MOD},
+        {"&"   , TOKEN_B_AND},
+        {"|"   , TOKEN_B_OR},
+        {"~"   , TOKEN_B_NOT},
+        {"^"   , TOKEN_B_XOR},
+        {"!"   , TOKEN_BANG},
+        {"<"   , TOKEN_LT},
+        {">"   , TOKEN_GT},
+        {"<<"  , TOKEN_L_SHIFT},
+        {">>"  , TOKEN_R_SHIFT},
+        {"="   , TOKEN_EQ},
+
+        {"+="  , TOKEN_PLUS_EQ},
+        {"-="  , TOKEN_MIN_EQ},
+        {"-="  , TOKEN_MIN_EQ},
+        {"->"  , TOKEN_R_ARROW},
+        {"*="  , TOKEN_STAR_EQ},
+        {"%="  , TOKEN_MOD_EQ},
+        {"&="  , TOKEN_B_AND_EQ},
+        {"|="  , TOKEN_B_OR_EQ},
+        {"~="  , TOKEN_B_NOT_EQ},
+        {"^="  , TOKEN_B_XOR_EQ},
+        {"/="  , TOKEN_SLASH_EQ},
+        {"!="  , TOKEN_NOT_EQ},
+        {"!="  , TOKEN_NOT_EQ},
+        {"<="  , TOKEN_LT_EQ},
+        {">="  , TOKEN_GT_EQ},
+        {"=="  , TOKEN_EQ_EQ},
+
+        {"&&"  , TOKEN_AND},
+        {"||"  , TOKEN_OR},
+        {"::"  , TOKEN_NS_SEP},
+        {":"   , TOKEN_COLON},
+        {"("   , TOKEN_L_PAREN},
+        {")"   , TOKEN_R_PAREN},
+        {"["   , TOKEN_L_BRACKET},
+        {"]"   , TOKEN_R_BRACKET},
+        {"{"   , TOKEN_L_BRACE},
+        {"}"   , TOKEN_R_BRACE},
+        {"."   , TOKEN_DOT},
+        {","   , TOKEN_COMMA},
+        {";"   , TOKEN_SEMI_COLON},
 };
 
+static const keyword_t keywords[] = {
+        {"fun"    , TOKEN_FUNCTION},
+        {"True"   , TOKEN_TRUE},
+        {"False"  , TOKEN_FALSE},
+        {"mut"    , TOKEN_MUTABLE},
+        {"use"    , TOKEN_IMPORT},
+        {"return" , TOKEN_RETURN},
+        {"for"    , TOKEN_FOR},
+        {"while"  , TOKEN_WHILE},
+        {"break"  , TOKEN_BREAK},
+        {"cont"   , TOKEN_CONTINUE},
+        {"if"     , TOKEN_IF},
+        {"unless" , TOKEN_UNLESS},
+        {"else"   , TOKEN_ELSE},
+};
 
 lexer_t *lex(char *target_file, char *source)
 {
@@ -64,157 +113,28 @@ lexer_t *lex(char *target_file, char *source)
         while (peek(lexer) != '\0') {
                 lexer->start = lexer->pos;
 
-                c = eat(lexer);
-                switch (c) {
-                case '(':
-                           if (peek(lexer) == '*')
-                                   read_comment(lexer);
-                           else
-                                   emit_token(lexer, TOKEN_L_PAREN);
-                           break;
-                case ')': emit_token(lexer, TOKEN_R_PAREN); break;
-                case '[': emit_token(lexer, TOKEN_L_BRACKET); break;
-                case ']': emit_token(lexer, TOKEN_R_BRACKET); break;
-                case '{': emit_token(lexer, TOKEN_L_BRACE); break;
-                case '}': emit_token(lexer, TOKEN_R_BRACE); break;
-                case '.': emit_token(lexer, TOKEN_DOT); break;
-                case ',': emit_token(lexer, TOKEN_COMMA); break;
-                case ';': emit_token(lexer, TOKEN_SEMI_COLON); break;
-                case '"': read_string(lexer, '"'); break;
-                case '\'': read_string(lexer, '\''); break;
-                case '\n': ++lexer->line_n; break;
-                case '_':
-                        if (is_ident(peek(lexer))) {
-                                read_ident(lexer);
-                        } else {
-                                emit_token(lexer, TOKEN_UNDERSCORE);
-                        }
-                        break;
-                case '+':
-                        if (peek(lexer) == '=') {
-                                eat(lexer);
-                                emit_token(lexer, TOKEN_PLUS_EQ);
-                        } else {
-                                emit_token(lexer, TOKEN_PLUS);
-                        }
-                        break;
-                case '-':
-                        if (peek(lexer) == '=') {
-                                eat(lexer);
-                                emit_token(lexer, TOKEN_MIN_EQ);
-                        } else if (peek(lexer) == '>'){
-                                eat(lexer);
-                                emit_token(lexer, TOKEN_R_ARROW);
-                        } else {
-                                emit_token(lexer, TOKEN_MIN);
-                        }
-                        break;
-                case '*':
-                        if (peek(lexer) == '=') {
-                                eat(lexer);
-                                emit_token(lexer, TOKEN_STAR_EQ);
-                        } else if (peek(lexer) == '*'){
-                                eat(lexer);
-                                emit_token(lexer, TOKEN_STAR_STAR);
-                        } else {
-                                emit_token(lexer, TOKEN_STAR);
-                        }
-                        break;
-                case '%':
-                        if (peek(lexer) == '=') {
-                                eat(lexer);
-                                emit_token(lexer, TOKEN_MOD_EQ);
-                        } else {
-                                emit_token(lexer, TOKEN_MOD); break;
-                        }
-                case '&':
-                        if (peek(lexer) == '&') {
-                                eat(lexer);
-                                emit_token(lexer, TOKEN_AND);
-                        } else if (peek(lexer) == '=') {
-                                eat(lexer);
-                                emit_token(lexer, TOKEN_B_AND_EQ);
-                        } else {
-                                emit_token(lexer, TOKEN_B_AND);
-                        }
-                        break;
-                case '|':
-                        if (peek(lexer) == '|') {
-                                eat(lexer);
-                                emit_token(lexer, TOKEN_OR);
-                        } else if (peek(lexer) == '=') {
-                                eat(lexer);
-                                emit_token(lexer, TOKEN_B_OR_EQ);
-                        } else {
-                                emit_token(lexer, TOKEN_B_OR);
-                        }
-                        break;
-                case '~':
-                        if (peek(lexer) == '=') {
-                                eat(lexer);
-                                emit_token(lexer, TOKEN_B_NOT_EQ);
-                        } else {
-                                emit_token(lexer, TOKEN_B_NOT);
-                        }
-                        break;
-                case '^':
-                        if (peek(lexer) == '=') {
-                                eat(lexer);
-                                emit_token(lexer, TOKEN_B_XOR_EQ);
-                        } else {
-                                emit_token(lexer, TOKEN_B_XOR);
-                        }
-                        break;
-                case '/':
-                        if (peek(lexer) == '=') {
-                                eat(lexer);
-                                emit_token(lexer, TOKEN_SLASH_EQ);
-                        } else {
-                                emit_token(lexer, TOKEN_SLASH);
-                        }
-                        break;
-                case '!':
-                        if (peek(lexer) == '=') {
-                                eat(lexer);
-                                emit_token(lexer, TOKEN_NOT_EQ);
-                        } else {
-                                emit_token(lexer, TOKEN_BANG);
-                        }
-                        break;
-                case '<':
-                        if (peek(lexer) == '=') {
-                                eat(lexer);
-                                emit_token(lexer, TOKEN_LT_EQ);
-                        } else {
-                                emit_token(lexer, TOKEN_LT);
-                        }
+                /* check for comment block */
+                if (strncmp(lexer->source + lexer->pos, "(*", 2) == 0) {
+                        read_comment(lexer);
+                        continue;
+                }
 
-                        break;
-                case '>':
-                        if (peek(lexer) == '=') {
-                                eat(lexer);
-                                emit_token(lexer, TOKEN_GT_EQ);
-                        } else {
-                                emit_token(lexer, TOKEN_GT);
-                        }
+                /* check for symbols */
+                if (read_symbol(lexer))
+                        continue;
 
-                        break;
-                case '=':
-                        if (peek(lexer) == '=') {
-                                eat(lexer);
-                                emit_token(lexer, TOKEN_EQ_EQ);
-                        } else {
-                                emit_token(lexer, TOKEN_EQ);
-                        }
+                c = eat(lexer);
 
+                /* special cases like strings, numbers, identifiers and whitespace */
+                switch (c) {
+                case '"':
+                        read_chars(lexer, '"');
                         break;
-                case ':':
-                        if (peek(lexer) == ':') {
-                                eat(lexer);
-                                emit_token(lexer, TOKEN_NS_SEP);
-                        } else {
-                                emit_token(lexer, TOKEN_COLON);
-                        }
+                case '\'':
+                        read_chars(lexer, '\'');
+                        break;
+                case '\n':
+                        ++lexer->line_n;
                         break;
                 case ' ':
                 case '\t':
@@ -229,8 +149,7 @@ lexer_t *lex(char *target_file, char *source)
                                 read_number(lexer);
                         } else {
                                 emit_token(lexer, TOKEN_ERROR);
-                                file_fatal_error("erroneous token '%c' on line %zu\n",
-                                                 c, lexer->line_n);
+                                file_fatal_error(lexer->line_n, "erroneous token '%c'\n", c);
                         }
                 }
         }
@@ -289,18 +208,6 @@ static void emit_token(lexer_t *lexer, tokentype type)
         lexer->tail = token;
 }
 
-static tokentype get_keyword(lexer_t *lexer)
-{
-        char *keyword     = token_value(lexer->source, lexer->start, lexer->pos);
-        size_t n_keywords = sizeof(keywords) / sizeof(keywords[0]);
-
-        for (size_t i = 0; i < n_keywords; ++i)
-                if (strcmp(keyword, keywords[i].name) == 0)
-                        return keywords[i].type;
-
-        return TOKEN_IDENT;
-}
-
 static bool is_number(char c)
 {
         return (c >= '0' && c <= '9') || c == '.';
@@ -316,18 +223,55 @@ static bool is_whitespace(char c)
         return c == ' ' || c == '\t' || c == '\r';
 }
 
-static void read_string(lexer_t *lexer, char id)
+static void read_chars(lexer_t *lexer, char id)
 {
+        bool is_string = id == '\"';
+
         while (eat(lexer) != id) {
-                /* allow escaped quotation chars in strings */
+                /* allow escaped quotation chars in strings and chars */
                 if (lexer->source[lexer->pos - 1] == '\\')
                         eat(lexer);
 
                 if (peek(lexer) == EOF)
-                        file_fatal_error("unexpected EOF while scanning string\n");
+                        file_fatal_error(lexer->line_n, "unexpected EOF while scanning string\n");
+        }
+
+        if (is_string)
+                emit_token(lexer, TOKEN_STRING);
+        else
+                emit_token(lexer, TOKEN_CHAR);
+}
+
+static bool read_symbol(lexer_t *lexer)
+{
+        size_t n_symbols = sizeof(symbols) / sizeof(symbols[0]);
+        keyword_t token = {NULL, TOKEN_ERROR};
+
+        for (size_t i = 0; i < n_symbols; ++i)
+                if (strncmp(lexer->source + lexer->pos, symbols[i].name, strlen(symbols[i].name)) == 0)
+                        token = symbols[i];
+
+        if (token.name) {
+                for (size_t j = 0; j < strlen(token.name); ++j)
+                        eat(lexer);
+
+                emit_token(lexer, token.type);
+                return true;
         }
 
-        emit_token(lexer, TOKEN_STRING);
+        return false;
+}
+
+static tokentype get_keyword(lexer_t *lexer)
+{
+        char *keyword     = token_value(lexer->source, lexer->start, lexer->pos);
+        size_t n_keywords = sizeof(keywords) / sizeof(keywords[0]);
+
+        for (size_t i = 0; i < n_keywords; ++i)
+                if (strcmp(keyword, keywords[i].name) == 0)
+                        return keywords[i].type;
+
+        return TOKEN_IDENT;
 }
 
 static void read_number(lexer_t *lexer)
@@ -352,7 +296,7 @@ static void read_comment(lexer_t *lexer)
 
         while (1) {
                 if (peek(lexer) == EOF)
-                        file_fatal_error("unterminated comment on line %d\n", start_line);
+                        file_fatal_error(lexer->line_n, "unterminated comment\n");
 
                 if (peek(lexer) == '\n')
                         ++lexer->line_n;

+ 0 - 30
src/lexer.h

@@ -25,36 +25,6 @@
 
 #include "token.h"
 
-#define FUNCTION_KEYWORD "fun"
-#define TRUE_KEYWORD     "True"
-#define FALSE_KEYWORD    "False"
-#define MUTABLE_KEYWORD  "mut"
-#define IMPORT_KEYWORD   "use"
-#define RETURN_KEYWORD   "return"
-#define FOR_KEYWORD      "for"
-#define WHILE_KEYWORD    "while"
-#define BREAK_KEYWORD    "break"
-#define CONTINUE_KEYWORD "continue"
-#define IF_KEYWORD       "if"
-#define UNLESS_KEYWORD   "unless"
-#define ELSE_KEYWORD     "else"
-
-#define INT_KEYWORD    "Int"
-#define I8_KEYWORD     "Int8"
-#define I16_KEYWORD    "Int16"
-#define I32_KEYWORD    "Int32"
-#define I64_KEYWORD    "Int64"
-#define U8_KEYWORD     "Int8"
-#define U16_KEYWORD    "UInt16"
-#define U32_KEYWORD    "UInt32"
-#define U64_KEYWORD    "UInt64"
-#define STRING_KEYWORD "Str"
-#define BOOL_KEYWORD   "Bool"
-#define FLOAT_KEYWORD  "Float"
-#define F32_KEYWORD    "Float32"
-#define F64_KEYWORD    "Float64"
-#define NONE_KEYWORD   "None"
-
 typedef struct {
         char *target_file;
         char *source;

+ 2 - 5
src/main.c

@@ -46,8 +46,6 @@ static const char *USAGE =
 
 int main(int argc, char *argv[])
 {
-        char *input_file;
-
         if (!(optind < argc))
                 erupt_fatal_error("an input file is required.\n");
 
@@ -58,8 +56,7 @@ int main(int argc, char *argv[])
 
         TARGET_FILE = argv[optind];
 
-        input_file = read_file(argv[optind]);
-
+        char *input_file = read_file(argv[optind]);
         lexer_t *lexer = lex(argv[optind], input_file);
 
         if (SHOW_TOKENS)
@@ -100,7 +97,7 @@ static char *read_file(const char *path)
         if (S_ISDIR(s.st_mode))
                 erupt_fatal_error("'%s' is a directory\n", path);
 
-        buffer = malloc(MAX_FILE_SIZE);
+        buffer = smalloc(MAX_FILE_SIZE);
         read   = fread(buffer, sizeof(char), MAX_FILE_SIZE, handler);
 
         buffer[read] = '\0';

+ 97 - 58
src/parser.c

@@ -31,13 +31,17 @@ static ast_node_t *parse_call(token_t *ts);
 static ast_node_t *parse_var(token_t *ts);
 static ast_node_t *parse_if(token_t *ts);
 static ast_node_t *parse_expr(token_t *ts, tokentype delimiter);
+static ast_node_t *rpnstack_to_ast(stack_t *rpn);
 static void emit_node(ast_node_list_t *nl, ast_node_t *node);
 static token_t *peek(token_t *ts);
 static token_t *eat(token_t *ts);
 
 bool PARSE_FAILED = false;
 
-static const match_type_t data_types[] = {
+static const struct {
+        char *name;
+        ast_type_t type;
+} data_types[] = {
         {INT_KEYWORD    , INT},
         {I8_KEYWORD     , I8},
         {I16_KEYWORD    , I16},
@@ -48,6 +52,7 @@ static const match_type_t data_types[] = {
         {U32_KEYWORD    , U32},
         {U64_KEYWORD    , U64},
         {STRING_KEYWORD , STR},
+        {CHAR_KEYWORD   , CHAR},
         {BOOL_KEYWORD   , BOOL},
         {FLOAT_KEYWORD  , FLOAT},
         {F32_KEYWORD    , F32},
@@ -82,17 +87,22 @@ static const ast_op_t operators[] = {
         {TOKEN_LT_EQ     , 7  , ASSOC_LEFT  , false},
         {TOKEN_GT_EQ     , 7  , ASSOC_LEFT  , false},
 
+        /* bitwise shifts */
+        {TOKEN_L_SHIFT   , 7  , ASSOC_RIGHT , false},
+        {TOKEN_R_SHIFT   , 7  , ASSOC_RIGHT , false},
+
         /* mathematical operators */
-        {TOKEN_PLUS      , 8  , ASSOC_LEFT  , false},
-        {TOKEN_MIN       , 8  , ASSOC_LEFT  , false},
-        {TOKEN_STAR      , 9  , ASSOC_LEFT  , false},
-        {TOKEN_SLASH     , 9  , ASSOC_LEFT  , false},
-        {TOKEN_MOD       , 9  , ASSOC_LEFT  , false},
-        {TOKEN_STAR_STAR , 10 , ASSOC_RIGHT , false},
+        {TOKEN_PLUS      , 9  , ASSOC_LEFT  , false},
+        {TOKEN_MIN       , 9  , ASSOC_LEFT  , false},
+        {TOKEN_STAR      , 10 , ASSOC_LEFT  , false},
+        {TOKEN_SLASH     , 10 , ASSOC_LEFT  , false},
+        {TOKEN_MOD       , 10 , ASSOC_LEFT  , false},
+        {TOKEN_STAR_STAR , 11 , ASSOC_RIGHT , false},
 
         /* unary operators */
-        {TOKEN_BANG      , 11 , ASSOC_RIGHT , true},
-        {TOKEN_B_NOT     , 11 , ASSOC_RIGHT , true},
+        {TOKEN_MIN       , 12 , ASSOC_RIGHT , true},
+        {TOKEN_BANG      , 12 , ASSOC_RIGHT , true},
+        {TOKEN_B_NOT     , 12 , ASSOC_RIGHT , true},
 
         /* misc */
         {TOKEN_L_PAREN   , 20 , ASSOC_NONE  , false}
@@ -109,15 +119,15 @@ static bool is_operator(tokentype token)
         return false;
 }
 
-static ast_op_t get_operator(tokentype token)
+static const ast_op_t *get_operator(tokentype token)
 {
         size_t ops_len = sizeof(operators) / sizeof(operators[0]);
 
         for (size_t i = 0; i < ops_len; ++i)
                 if (token == operators[i].symbol)
-                        return operators[i];
+                        return &operators[i];
 
-        return (ast_op_t) { TOKEN_ERROR, 0, ASSOC_NONE, false };
+        return NULL;
 }
 
 static ast_type_t get_type(char *name)
@@ -153,8 +163,7 @@ static ast_node_t *parse_top_level(token_t *ts)
         case TOKEN_IMPORT:
                 /* TODO: parse import */
         default:
-                parser_file_error("unexpected '%s' on line %zu\n",
-                                  ts->value, ts->line_n);
+                parser_file_error(ts->line_n, "unexpected '%s'\n", ts->value);
         }
 
         return NULL;
@@ -164,7 +173,11 @@ static ast_node_t *parse_top_level(token_t *ts)
  * parse a block level declaration
  *
  * possible syntax:
- *      alot
+ *      mut Char var ...
+ *      x += 3
+ *      if x < 5 ... | unless x < 5 ...
+ *      while x < 5 ...
+ *      return x
  */
 static ast_node_t *parse_block_level(token_t *ts)
 {
@@ -181,11 +194,9 @@ static ast_node_t *parse_block_level(token_t *ts)
         case TOKEN_RETURN:
                 /* TODO: parse return */
         case TOKEN_EOF:
-                file_fatal_error("unclosed block on line %zu\n",
-                                 ts->line_n);
+                file_fatal_error(ts->line_n, "unclosed block\n");
         default:
-                parser_file_error("unexpected '%s' on line %zu\n",
-                                  ts->value, ts->line_n);
+                parser_file_error(ts->line_n, "unexpected '%s'\n", ts->value);
         }
 
         return NULL;
@@ -198,8 +209,7 @@ static ast_node_list_t *parse_block(token_t *ts)
 
         /* empty body */
         if (ts->type == TOKEN_R_BRACE) {
-                file_warning("empty block on line %zu\n",
-                             ts->line_n);
+                file_warning(ts->line_n, "empty block\n");
                 return NULL;
         }
 
@@ -238,7 +248,7 @@ static ast_node_t *parse_fn(token_t *ts)
         } else if (ts->type == TOKEN_L_BRACE) {
                 body = parse_block(ts);
         } else {
-                parser_file_error("expected semi-colon or block on line %zu\n", ts->line_n);
+                parser_file_error(ts->line_n, "expected semi-colon or block");
         }
 
         return create_fn(prototype, body);
@@ -261,6 +271,21 @@ static ast_node_t *parse_ident(token_t *ts)
 }
 
 /*
+ * a token in an expression can be either an identifier or number. this creates
+ * the appropriate one
+ */
+static ast_node_t *parse_expr_val(token_t token)
+{
+        if (token.type == TOKEN_IDENT)
+                return create_var(token.value, UNKNOWN, NULL, NULL);
+
+        if (token.type == TOKEN_NUMBER)
+                return create_number(strtof(token.value, NULL));
+
+        return NULL;
+}
+
+/*
  * fun func(...) -> Int32 ...
  * fun func(...) ...
  * fun func ...
@@ -275,8 +300,7 @@ static ast_node_t *parse_fn_proto(token_t *ts)
         eat(ts);
 
         if (ts->type != TOKEN_IDENT)
-                parser_file_error("illegal function name '%s' on line %zu\n",
-                                  ts->value, ts->line_n);
+                parser_file_error(ts->line_n, "illegal function name '%s'", ts->value);
 
         name = ts->value;
 
@@ -289,13 +313,12 @@ static ast_node_t *parse_fn_proto(token_t *ts)
         if (peek(ts)->type == TOKEN_R_ARROW) {
                 eat(ts);
                 if ((data_type = get_type(eat(ts)->value)) == UNKNOWN)
-                        parser_file_error("unknown data type '%s' on line %zu in function '%s'\n",
-                                          ts->value, ts->line_n, name);
+                        parser_file_error(ts->line_n, "unknown data type '%s' function '%s'\n",
+                                          ts->value, name);
         } else {
                 /* warn the user if a function besides main doesnt explicitly declare a return type */
                 if (strcmp(name, "main") != 0)
-                        file_warning("function '%s' has no return type set on line %zu\n",
-                                     name, ts->line_n);
+                        file_warning(ts->line_n, "function '%s' has no return type set\n", name);
         }
 
         /* eat the ) or type declaration token */
@@ -307,8 +330,7 @@ static ast_node_t *parse_fn_proto(token_t *ts)
 static ast_node_list_t *parse_fn_params(token_t *ts)
 {
         if (ts->type == TOKEN_EOF)
-                parser_file_error("unclosed function parameters on line %zu\n",
-                                  ts->line_n);
+                parser_file_error(ts->line_n, "unclosed function parameters\n");
 
         while (ts->type != TOKEN_R_PAREN)
                 eat(ts);
@@ -341,12 +363,10 @@ static ast_node_t *parse_var(token_t *ts)
                 eat(ts);
 
         if ((type = get_type(ts->value)) == UNKNOWN)
-                parser_file_error("unknown data type '%s' on line %zu\n",
-                                  ts->value, ts->line_n);
+                parser_file_error(ts->line_n, "unknown data type '%s'\n", ts->value);
 
         if (eat(ts)->type != TOKEN_IDENT)
-                parser_file_error("illegal variable name '%s' on line %zu\n",
-                                  ts->value, ts->line_n);
+                parser_file_error(ts->line_n, "illegal variable name '%s'\n", ts->value);
 
         name = ts->value;
 
@@ -357,8 +377,7 @@ static ast_node_t *parse_var(token_t *ts)
         else if (is_operator(ts->type))
                 return create_var(name, type, mutable, parse_expr(ts, TOKEN_SEMI_COLON));
 
-        parser_file_error("expected semi-colon or assignment, got '%s' on line %zu\n",
-                          ts->value, ts->line_n);
+        parser_file_error(ts->line_n, "expected semi-colon or assignment, got '%s'\n", ts->value);
         return create_var(name, type, mutable, NULL);
 }
 
@@ -388,9 +407,8 @@ static ast_node_t *parse_if(token_t *ts)
 
 static ast_node_t *parse_expr(token_t *ts, tokentype delimiter)
 {
-        /* eat assignment token */
-        eat(ts);
-
+        /* shut up when an error gets printed so we don't spam the console */
+        bool shut_up = false;
         size_t line_start = ts->line_n;
         stack_t *operators = smalloc(sizeof(stack_t));
         stack_t *output = smalloc(sizeof(stack_t));
@@ -400,13 +418,18 @@ static ast_node_t *parse_expr(token_t *ts, tokentype delimiter)
                         /* an identifier or number is expected after an
                            operator, except when the current operator is
                            anything but left-associated */
-                        if (is_operator(prev.type) && get_operator(ts->type).assoc == ASSOC_LEFT)
-                                parser_file_error("unexpected '%s' in expression on line %zu\n",
-                                                  ts->value, ts->line_n);
+                        if (is_operator(prev.type) &&
+                            get_operator(ts->type)->assoc == ASSOC_LEFT) {
+                                if (!shut_up) {
+                                        parser_file_error(ts->line_n, "unexpected '%s' in expression\n",
+                                                          ts->value);
+                                        shut_up = true;
+                                }
+                        }
                         if (stack_size(operators) >= 2 &&
-                            get_operator(ts->type).assoc == ASSOC_LEFT &&
-                            get_operator(ts->type).precedence >=
-                            get_operator(stack_top(&operators).type).precedence) {
+                            get_operator(ts->type)->assoc == ASSOC_LEFT &&
+                            get_operator(ts->type)->precedence >=
+                            get_operator(stack_top(&operators).type)->precedence) {
                                 stack_push(&output, stack_pop(&operators));
                         }
 
@@ -414,10 +437,12 @@ static ast_node_t *parse_expr(token_t *ts, tokentype delimiter)
                 } else if (ts->type == TOKEN_IDENT || ts->type == TOKEN_NUMBER) {
                         /* an operator is expected after an identifier, number
                            or (. any other token is invalid */
-                        if (prev.type == TOKEN_IDENT || prev.type == TOKEN_NUMBER ||
-                            prev.type == TOKEN_R_PAREN)
-                                parser_file_error("unexpected '%s' in expression on line %zu\n",
-                                                  ts->value, ts->line_n);
+                        if ((prev.type == TOKEN_IDENT || prev.type == TOKEN_NUMBER ||
+                            prev.type == TOKEN_R_PAREN) && !shut_up) {
+                                parser_file_error(ts->line_n, "unexpected '%s' in expression\n",
+                                                  ts->value);
+                                shut_up = true;
+                        }
                         stack_push(&output, *ts);
                 } else if (ts->type == TOKEN_IF || ts->type == TOKEN_UNLESS) {
                         bool is_unless = ts->type == TOKEN_UNLESS;
@@ -429,9 +454,9 @@ static ast_node_t *parse_expr(token_t *ts, tokentype delimiter)
                 } else if (ts->type == TOKEN_R_PAREN) {
                         /* pop all operators to output stack until ( is found */
                         while (stack_top(&operators).type != TOKEN_L_PAREN) {
-                                if (!stack_top(&operators).type) {
-                                        parser_file_error("unmatched ')' in expression on line %zu\n",
-                                                          ts->line_n);
+                                if (!stack_top(&operators).type && !shut_up) {
+                                        parser_file_error(ts->line_n, "unmatched ')' in expression\n");
+                                        shut_up = true;
                                         break;
                                 }
 
@@ -442,11 +467,13 @@ static ast_node_t *parse_expr(token_t *ts, tokentype delimiter)
                         stack_pop(&operators);
                 } else {
                         if (ts->type == TOKEN_EOF)
-                                file_fatal_error("unterminated expression on line %zu\n",
-                                                 line_start);
+                                file_fatal_error(line_start, "unterminated expression\n");
 
-                        parser_file_error("unexpected token '%s' in expression on line %zu\n",
-                                          ts->value, ts->line_n);
+                        if (!shut_up) {
+                                parser_file_error(ts->line_n, "unexpected token '%s' in expression\n",
+                                                  ts->value);
+                                shut_up = true;
+                        }
                 }
         }
 
@@ -456,9 +483,21 @@ static ast_node_t *parse_expr(token_t *ts, tokentype delimiter)
 
         stack_dump(output);
 
-        free(operators);
+        return rpnstack_to_ast(output);
+}
 
-        return NULL;
+/*
+ * Turn the output stack produced by parse_expr() into an AST node.
+ *
+ * Work in progress: for now the tokens are only popped off the stack and
+ * their values printed one per line; no AST is constructed yet, so the
+ * function returns NULL.
+ *
+ * rpn: token stack to consume; may be NULL.
+ *
+ * Returns NULL when rpn is NULL, and NULL as a placeholder otherwise until
+ * AST construction is implemented.
+ */
+static ast_node_t *rpnstack_to_ast(stack_t *rpn)
+{
+        if (!rpn)
+                return NULL;
+
+        /* initialised so callers never receive an indeterminate pointer */
+        ast_node_t *expression = NULL;
+
+        /* NOTE(review): one token is popped before the size is checked;
+           presumably the stack is never empty here -- confirm */
+        do {
+                printf("%s\n", stack_pop(&rpn).value);
+        } while (stack_size(rpn));
+
+        return expression;
+}
 
 static void emit_node(ast_node_list_t *nl, ast_node_t *node)

+ 0 - 0
src/parser.h


Деякі файли не було показано, через те що забагато файлів було змінено