6 Commits 39a050c6ec ... a5d76dd982

Author SHA1 Message Date
  soud a5d76dd982 ast debugging and nodes now get stored in an ast_node_list_t 9 years ago
  soud 04cbd7c83f all operators 9 years ago
  soud 551e15d07d using bools 9 years ago
  soud 297ae94afc clean up 9 years ago
  soud 5a47b6782a destroying lexer and old number clean up 9 years ago
  soud f816c8c22e strtol -> strtoul in unsigned integers 9 years ago
10 changed files with 248 additions and 104 deletions
  1. 3 3
      Makefile
  2. 47 49
      src/ast.c
  3. 29 12
      src/ast.h
  4. 4 5
      src/erupt.c
  5. 2 2
      src/erupt.h
  6. 30 15
      src/lexer.c
  7. 3 2
      src/lexer.h
  8. 7 5
      src/main.c
  9. 123 11
      src/parser.c
  10. 0 0
      src/parser.h

+ 3 - 3
Makefile

@@ -1,6 +1,6 @@
 CFLAGS=-Wall -Wextra -g -std=c11 `llvm-config --cflags` -c
 LDFLAGS=`llvm-config --cxxflags --ldflags`
-CFILES=src/*.c
+CFILES=$(wildcard src/*.c)
 
 .PHONY: all clean
 
@@ -11,8 +11,8 @@ all:
 	@-rm *.o
 
 clean:
-	@-rm *.o
-	@-rm -rf bin/
+	@rm *.o
+	@rm -rf bin/
 
 test:
 	@bin/erupt tests/*

+ 47 - 49
src/ast.c

@@ -23,7 +23,7 @@
 #include "erupt.h"
 #include "ast.h"
 
-ast_node_t *create_var_expr(char *name, ast_type_t data_type, char *value, bool mutable)
+ast_node_t *create_var(char *name, ast_type_t data_type, char *value, bool mutable)
 {
         ast_node_t *node = smalloc(sizeof(ast_node_t));
 
@@ -33,41 +33,49 @@ ast_node_t *create_var_expr(char *name, ast_type_t data_type, char *value, bool
         node->var.mutable = mutable;
 
         switch (data_type) {
-                case I8:
-                        node->var.int8_value = strtol(value, NULL, 10);
-                        break;
-                case I16:
-                        node->var.int16_value = strtol(value, NULL, 10);
-                        break;
-                case I32:
-                        node->var.int32_value = strtol(value, NULL, 10);
-                        break;
-                case I64:
-                        node->var.int64_value = strtoll(value, NULL, 10);
-                        break;
-                case U8:
-                        node->var.uint8_value = strtol(value, NULL, 10);
-                        break;
-                case U16:
-                        node->var.uint16_value = strtol(value, NULL, 10);
-                        break;
-                case U32:
-                        node->var.uint32_value = strtol(value, NULL, 10);
-                        break;
-                case U64:
-                        node->var.uint64_value = strtoll(value, NULL, 10);
-                        break;
-                case STR:
-                        node->var.str_value = strdup(value);
-                        break;
-                case BOOL:
-                        node->var.bool_value = true;
-                        break;
-                case FLOAT:
-                        node->var.float_value = strtof(value, NULL);
-                        break;
-                default:
-                        erupt_fatal_error("somehow got invalid data type\n");
+        case I8:
+                node->var.int8_value = strtol(value, NULL, 10);
+                break;
+        case I16:
+                node->var.int16_value = strtol(value, NULL, 10);
+                break;
+        case I32:
+                node->var.int32_value = strtol(value, NULL, 10);
+                break;
+        case I64:
+                node->var.int64_value = strtoll(value, NULL, 10);
+                break;
+        case U8:
+                node->var.uint8_value = strtoul(value, NULL, 10);
+                break;
+        case U16:
+                node->var.uint16_value = strtoul(value, NULL, 10);
+                break;
+        case U32:
+                node->var.uint32_value = strtoul(value, NULL, 10);
+                break;
+        case U64:
+                node->var.uint64_value = strtoull(value, NULL, 10);
+                break;
+        case STR:
+                node->var.str_value = strdup(value);
+                break;
+        case BOOL: {
+                bool state = NULL;
+
+                if (strcmp(value, TRUE_KEYWORD) == 0)
+                        state = true;
+                else if (strcmp(value, FALSE_KEYWORD) == 0)
+                        state = false;
+
+                node->var.bool_value = state;
+                break;
+        }
+        case FLOAT:
+                node->var.float_value = strtof(value, NULL);
+                break;
+        default:
+                erupt_fatal_error("somehow got invalid data type\n");
         }
 
         return node;
@@ -102,16 +110,6 @@ ast_node_t *create_fn(ast_node_t *prototype, ast_node_t *body)
         return node;
 }
 
-ast_node_t *create_number(double value)
-{
-        ast_node_t *node = smalloc(sizeof(ast_node_t));
-
-        node->type = TYPE_NUMBER;
-        node->number.value = value;
-
-        return node;
-}
-
 ast_node_t *create_call(char *name, ast_node_t **args, size_t arg_count)
 {
         ast_node_t *node = smalloc(sizeof(ast_node_t));
@@ -142,7 +140,7 @@ ast_node_t *create_if(ast_node_t *condition, ast_node_t *true_body,
         return node;
 }
 
-ast_node_t *create_bin_exp(ast_bin_op_t operator, ast_node_t *lhs, ast_node_t *rhs)
+ast_node_t *create_bin_exp(ast_op_t operator, ast_node_t *lhs, ast_node_t *rhs)
 {
         ast_node_t *node = smalloc(sizeof(ast_node_t));
 
@@ -154,7 +152,7 @@ ast_node_t *create_bin_exp(ast_bin_op_t operator, ast_node_t *lhs, ast_node_t *r
         return node;
 }
 
-void r_free_nodes(ast_node_t *node)
+void destroy_ast(ast_node_t *node)
 {
         switch(node->type) {
         case TYPE_VAR:
@@ -177,7 +175,6 @@ void r_free_nodes(ast_node_t *node)
                 if (node->fn.body)
                         free(node->fn.body);
                 break;
-        case TYPE_NUMBER: break;
         case TYPE_CALL:
                 if (node->call.name)
                         free(node->call.name);
@@ -206,4 +203,5 @@ void r_free_nodes(ast_node_t *node)
         }
 
         free(node);
+        verbose_printf("destroyed ast\n");
 }

+ 29 - 12
src/ast.h

@@ -26,6 +26,8 @@
 #include <stdint.h>
 #include <limits.h>
 
+#include "lexer.h"
+
 typedef enum {
         I8,
         I16,
@@ -43,11 +45,32 @@ typedef enum {
 } ast_type_t;
 
+/*
+ * Operator kinds. Values of this type are passed to create_bin_exp() and
+ * stored in the `operator` field of binary-expression nodes.
+ */
 typedef enum {
+        /* math operators */
         OP_PLUS,
         OP_MIN,
         OP_DIV,
-        OP_MUL
-} ast_bin_op_t;
+        OP_MUL,
+        OP_MOD,
+
+        /* logical operators */
+        OP_AND,
+        OP_OR,
+        OP_NOT,
+
+        /* relational operators */
+        OP_NOT_EQ,
+        OP_EQ,
+        OP_GT,
+        OP_GT_EQ,
+        OP_LT,
+        OP_LT_EQ,
+
+        /* bitwise operators */
+        OP_BIN_OR,
+        OP_BIN_XOR,
+        OP_BIN_AND,
+        OP_BIN_NOT
+} ast_op_t;
 
 struct ast_node;
 
@@ -86,10 +109,6 @@ typedef struct {
 } ast_fn_t;
 
 typedef struct {
-        double value;
-} ast_number_t;
-
-typedef struct {
         char *name;
         struct ast_node **args;
         size_t arg_count;
@@ -102,7 +121,7 @@ typedef struct {
 } ast_if_t;
 
 typedef struct {
-        ast_bin_op_t operator;
+        ast_op_t operator;
 
         struct ast_node *lhs;
         struct ast_node *rhs;
@@ -113,7 +132,6 @@ typedef struct ast_node {
                 TYPE_VAR,
                 TYPE_PROTO,
                 TYPE_FN,
-                TYPE_NUMBER,
                 TYPE_CALL,
                 TYPE_IF,
                 TYPE_BIN_EXP
@@ -123,19 +141,18 @@ typedef struct ast_node {
                 ast_var_t var;
                 ast_proto_t prototype;
                 ast_fn_t fn;
-                ast_number_t number;
                 ast_call_t call;
                 ast_if_t if_exp;
                 ast_bin_exp_t bin_exp;
         };
 } ast_node_t;
 
-ast_node_t *create_var_expr(char *name, ast_type_t data_type, char *value, bool mutable);
+ast_node_t *create_var(char *name, ast_type_t data_type, char *value, bool mutable);
 ast_node_t *create_fn_proto(char *name, ast_var_t **args, size_t arg_count, ast_type_t data_type);
 ast_node_t *create_fn(ast_node_t *prototype, ast_node_t *body);
-ast_node_t *create_number(double value);
 ast_node_t *create_call(char *name, ast_node_t **args, size_t arg_count);
 ast_node_t *create_if(ast_node_t *condition, ast_node_t *true_body, ast_node_t *false_body);
-ast_node_t *create_bin_exp(ast_bin_op_t operator, ast_node_t *lhs, ast_node_t *rhs);
+ast_node_t *create_bin_exp(ast_op_t operator, ast_node_t *lhs, ast_node_t *rhs);
+void destroy_ast(ast_node_t *node);
 
 #endif /* !AST_H */

+ 4 - 5
src/erupt.c

@@ -21,11 +21,10 @@
  */
 
 #include <stdarg.h>
-
 #include "erupt.h"
 
-int VERBOSE = 0;
-int SHOW_TOKENS = 0;
+bool VERBOSE = 0;
+bool SHOW_TOKENS = 0;
 char *TARGET_FILE = "unknown";
 char *OUTPUT_NAME = "main";
 
@@ -63,8 +62,8 @@ void verbose_printf(const char *fmt, ...)
         va_list arg;
         va_start(arg, fmt);
 
-        fprintf(stdout, "[*] ");
-        vfprintf(stdout, fmt, arg);
+        printf("[*] ");
+        vprintf(fmt, arg);
 
         va_end(arg);
 }

+ 2 - 2
src/erupt.h

@@ -56,8 +56,8 @@
         #define ORANGE(x) ("\x1B[33m" x "\x1B[00m")
 #endif
 
-extern int VERBOSE;
-extern int SHOW_TOKENS;
+extern bool VERBOSE;
+extern bool SHOW_TOKENS;
 extern char *OUTPUT_NAME;
 extern char *TARGET_FILE;
 

+ 30 - 15
src/lexer.c

@@ -28,8 +28,8 @@ static char peek(lexer_t *lexer);
 static char eat(lexer_t *lexer);
 static void emit_token(lexer_t *lexer, tokentype type);
 static tokentype get_keyword(lexer_t *lexer);
-static int is_int(char c);
-static int is_ident(char c);
+static bool is_int(char c);
+static bool is_ident(char c);
 static void read_string(lexer_t *lexer, char id);
 static void read_int(lexer_t *lexer);
 static void read_ident(lexer_t *lexer);
@@ -52,7 +52,7 @@ static keyword_t keywords[] = {
 };
 
 
-token_t *tokenize(char *target_file, char *source)
+lexer_t *tokenize(char *target_file, char *source)
 {
         char c;
         lexer_t *lexer = create_lexer(target_file, source);
@@ -72,9 +72,11 @@ token_t *tokenize(char *target_file, char *source)
                 case '}': emit_token(lexer, TOKEN_R_BRACE); break;
                 case '.': emit_token(lexer, TOKEN_DOT); break;
                 case ',': emit_token(lexer, TOKEN_COMMA); break;
+                case '%': emit_token(lexer, TOKEN_MOD); break;
                 case ';': emit_token(lexer, TOKEN_SEMI_COLON); break;
                 case '"': read_string(lexer, '"'); break;
                 case '\'': read_string(lexer, '\''); break;
+                case '\n': ++lexer->line_n; break;
                 case '+':
                         if (peek(lexer) == '=') {
                                 eat(lexer);
@@ -99,9 +101,6 @@ token_t *tokenize(char *target_file, char *source)
                                 emit_token(lexer, TOKEN_STAR);
                         }
                         break;
-                case '%':
-                        emit_token(lexer, TOKEN_MOD);
-                        break;
                 case '&':
                         if (peek(lexer) == '&') {
                                 eat(lexer);
@@ -199,9 +198,6 @@ token_t *tokenize(char *target_file, char *source)
                         while (peek(lexer) == ' ')
                                 eat(lexer);
                         break;
-                case '\n':
-                        ++lexer->line_n;
-                        break;
                 default:
                         if (is_ident(c)) {
                                 read_ident(lexer);
@@ -220,7 +216,7 @@ token_t *tokenize(char *target_file, char *source)
 
         verbose_printf("lexical analysis done\n");
 
-        return lexer->head;
+        return lexer;
 }
 
 static lexer_t *create_lexer(char *target_file, char *source)
@@ -232,7 +228,7 @@ static lexer_t *create_lexer(char *target_file, char *source)
         lexer->line_n      = 1;
         lexer->start       = 0;
         lexer->pos         = 0;
-        lexer->head        = NULL;
+        lexer->ts          = NULL;
         lexer->tail        = NULL;
 
         return lexer;
@@ -262,7 +258,7 @@ static void emit_token(lexer_t *lexer, tokentype type)
         token->next = NULL;
 
         if (lexer->tail == NULL)
-                lexer->head = token;
+                lexer->ts = token;
         else
                 lexer->tail->next = token;
 
@@ -281,12 +277,12 @@ static tokentype get_keyword(lexer_t *lexer)
         return TOKEN_IDENT;
 }
 
+/* Report whether c lies in the character range '0' through '9'. */
-static int is_int(char c)
+static bool is_int(char c)
 {
         return c >= '0' && c <= '9';
 }
 
+/*
+ * Report whether c is a lowercase letter 'a'..'z', an uppercase letter
+ * 'A'..'Z', or an underscore. Digits are not accepted by this check.
+ */
-static int is_ident(char c)
+static bool is_ident(char c)
 {
         return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
 }
@@ -340,8 +336,27 @@ static void read_long_comment(lexer_t *lexer)
 
                 if (eat(lexer) == '*' && peek(lexer) == '/') {
                         eat(lexer);
-                        verbose_printf("\tskipped long comment\n");
+                        verbose_printf("skipped long comment\n");
                         break;
                 }
         }
 }
+
+/*
+ * Release a lexer and every token reachable from lexer->ts.
+ *
+ * The list is walked forward: the next pointer is saved before a token is
+ * freed, so the walk never reads freed memory and does not depend on the
+ * prev links or on lexer->tail to release the tokens. Each token's value
+ * string is freed together with the token.
+ *
+ * lexer: lexer to destroy; passing NULL is a no-op.
+ */
+void destroy_lexer(lexer_t *lexer)
+{
+        token_t *token;
+
+        if (lexer == NULL)
+                return;
+
+        token = lexer->ts;
+        while (token != NULL) {
+                token_t *next = token->next;
+
+                /* free(NULL) is a no-op, so no guard is needed */
+                free(token->value);
+                free(token);
+                token = next;
+        }
+
+        free(lexer);
+        verbose_printf("destroyed lexer\n");
+}

+ 3 - 2
src/lexer.h

@@ -59,7 +59,7 @@ typedef struct {
         size_t start;
         size_t pos;
 
-        token_t *head;
+        token_t *ts;
         token_t *tail;
 } lexer_t;
 
@@ -68,6 +68,7 @@ typedef struct {
         tokentype type;
 } keyword_t;
 
-token_t *tokenize(char *target_file, char *source);
+lexer_t *tokenize(char *target_file, char *source);
+void destroy_lexer(lexer_t *lexer);
 
 #endif /* !LEXER_H */

+ 7 - 5
src/main.c

@@ -59,14 +59,16 @@ int main(int argc, char *argv[])
 
         input_file = read_file(argv[optind]);
 
-        token_t *token_stream = tokenize(argv[optind], input_file);
+        lexer_t *lexer = tokenize(argv[optind], input_file);
 
         if (SHOW_TOKENS)
-                dump_tokens(token_stream);
+                dump_tokens(lexer->ts);
 
         free(input_file);
 
-        parse_token_stream(token_stream);
+        parse_token_stream(lexer->ts);
+
+        destroy_lexer(lexer);
 
         return ERUPT_OK;
 }
@@ -130,11 +132,11 @@ static int get_options(int argc, char *argv[])
                                 return ERUPT_ERROR;
 
                         case 'V':
-                                VERBOSE = 1;
+                                VERBOSE = true;
                                 break;
 
                         case 't':
-                                SHOW_TOKENS = 1;
+                                SHOW_TOKENS = true;
                                 break;
 
                         default:

+ 123 - 11
src/parser.c

@@ -29,6 +29,9 @@ static ast_node_t *parse_fn_proto(token_t *ts);
 static ast_node_t *parse_fn_params(token_t *ts);
 static ast_node_t *parse_call(token_t *ts);
 static ast_node_t *parse_var(token_t *ts);
+static void emit_node(ast_node_list_t *nl, ast_node_t *node);
+static void dump_node(ast_node_t *node);
+static void dump_nodes(ast_node_list_t *nl);
 static token_t *peek(token_t *ts);
 static token_t *eat(token_t *ts);
 
@@ -60,6 +63,51 @@ static ast_type_t get_type(char *name)
         return UNKNOWN;
 }
 
+/*
+ * Translate an operator token into the matching ast_op_t value.
+ *
+ * token: token whose type selects the operator.
+ *
+ * Returns the ast_op_t for the token type. Any other token type is reported
+ * through file_fatal_error() with the token's value and line number.
+ * NOTE(review): the default branch has no return, so this relies on
+ * file_fatal_error() not returning -- confirm.
+ */
+static ast_op_t get_operator(token_t *token)
+{
+        switch (token->type) {
+        case TOKEN_PLUS:
+                return OP_PLUS;
+        case TOKEN_MIN:
+                return OP_MIN;
+        case TOKEN_SLASH:
+                return OP_DIV;
+        case TOKEN_STAR:
+                return OP_MUL;
+        case TOKEN_MOD:
+                return OP_MOD;
+        case TOKEN_AND:
+                return OP_AND;
+        case TOKEN_OR:
+                return OP_OR;
+        case TOKEN_BANG:
+                return OP_NOT;
+        case TOKEN_NOT_EQ:
+                return OP_NOT_EQ;
+        case TOKEN_EQ_EQ:
+                return OP_EQ;
+        case TOKEN_GT:
+                return OP_GT;
+        case TOKEN_GT_EQ:
+                return OP_GT_EQ;
+        case TOKEN_LT:
+                return OP_LT;
+        case TOKEN_LT_EQ:
+                return OP_LT_EQ;
+        case TOKEN_B_OR:
+                return OP_BIN_OR;
+        case TOKEN_B_XOR:
+                return OP_BIN_XOR;
+        case TOKEN_B_AND:
+                return OP_BIN_AND;
+        case TOKEN_B_NOT:
+                return OP_BIN_NOT;
+        default:
+                file_fatal_error("invalid operator '%s' on line %zu\n",
+                                 token->value, token->line_n);
+        }
+}
+
 static ast_node_t *parse_expr(token_t *ts)
 {
         switch (ts->type) {
@@ -69,7 +117,10 @@ static ast_node_t *parse_expr(token_t *ts)
                 return parse_var(ts);
         case TOKEN_IDENT:
                 return parse_id(ts);
+        case TOKEN_IMPORT:
+                /* TODO: parse import */
         default:
+                return NULL;
                 file_fatal_error("unexpected token '%s' on line %zu\n",
                                  ts->value, ts->line_n);
         }
@@ -79,13 +130,17 @@ static ast_node_t *parse_expr(token_t *ts)
 
 int parse_token_stream(token_t *ts)
 {
+        ast_node_list_t *nl = smalloc(sizeof(ast_node_list_t));
+
         verbose_printf("generating abstract syntax tree\n");
 
         do {
-                parse_expr(ts);
+                emit_node(nl, parse_expr(ts));
                 eat(ts);
         } while(ts->type != TOKEN_EOF);
 
+        dump_nodes(nl);
+
         return ERUPT_OK;
 }
 
@@ -95,6 +150,7 @@ static ast_node_t *parse_fn(token_t *ts)
 
         if (ts->type == TOKEN_SEMI_COLON) {
                 eat(ts);
+                return prototype;
         } else if (ts->type == TOKEN_L_BRACE) {
                 /* TODO: parse block */
         } else {
@@ -120,7 +176,7 @@ static ast_node_t *parse_fn_proto(token_t *ts)
         ast_var_t **args = NULL;
         size_t arg_count = 0;
 
-        /* skip over function declaration token */
+        /* eat function declaration token */
         eat(ts);
 
         if (ts->type != TOKEN_IDENT)
@@ -130,7 +186,7 @@ static ast_node_t *parse_fn_proto(token_t *ts)
         name = ts->value;
 
         if (eat(ts)->type != TOKEN_L_PAREN)
-                file_fatal_error("expecting (, got '%s' on line %zu\n",
+                file_fatal_error("expected (, got '%s' on line %zu\n",
                                  ts->value, ts->line_n);
 
         if (peek(ts)->type == TOKEN_R_PAREN) {
@@ -150,7 +206,7 @@ static ast_node_t *parse_fn_proto(token_t *ts)
                                          ts->value, ts->line_n, name);
         }
 
-        /* skip over ) or type declaration token */
+        /* eat the ) or type declaration token */
         eat(ts);
 
         return create_fn_proto(name, args, arg_count, data_type);
@@ -201,16 +257,72 @@ static ast_node_t *parse_var(token_t *ts)
 
         eat(ts);
 
-        if (ts->type != TOKEN_EQ || ts->type != TOKEN_SEMI_COLON)
-                file_fatal_error("expected assignment or semi-colon, got %s on line %zu\n",
-                                 ts->value, ts->line_n);
+        if (ts->type != TOKEN_EQ && ts->type != TOKEN_SEMI_COLON)
+                file_fatal_error("expecting assignment or declaration, got '%s' on line %zu\n",
+                                ts->value, ts->line_n);
+
+        if (ts->type == TOKEN_EQ) {
+                /* get to the value */
+                eat(ts);
+
+                if (peek(ts)->type == TOKEN_SEMI_COLON) {
+                        value = ts->value;
+                } else {
+                        /* TODO: parse expression */
+                }
+        }
 
-        /* declaration */
-        if (ts->type == TOKEN_SEMI_COLON)
-                return create_var_expr(name, type, value, mutable);
+        return create_var(name, type, value, mutable);
+}
+
+/*
+ * Append node to the list headed by nl.
+ *
+ * When the head entry holds no node yet, the node is stored there directly.
+ * Otherwise a new entry is allocated with smalloc() and linked after the
+ * current last entry.
+ */
+static void emit_node(ast_node_list_t *nl, ast_node_t *node)
+{
+        ast_node_list_t *last = nl;
+        ast_node_list_t *entry;
+
+        /* an empty head slot takes the node without allocating */
+        if (last->node == NULL) {
+                last->node = node;
+                return;
+        }
+
+        /* advance to the final entry of the list */
+        for (; last->next != NULL; last = last->next)
+                ;
+
+        entry = smalloc(sizeof(ast_node_list_t));
+        last->next = entry;
+        entry->node = node;
+        entry->next = NULL;
+}
 
-        /* assignment */
+/*
+ * Print a debug description of one AST node to stdout.
+ *
+ * Variables and prototypes print their name and data type (variables also
+ * print the mutable flag); a function prints a header followed by its
+ * prototype. Call, if and binary-expression nodes print nothing yet.
+ *
+ * node: node to describe; NULL is ignored.
+ */
+static void dump_node(ast_node_t *node)
+{
+        /* parse_expr() may return NULL, and that result ends up in the list */
+        if (node == NULL)
+                return;
+
+        switch (node->type) {
+        case TYPE_VAR:
+                /* cast the enum so the argument matches %u exactly */
+                printf("variable:\n\tname: %s\n\ttype: %u\n\tmutable:%d\n",
+                       node->var.name, (unsigned int)node->var.data_type,
+                       node->var.mutable);
+                break;
+        case TYPE_PROTO:
+                printf("proto:\n\tname: %s\n\ttype: %u\n",
+                       node->prototype.name,
+                       (unsigned int)node->prototype.data_type);
+                break;
+        case TYPE_FN:
+                printf("function:\n");
+                dump_node(node->fn.prototype);
+                break;
+        case TYPE_CALL:
+                /* TODO */
+                break;
+        case TYPE_IF:
+                /* TODO */
+                break;
+        case TYPE_BIN_EXP:
+                /* TODO */
+                break;
+        }
+}
 
+/*
+ * Dump every node stored in the list headed by nl.
+ *
+ * The loop tests the current entry rather than its successor, so a
+ * one-element list is handled and the final entry is printed too. Entries
+ * that hold no node are skipped.
+ */
+static void dump_nodes(ast_node_list_t *nl)
+{
+        for (; nl != NULL; nl = nl->next) {
+                if (nl->node != NULL)
+                        dump_node(nl->node);
+        }
 }
 
 static token_t *peek(token_t *ts)

+ 0 - 0
src/parser.h


Some files were not shown because too many files changed in this diff