11 コミット 69ee18e3f4 ... aa739b73ee

作者 SHA1 メッセージ 日付
  soud aa739b73ee default int and float types 9 年 前
  Steven 743bad060b (almost) all operators in parser, shorthand if/unless and mod assignment operator 9 年 前
  Steven 598cfbb93e pow operator 9 年 前
  Steven 69f881ce1e float32 and float64 added, if parser extended 9 年 前
  soud ae34909c9b expression parsing workable state 9 年 前
  Steven 88005c2d52 stack push/pop fix 9 年 前
  Steven c1a8774d2a readme fix 9 年 前
  Steven 1d2a6750bb stack to token stack, basic expression parsing 9 年 前
  soud 411136a90e operators association in parsing expression 9 年 前
  Steven 19637b39c6 shunting yard start 9 年 前
  Steven 07548ee7ff updated readme and stack data type 9 年 前
10 ファイル変更316 行追加44 行削除
  1. 8 4
      Makefile
  2. 53 5
      README.md
  3. 2 1
      src/ast.c
  4. 7 1
      src/ast.h
  5. 10 1
      src/lexer.c
  6. 3 0
      src/lexer.h
  7. 156 32
      src/parser.c
  8. 1 0
      src/parser.h
  9. 76 0
      src/stack.c
  10. 0 0
      src/stack.h

+ 8 - 4
Makefile

@@ -1,7 +1,12 @@
+CC?=gcc
+CXX?=g++
+
 CFLAGS=-Wall -Wextra -O2 -std=c11 `llvm-config --cflags` -c
 LDFLAGS=`llvm-config --cxxflags --ldflags`
-CFILES=src/ast.c src/erupt.c src/lexer.c src/main.c src/parser.c src/token.c
-OBJFILES=ast.o erupt.o lexer.o main.o parser.o token.o
+
+CFILES=src/ast.c src/erupt.c src/lexer.c src/main.c src/parser.c src/token.c \
+       src/stack.c
+OBJFILES=ast.o erupt.o lexer.o main.o parser.o token.o stack.o
 
 all:
 	@mkdir -p bin/
@@ -13,8 +18,7 @@ install:
 	cp bin/erupt /usr/bin/
 
 clean:
-	@rm *.o
-	@rm -rf bin/
+	@-rm -rf $(OBJFILES) bin/
 
 test:
 	@bin/erupt tests/*

+ 53 - 5
README.md

@@ -2,8 +2,28 @@
 Erupt is my attempt at building a compiler in C. This project is for learning
 purpose only and should not be used in production.
 
-## Usage
-### Build
+# Example
+A simple hello world program may look like this:
+
+```rust
+use io;
+
+fun main
+{
+    io::writeln('Hello world');
+}
+```
+
+# Installation
+Installing Erupt requires some dependencies:
+
+## Dependencies
+* git
+* LLVM >= 3.6 installed and `llvm-config` in your `$PATH`
+* a C and C++ compiler (e.g. `gcc`, `clang`)
+* GNU `make`
+
+## Building
 
 ```bash
 $ git clone https://github.com/soudy/Erupt
@@ -11,7 +31,35 @@ $ cd Erupt
 $ make
 ```
 
-This will create the binary `bin/erupt`.
+Or if you wish to use a non-default C compiler:
+```bash
+$ CC=compiler make
+```
+(where `compiler` is the compiler of choice).
 
-### Run
-    $ erupt [options] file
+Erupt has been tested with `gcc` 5.2.0 and `clang` 3.6.2 on x86_64 GNU/Linux,
+but most other C compilers with C11 support (the Makefile builds with
+`-std=c11`) should work.
+
+Building creates the binary `bin/erupt`. To install `erupt` into `/usr/bin`,
+run `make install` with superuser privileges.
+
+# Run
+```bash
+$ erupt [options] file
+```
+
+## Options
+```
+-o, --output
+       set output name (default: main)
+-v, --version
+       show version
+-V, --verbose
+       verbose mode
+-t, --tokens
+       show generated token stream
+-n, --nodes
+       show nodes of the generated AST
+-h, --help
+       show this
+```

+ 2 - 1
src/ast.c

@@ -33,7 +33,7 @@ ast_node_t *create_number(float value)
         return node;
 }
 
-ast_node_t *create_var(char *name, ast_type_t data_type, bool mutable)
+ast_node_t *create_var(char *name, ast_type_t data_type, bool mutable, ast_node_t *value)
 {
         ast_node_t *node = smalloc(sizeof(ast_node_t));
 
@@ -41,6 +41,7 @@ ast_node_t *create_var(char *name, ast_type_t data_type, bool mutable)
         node->var.name = strdup(name);
         node->var.data_type = data_type;
         node->var.mutable = mutable;
+        node->var.value = value;
 
         return node;
 }

+ 7 - 1
src/ast.h

@@ -29,6 +29,7 @@
 #include "lexer.h"
 
 typedef enum {
+        INT,
         I8,
         I16,
         I32,
@@ -40,6 +41,8 @@ typedef enum {
         STR,
         BOOL,
         FLOAT,
+        F32,
+        F64,
         NONE,
         UNKNOWN
 } ast_type_t;
@@ -60,6 +63,7 @@ typedef struct {
 struct ast_node;
 struct ast_node_list;
 
+/* TODO: number types */
 typedef struct {
         float value;
 } ast_number_t;
@@ -68,6 +72,7 @@ typedef struct {
         char *name;
         ast_type_t data_type;
         bool mutable;
+        struct ast_node *value;
 } ast_var_t;
 
 typedef struct {
@@ -126,7 +131,8 @@ typedef struct ast_node_list {
         struct ast_node_list *next;
 } ast_node_list_t;
 
-ast_node_t *create_var(char *name, ast_type_t data_type, bool mutable);
+ast_node_t *create_number(float value);
+ast_node_t *create_var(char *name, ast_type_t data_type, bool mutable, ast_node_t *value);
 ast_node_t *create_fn_proto(char *name, ast_node_list_t *args, ast_type_t data_type);
 ast_node_t *create_fn(ast_node_t *prototype, ast_node_list_t *body);
 ast_node_t *create_call(char *name, ast_node_list_t *args);

+ 10 - 1
src/lexer.c

@@ -79,7 +79,6 @@ lexer_t *lex(char *target_file, char *source)
                 case '}': emit_token(lexer, TOKEN_R_BRACE); break;
                 case '.': emit_token(lexer, TOKEN_DOT); break;
                 case ',': emit_token(lexer, TOKEN_COMMA); break;
-                case '%': emit_token(lexer, TOKEN_MOD); break;
                 case ';': emit_token(lexer, TOKEN_SEMI_COLON); break;
                 case '"': read_string(lexer, '"'); break;
                 case '\'': read_string(lexer, '\''); break;
@@ -114,10 +113,20 @@ lexer_t *lex(char *target_file, char *source)
                         if (peek(lexer) == '=') {
                                 eat(lexer);
                                 emit_token(lexer, TOKEN_STAR_EQ);
+                        } else if (peek(lexer) == '*'){
+                                eat(lexer);
+                                emit_token(lexer, TOKEN_STAR_STAR);
                         } else {
                                 emit_token(lexer, TOKEN_STAR);
                         }
                         break;
+                case '%':
+                        /* '%=' is the mod assignment operator, a lone '%'
+                           is the modulo operator */
+                        if (peek(lexer) == '=') {
+                                eat(lexer);
+                                emit_token(lexer, TOKEN_MOD_EQ);
+                        } else {
+                                emit_token(lexer, TOKEN_MOD);
+                        }
+                        /* break on both paths: without it the '%=' branch
+                           falls through into the '&' case below */
+                        break;
                 case '&':
                         if (peek(lexer) == '&') {
                                 eat(lexer);

+ 3 - 0
src/lexer.h

@@ -39,6 +39,7 @@
 #define UNLESS_KEYWORD   "unless"
 #define ELSE_KEYWORD     "else"
 
+#define INT_KEYWORD    "Int"
 #define I8_KEYWORD     "Int8"
 #define I16_KEYWORD    "Int16"
 #define I32_KEYWORD    "Int32"
@@ -50,6 +51,8 @@
 #define STRING_KEYWORD "Str"
 #define BOOL_KEYWORD   "Bool"
 #define FLOAT_KEYWORD  "Float"
+#define F32_KEYWORD    "Float32"
+#define F64_KEYWORD    "Float64"
 #define NONE_KEYWORD   "None"
 
 typedef struct {

+ 156 - 32
src/parser.c

@@ -30,15 +30,15 @@ static ast_node_list_t *parse_fn_params(token_t *ts);
 static ast_node_t *parse_call(token_t *ts);
 static ast_node_t *parse_var(token_t *ts);
 static ast_node_t *parse_if(token_t *ts);
-static ast_node_t *parse_expr(token_t *ts);
+static ast_node_t *parse_expr(token_t *ts, tokentype delimiter);
 static void emit_node(ast_node_list_t *nl, ast_node_t *node);
 static token_t *peek(token_t *ts);
 static token_t *eat(token_t *ts);
 
 bool PARSE_FAILED = false;
 
-/* string => ast_type_t "map" for detecting types */
-static match_type_t data_types[] = {
+static const match_type_t data_types[] = {
+        {INT_KEYWORD    , INT},
         {I8_KEYWORD     , I8},
         {I16_KEYWORD    , I16},
         {I32_KEYWORD    , I32},
@@ -50,19 +50,52 @@ static match_type_t data_types[] = {
         {STRING_KEYWORD , STR},
         {BOOL_KEYWORD   , BOOL},
         {FLOAT_KEYWORD  , FLOAT},
+        {F32_KEYWORD    , F32},
+        {F64_KEYWORD    , F64},
         {NONE_KEYWORD   , NONE}
 };
 
-static ast_op_t operators[] = {
-        {TOKEN_PLUS     , 5  , ASSOC_LEFT , false},
-        {TOKEN_MIN      , 5  , ASSOC_LEFT , false},
-        {TOKEN_STAR     , 7  , ASSOC_LEFT , false},
-        {TOKEN_SLASH    , 7  , ASSOC_LEFT , false},
-        {TOKEN_PLUS_EQ  , 0  , ASSOC_NONE , false},
-        {TOKEN_MIN_EQ   , 0  , ASSOC_NONE , false},
-        {TOKEN_SLASH_EQ , 0  , ASSOC_NONE , false},
-        {TOKEN_SLASH_EQ , 0  , ASSOC_NONE , false},
-        {TOKEN_EQ       , 10 , ASSOC_NONE , false}
+static const ast_op_t operators[] = {
+        /* assignment operators */
+        {TOKEN_EQ        , 0  , ASSOC_NONE  , false},
+        {TOKEN_PLUS_EQ   , 0  , ASSOC_NONE  , false},
+        {TOKEN_MIN_EQ    , 0  , ASSOC_NONE  , false},
+        {TOKEN_STAR_EQ   , 0  , ASSOC_NONE  , false},
+        {TOKEN_SLASH_EQ  , 0  , ASSOC_NONE  , false},
+        {TOKEN_MOD_EQ    , 0  , ASSOC_NONE  , false},
+        {TOKEN_B_AND_EQ  , 0  , ASSOC_NONE  , false},
+        {TOKEN_B_OR_EQ   , 0  , ASSOC_NONE  , false},
+        {TOKEN_B_XOR_EQ  , 0  , ASSOC_NONE  , false},
+
+        /* (bitwise) logical operators */
+        {TOKEN_OR        , 1  , ASSOC_RIGHT , false},
+        {TOKEN_AND       , 2  , ASSOC_RIGHT , false},
+        {TOKEN_B_OR      , 3  , ASSOC_RIGHT , false},
+        {TOKEN_B_XOR     , 4  , ASSOC_RIGHT , false},
+        {TOKEN_B_AND     , 5  , ASSOC_RIGHT , false},
+
+        /* comparison operators */
+        {TOKEN_EQ_EQ     , 6  , ASSOC_LEFT  , false},
+        {TOKEN_NOT_EQ    , 6  , ASSOC_LEFT  , false},
+        {TOKEN_LT        , 7  , ASSOC_LEFT  , false},
+        {TOKEN_GT        , 7  , ASSOC_LEFT  , false},
+        {TOKEN_LT_EQ     , 7  , ASSOC_LEFT  , false},
+        {TOKEN_GT_EQ     , 7  , ASSOC_LEFT  , false},
+
+        /* mathematical operators */
+        {TOKEN_PLUS      , 8  , ASSOC_LEFT  , false},
+        {TOKEN_MIN       , 8  , ASSOC_LEFT  , false},
+        {TOKEN_STAR      , 9  , ASSOC_LEFT  , false},
+        {TOKEN_SLASH     , 9  , ASSOC_LEFT  , false},
+        {TOKEN_MOD       , 9  , ASSOC_LEFT  , false},
+        {TOKEN_STAR_STAR , 10 , ASSOC_RIGHT , false},
+
+        /* unary operators */
+        {TOKEN_BANG      , 11 , ASSOC_RIGHT , true},
+        {TOKEN_B_NOT     , 11 , ASSOC_RIGHT , true},
+
+        /* misc */
+        {TOKEN_L_PAREN   , 20 , ASSOC_NONE  , false}
 };
 
 static bool is_operator(tokentype token)
@@ -76,6 +109,17 @@ static bool is_operator(tokentype token)
         return false;
 }
 
+/*
+ * look up the entry of the operator table whose symbol matches `token`.
+ *
+ * returns a copy of the matching entry. when the token is not in the table a
+ * placeholder entry with symbol TOKEN_ERROR, precedence 0 and ASSOC_NONE is
+ * returned instead.
+ */
+static ast_op_t get_operator(tokentype token)
+{
+        const ast_op_t *end = operators + sizeof(operators) / sizeof(operators[0]);
+        const ast_op_t *op = operators;
+
+        while (op != end) {
+                if (op->symbol == token)
+                        return *op;
+                ++op;
+        }
+
+        return (ast_op_t) { TOKEN_ERROR, 0, ASSOC_NONE, false };
+}
+
 static ast_type_t get_type(char *name)
 {
         size_t types_len = sizeof(data_types) / sizeof(data_types[0]);
@@ -92,7 +136,7 @@ static ast_type_t get_type(char *name)
  *
  * possible syntax:
  *         fun function() ...
- *         mut i32 var ...
+ *         mut Int32 var ...
  *         use module ...
  *
  * any other declaration will fail as these are the only declarations allowed at
@@ -136,6 +180,9 @@ static ast_node_t *parse_block_level(token_t *ts)
                 /* TODO: parse while */
         case TOKEN_RETURN:
                 /* TODO: parse return */
+        case TOKEN_EOF:
+                file_fatal_error("unclosed block on line %zu\n",
+                                 ts->line_n);
         default:
                 parser_file_error("unexpected '%s' on line %zu\n",
                                   ts->value, ts->line_n);
@@ -176,7 +223,6 @@ ast_node_list_t *parse_token_stream(token_t *ts)
                 eat(ts);
         } while (ts->type != TOKEN_EOF);
 
-
         return nl;
 }
 
@@ -200,7 +246,7 @@ static ast_node_t *parse_fn(token_t *ts)
 
 /*
  * func(...)
- * str var = ....
+ * Str var = ....
  * var += 5;
  */
 static ast_node_t *parse_ident(token_t *ts)
@@ -209,13 +255,13 @@ static ast_node_t *parse_ident(token_t *ts)
                 return parse_call(ts);
 
         if (is_operator(peek(ts)->type))
-                return parse_expr(ts);
+                return parse_expr(ts, TOKEN_SEMI_COLON);
 
         return parse_var(ts);
 }
 
 /*
- * fun func(...) -> i32 ...
+ * fun func(...) -> Int32 ...
  * fun func(...) ...
  * fun func ...
  */
@@ -281,20 +327,18 @@ static ast_node_t *parse_call(token_t *ts)
 }
 
 /*
- * i32 var;
- * i32 var = ...
- * mut i32 var = ...
+ * Int32 var;
+ * Int32 var = ...
+ * mut Int32 var = ...
  */
 static ast_node_t *parse_var(token_t *ts)
 {
         char *name;
-        bool mutable = false;
+        bool mutable = ts->type == TOKEN_MUTABLE;
         ast_type_t type = UNKNOWN;
 
-        if (ts->type == TOKEN_MUTABLE) {
-                mutable = 1;
+        if (ts->type == TOKEN_MUTABLE)
                 eat(ts);
-        }
 
         if ((type = get_type(ts->value)) == UNKNOWN)
                 parser_file_error("unknown data type '%s' on line %zu\n",
@@ -308,7 +352,14 @@ static ast_node_t *parse_var(token_t *ts)
 
         eat(ts);
 
-        return create_var(name, type, mutable);
+        if (ts->type == TOKEN_SEMI_COLON)
+                return create_var(name, type, mutable, NULL);
+        else if (is_operator(ts->type))
+                return create_var(name, type, mutable, parse_expr(ts, TOKEN_SEMI_COLON));
+
+        parser_file_error("expected semi-colon or assignment, got '%s' on line %zu\n",
+                          ts->value, ts->line_n);
+        return create_var(name, type, mutable, NULL);
 }
 
 static ast_node_t *parse_if(token_t *ts)
@@ -321,9 +372,13 @@ static ast_node_t *parse_if(token_t *ts)
         ast_node_list_t *true_body = NULL;
         ast_node_list_t *false_body = NULL;
 
-        /* TODO: parse expression */
-        while (ts->type != TOKEN_L_BRACE)
+        condition = parse_expr(ts, TOKEN_R_BRACE);
+        true_body = parse_block(ts);
+
+        if (ts->type == TOKEN_ELSE) {
                 eat(ts);
+                false_body = parse_block(ts);
+        }
 
         if (swap)
                 swap_lists(true_body, false_body);
@@ -331,19 +386,88 @@ static ast_node_t *parse_if(token_t *ts)
         return create_if(condition, true_body, false_body);
 }
 
-static ast_node_t *parse_expr(token_t *ts)
+static ast_node_t *parse_expr(token_t *ts, tokentype delimiter)
 {
-        printf("one day i will be parsing expressions\n");
+        /* eat assignment token */
+        eat(ts);
+
+        size_t line_start = ts->line_n;
+        stack_t *operators = smalloc(sizeof(stack_t));
+        stack_t *output = smalloc(sizeof(stack_t));
+
+        for (token_t prev = NULL_TOKEN; ts->type != delimiter; prev = *ts, eat(ts)) {
+                if (is_operator(ts->type)) {
+                        /* an identifier or number is expected after an
+                           operator, except when the current operator is
+                           anything but left-associated */
+                        if (is_operator(prev.type) && get_operator(ts->type).assoc == ASSOC_LEFT)
+                                parser_file_error("unexpected '%s' in expression on line %zu\n",
+                                                  ts->value, ts->line_n);
+                        if (stack_size(operators) >= 2 &&
+                            get_operator(ts->type).assoc == ASSOC_LEFT &&
+                            get_operator(ts->type).precedence >=
+                            get_operator(stack_top(&operators).type).precedence) {
+                                stack_push(&output, stack_pop(&operators));
+                        }
+
+                        stack_push(&operators, *ts);
+                } else if (ts->type == TOKEN_IDENT || ts->type == TOKEN_NUMBER) {
+                        /* an operator is expected after an identifier, number
+                           or ). any other token is invalid */
+                        if (prev.type == TOKEN_IDENT || prev.type == TOKEN_NUMBER ||
+                            prev.type == TOKEN_R_PAREN)
+                                parser_file_error("unexpected '%s' in expression on line %zu\n",
+                                                  ts->value, ts->line_n);
+                        stack_push(&output, *ts);
+                } else if (ts->type == TOKEN_IF || ts->type == TOKEN_UNLESS) {
+                        bool is_unless = ts->type == TOKEN_UNLESS;
+
+                        eat(ts);
+
+                        /* TODO: shorthand if/unless */
+                        return create_if(parse_expr(ts, TOKEN_SEMI_COLON), NULL, NULL);
+                } else if (ts->type == TOKEN_R_PAREN) {
+                        /* pop all operators to output stack until ( is found */
+                        while (stack_top(&operators).type != TOKEN_L_PAREN) {
+                                if (!stack_top(&operators).type) {
+                                        parser_file_error("unmatched ')' in expression on line %zu\n",
+                                                          ts->line_n);
+                                        break;
+                                }
+
+                                stack_push(&output, stack_pop(&operators));
+                        }
+
+                        /* pop the remaining ( */
+                        stack_pop(&operators);
+                } else {
+                        if (ts->type == TOKEN_EOF)
+                                file_fatal_error("unterminated expression on line %zu\n",
+                                                 line_start);
+
+                        parser_file_error("unexpected token '%s' in expression on line %zu\n",
+                                          ts->value, ts->line_n);
+                }
+        }
+
+        /* any remaining operators get pushed to output */
+        while (stack_size(operators))
+                stack_push(&output, stack_pop(&operators));
+
+        stack_dump(output);
+
+        free(operators);
+
         return NULL;
 }
 
 static void emit_node(ast_node_list_t *nl, ast_node_t *node)
 {
         /* node could be NULL in some context, such as import */
-        if (node == NULL)
+        if (!node)
                 return;
 
-        if (nl->node == NULL) {
+        if (!nl->node) {
                 nl->node = node;
                 return;
         }

+ 1 - 0
src/parser.h

@@ -26,6 +26,7 @@
 #include "lexer.h"
 #include "token.h"
 #include "ast.h"
+#include "stack.h"
 
 #define parser_file_error(...) do { \
                                        PARSE_FAILED = true; \

+ 76 - 0
src/stack.c

@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2015 soud
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "erupt.h"
+#include "stack.h"
+
+/*
+ * push `value` onto the stack: a new node is allocated, linked in front of
+ * the current head and made the new head of *stack.
+ */
+void stack_push(stack_t **stack, token_t value)
+{
+        stack_t *node = smalloc(sizeof(stack_t));
+
+        node->next = *stack;
+        node->value = value;
+
+        *stack = node;
+}
+
+/*
+ * remove the head node from the stack and return the token it held. the head
+ * node is freed and *stack is moved on to the next node.
+ *
+ * NOTE(review): *stack is dereferenced unconditionally, so callers must not
+ * pop from a NULL stack.
+ */
+token_t stack_pop(stack_t **stack)
+{
+        stack_t *head = *stack;
+        stack_t *rest = head->next;
+        token_t popped = head->value;
+
+        free(head);
+        *stack = rest;
+
+        return popped;
+}
+
+/* return the token held by the head node without removing it. */
+token_t stack_top(stack_t **stack)
+{
+        const stack_t *head = *stack;
+
+        return head->value;
+}
+
+/*
+ * print the token values on the stack from the head down, each followed by a
+ * space, and finish with a newline. the last node (the one whose next pointer
+ * is NULL) is not printed. nothing at all is printed for a NULL stack.
+ */
+void stack_dump(stack_t *stack)
+{
+        if (!stack)
+                return;
+
+        for (; stack->next; stack = stack->next)
+                printf("%s ", (char *) stack->value.value);
+
+        printf("\n");
+}
+
+/*
+ * count the nodes on the stack, not counting the last node (the one whose
+ * next pointer is NULL). a NULL stack has size 0.
+ */
+size_t stack_size(stack_t *stack)
+{
+        size_t size = 0;
+
+        /* a NULL stack must not be dereferenced; treat it as empty, the
+           same way stack_dump ignores a NULL stack */
+        if (!stack)
+                return 0;
+
+        for (; stack->next; stack = stack->next)
+                ++size;
+
+        return size;
+}

+ 0 - 0
src/stack.h


この差分においてかなりの量のファイルが変更されているため、一部のファイルを表示していません