123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772 |
- /*
- * Copyright (C) 2006 Edmund GRIMLEY EVANS <edmundo@rano.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
- /*
- * A self-compiling compiler for a small subset of C.
- */
- /* Our library functions. */
- void exit(int);
- int getchar(void);
- void *malloc(int);
- int putchar(int);
- /* The first thing defined must be main(). */
- int main1();
- int main()
- {
- int a = 10;
- int b = a;
- return main1();
- }
- char *my_realloc(char *old, int oldlen, int newlen)
- {
- char *new = malloc(newlen);
- int i = 0;
- while (i <= oldlen - 1) {
- new[i] = old[i];
- i = i + 1;
- }
- return new;
- }
- int nextc;
- char *token;
- int token_size;
- void error()
- {
- exit(1);
- }
- int i;
- void takechar()
- {
- if (token_size <= i + 1) {
- int x = (i + 10) << 1;
- token = my_realloc(token, token_size, x);
- token_size = x;
- }
- token[i] = nextc;
- i = i + 1;
- nextc = getchar();
- }
- void get_token()
- {
- int w = 1;
- while (w) {
- w = 0;
- while ((nextc == ' ') | (nextc == 9) | (nextc == 10))
- nextc = getchar();
- i = 0;
- while ((('a' <= nextc) & (nextc <= 'z')) |
- (('0' <= nextc) & (nextc <= '9')) | (nextc == '_'))
- takechar();
- if (i == 0)
- while ((nextc == '<') | (nextc == '=') | (nextc == '>') |
- (nextc == '|') | (nextc == '&') | (nextc == '!'))
- takechar();
- if (i == 0) {
- if (nextc == 39) {
- takechar();
- while (nextc != 39)
- takechar();
- takechar();
- }
- else if (nextc == '"') {
- takechar();
- while (nextc != '"')
- takechar();
- takechar();
- }
- else if (nextc == '/') {
- takechar();
- if (nextc == '*') {
- nextc = getchar();
- while (nextc != '/') {
- while (nextc != '*')
- nextc = getchar();
- nextc = getchar();
- }
- nextc = getchar();
- w = 1;
- }
- }
- else if (nextc != 0-1)
- takechar();
- }
- token[i] = 0;
- }
- }
- int peek(char *s)
- {
- int i = 0;
- while ((s[i] == token[i]) & (s[i] != 0))
- i = i + 1;
- return s[i] == token[i];
- }
- int accept(char *s)
- {
- if (peek(s)) {
- get_token();
- return 1;
- }
- else
- return 0;
- }
- void expect(char *s)
- {
- if (accept(s) == 0)
- error();
- }
- char *code;
- int code_size;
- int codepos;
- int code_offset;
- void save_int(char *p, int n)
- {
- p[0] = n;
- p[1] = n >> 8;
- p[2] = n >> 16;
- p[3] = n >> 24;
- }
- int load_int(char *p)
- {
- return ((p[0] & 255) + ((p[1] & 255) << 8) +
- ((p[2] & 255) << 16) + ((p[3] & 255) << 24));
- }
- void emit(int n, char *s)
- {
- i = 0;
- if (code_size <= codepos + n) {
- int x = (codepos + n) << 1;
- code = my_realloc(code, code_size, x);
- code_size = x;
- }
- while (i <= n - 1) {
- code[codepos] = s[i];
- codepos = codepos + 1;
- i = i + 1;
- }
- }
- void be_push()
- {
- emit(1, "\x50"); /* push %eax */
- }
- void be_pop(int n)
- {
- emit(6, "\x81\xc4...."); /* add $(n * 4),%esp */
- save_int(code + codepos - 4, n << 2);
- }
- char *table;
- int table_size;
- int table_pos;
- int stack_pos;
- int sym_lookup(char *s)
- {
- int t = 0;
- int current_symbol = 0;
- while (t <= table_pos - 1) {
- i = 0;
- while ((s[i] == table[t]) & (s[i] != 0)) {
- i = i + 1;
- t = t + 1;
- }
- if (s[i] == table[t])
- current_symbol = t;
- while (table[t] != 0)
- t = t + 1;
- t = t + 6;
- }
- return current_symbol;
- }
- void sym_declare(char *s, int type, int value)
- {
- int t = table_pos;
- i = 0;
- int x;
- while (s[i] != 0) {
- if (table_size <= t + 10) {
- x = (t + 10) << 1;
- table = my_realloc(table, table_size, x);
- table_size = x;
- }
- table[t] = s[i];
- i = i + 1;
- t = t + 1;
- }
- table[t] = 0;
- table[t + 1] = type;
- save_int(table + t + 2, value);
- table_pos = t + 6;
- }
- int sym_declare_global(char *s)
- {
- int current_symbol = sym_lookup(s);
- if (current_symbol == 0) {
- sym_declare(s, 'U', code_offset);
- current_symbol = table_pos - 6;
- }
- return current_symbol;
- }
- void sym_define_global(int current_symbol)
- {
- int i;
- int j;
- int t = current_symbol;
- int v = codepos + code_offset;
- if (table[t + 1] != 'U')
- error(); /* symbol redefined */
- i = load_int(table + t + 2) - code_offset;
- while (i) {
- j = load_int(code + i) - code_offset;
- save_int(code + i, v);
- i = j;
- }
- table[t + 1] = 'D';
- save_int(table + t + 2, v);
- }
- int number_of_args;
- void sym_get_value(char *s)
- {
- int t;
- if ((t = sym_lookup(s)) == 0)
- error();
- emit(5, "\xb8...."); /* mov $n,%eax */
- save_int(code + codepos - 4, load_int(table + t + 2));
- if (table[t + 1] == 'D') { /* defined global */
- }
- else if (table[t + 1] == 'U') /* undefined global */
- save_int(table + t + 2, codepos + code_offset - 4);
- else if (table[t + 1] == 'L') { /* local variable */
- int k = (stack_pos - table[t + 2] - 1) << 2;
- emit(7, "\x8d\x84\x24...."); /* lea (n * 4)(%esp),%eax */
- save_int(code + codepos - 4, k);
- }
- else if (table[t + 1] == 'A') { /* argument */
- int k = (stack_pos + number_of_args - table[t + 2] + 1) << 2;
- emit(7, "\x8d\x84\x24...."); /* lea (n * 4)(%esp),%eax */
- save_int(code + codepos - 4, k);
- }
- else
- error();
- }
- void be_start()
- {
- emit(16, "\x7f\x45\x4c\x46\x01\x01\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00");
- emit(16, "\x02\x00\x03\x00\x01\x00\x00\x00\x54\x80\x04\x08\x34\x00\x00\x00");
- emit(16, "\x00\x00\x00\x00\x00\x00\x00\x00\x34\x00\x20\x00\x01\x00\x00\x00");
- emit(16, "\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x80\x04\x08");
- emit(16, "\x00\x80\x04\x08\x10\x4b\x00\x00\x10\x4b\x00\x00\x07\x00\x00\x00");
- emit(16, "\x00\x10\x00\x00\xe8\x00\x00\x00\x00\x89\xc3\x31\xc0\x40\xcd\x80");
- sym_define_global(sym_declare_global("exit"));
- /* pop %ebx ; pop %ebx ; xor %eax,%eax ; inc %eax ; int $0x80 */
- emit(7, "\x5b\x5b\x31\xc0\x40\xcd\x80");
- sym_define_global(sym_declare_global("getchar"));
- /* mov $3,%eax ; xor %ebx,%ebx ; push %ebx ; mov %esp,%ecx */
- emit(10, "\xb8\x03\x00\x00\x00\x31\xdb\x53\x89\xe1");
- /* xor %edx,%edx ; inc %edx ; int $0x80 */
- /* test %eax,%eax ; pop %eax ; jne . + 7 */
- emit(10, "\x31\xd2\x42\xcd\x80\x85\xc0\x58\x75\x05");
- /* mov $-1,%eax ; ret */
- emit(6, "\xb8\xff\xff\xff\xff\xc3");
- sym_define_global(sym_declare_global("malloc"));
- /* mov 4(%esp),%eax */
- emit(4, "\x8b\x44\x24\x04");
- /* push %eax ; xor %ebx,%ebx ; mov $45,%eax ; int $0x80 */
- emit(10, "\x50\x31\xdb\xb8\x2d\x00\x00\x00\xcd\x80");
- /* pop %ebx ; add %eax,%ebx ; push %eax ; push %ebx ; mov $45,%eax */
- emit(10, "\x5b\x01\xc3\x50\x53\xb8\x2d\x00\x00\x00");
- /* int $0x80 ; pop %ebx ; cmp %eax,%ebx ; pop %eax ; jle . + 7 */
- emit(8, "\xcd\x80\x5b\x39\xc3\x58\x7e\x05");
- /* mov $-1,%eax ; ret */
- emit(6, "\xb8\xff\xff\xff\xff\xc3");
- sym_define_global(sym_declare_global("putchar"));
- /* mov $4,%eax ; xor %ebx,%ebx ; inc %ebx */
- emit(8, "\xb8\x04\x00\x00\x00\x31\xdb\x43");
- /* lea 4(%esp),%ecx ; mov %ebx,%edx ; int $0x80 ; ret */
- emit(9, "\x8d\x4c\x24\x04\x89\xda\xcd\x80\xc3");
- save_int(code + 85, codepos - 89); /* entry set to first thing in file */
- }
- void be_finish()
- {
- save_int(code + 68, codepos);
- save_int(code + 72, codepos);
- i = 0;
- while (i <= codepos - 1) {
- putchar(code[i]);
- i = i + 1;
- }
- }
- void promote(int type)
- {
- /* 1 = char lval, 2 = int lval, 3 = other */
- if (type == 1)
- emit(3, "\x0f\xbe\x00"); /* movsbl (%eax),%eax */
- else if (type == 2)
- emit(2, "\x8b\x00"); /* mov (%eax),%eax */
- }
- int expression();
- /*
- * primary-expr:
- * identifier
- * constant
- * ( expression )
- */
- int primary_expr()
- {
- int type;
- if (('0' <= token[0]) & (token[0] <= '9')) {
- int n = 0;
- i = 0;
- while (token[i]) {
- n = (n << 1) + (n << 3) + token[i] - '0';
- i = i + 1;
- }
- emit(5, "\xb8...."); /* mov $x,%eax */
- save_int(code + codepos - 4, n);
- type = 3;
- }
- else if (('a' <= token[0]) & (token[0] <= 'z')) {
- sym_get_value(token);
- type = 2;
- }
- else if (accept("(")) {
- type = expression();
- if (peek(")") == 0)
- error();
- }
- else if ((token[0] == 39) & (token[1] != 0) &
- (token[2] == 39) & (token[3] == 0)) {
- emit(5, "\xb8...."); /* mov $x,%eax */
- save_int(code + codepos - 4, token[1]);
- type = 3;
- }
- else if (token[0] == '"') {
- int i = 0;
- int j = 1;
- int k;
- while (token[j] != '"') {
- if ((token[j] == 92) & (token[j + 1] == 'x')) {
- if (token[j + 2] <= '9')
- k = token[j + 2] - '0';
- else
- k = token[j + 2] - 'a' + 10;
- k = k << 4;
- if (token[j + 3] <= '9')
- k = k + token[j + 3] - '0';
- else
- k = k + token[j + 3] - 'a' + 10;
- token[i] = k;
- j = j + 4;
- }
- else {
- token[i] = token[j];
- j = j + 1;
- }
- i = i + 1;
- }
- token[i] = 0;
- /* call ... ; the string ; pop %eax */
- emit(5, "\xe8....");
- save_int(code + codepos - 4, i + 1);
- emit(i + 1, token);
- emit(1, "\x58");
- type = 3;
- }
- else
- error();
- get_token();
- return type;
- }
- void binary1(int type)
- {
- promote(type);
- be_push();
- stack_pos = stack_pos + 1;
- }
- int binary2(int type, int n, char *s)
- {
- promote(type);
- emit(n, s);
- stack_pos = stack_pos - 1;
- return 3;
- }
- /*
- * postfix-expr:
- * primary-expr
- * postfix-expr [ expression ]
- * postfix-expr ( expression-list-opt )
- */
- int postfix_expr()
- {
- int type = primary_expr();
- if (accept("[")) {
- binary1(type); /* pop %ebx ; add %ebx,%eax */
- binary2(expression(), 3, "\x5b\x01\xd8");
- expect("]");
- type = 1;
- }
- else if (accept("(")) {
- int s = stack_pos;
- be_push();
- stack_pos = stack_pos + 1;
- if (accept(")") == 0) {
- promote(expression());
- be_push();
- stack_pos = stack_pos + 1;
- while (accept(",")) {
- promote(expression());
- be_push();
- stack_pos = stack_pos + 1;
- }
- expect(")");
- }
- emit(7, "\x8b\x84\x24...."); /* mov (n * 4)(%esp),%eax */
- save_int(code + codepos - 4, (stack_pos - s - 1) << 2);
- emit(2, "\xff\xd0"); /* call *%eax */
- be_pop(stack_pos - s);
- stack_pos = s;
- type = 3;
- }
- return type;
- }
- /*
- * additive-expr:
- * postfix-expr
- * additive-expr + postfix-expr
- * additive-expr - postfix-expr
- */
- int additive_expr()
- {
- int type = postfix_expr();
- while (1) {
- if (accept("+")) {
- binary1(type); /* pop %ebx ; add %ebx,%eax */
- type = binary2(postfix_expr(), 3, "\x5b\x01\xd8");
- }
- else if (accept("-")) {
- binary1(type); /* pop %ebx ; sub %eax,%ebx ; mov %ebx,%eax */
- type = binary2(postfix_expr(), 5, "\x5b\x29\xc3\x89\xd8");
- }
- else
- return type;
- }
- }
- /*
- * shift-expr:
- * additive-expr
- * shift-expr << additive-expr
- * shift-expr >> additive-expr
- */
- int shift_expr()
- {
- int type = additive_expr();
- while (1) {
- if (accept("<<")) {
- binary1(type); /* mov %eax,%ecx ; pop %eax ; shl %cl,%eax */
- type = binary2(additive_expr(), 5, "\x89\xc1\x58\xd3\xe0");
- }
- else if (accept(">>")) {
- binary1(type); /* mov %eax,%ecx ; pop %eax ; sar %cl,%eax */
- type = binary2(additive_expr(), 5, "\x89\xc1\x58\xd3\xf8");
- }
- else
- return type;
- }
- }
- /*
- * relational-expr:
- * shift-expr
- * relational-expr <= shift-expr
- */
- int relational_expr()
- {
- int type = shift_expr();
- while (accept("<=")) {
- binary1(type);
- /* pop %ebx ; cmp %eax,%ebx ; setle %al ; movzbl %al,%eax */
- type = binary2(shift_expr(),
- 9, "\x5b\x39\xc3\x0f\x9e\xc0\x0f\xb6\xc0");
- }
- return type;
- }
- /*
- * equality-expr:
- * relational-expr
- * equality-expr == relational-expr
- * equality-expr != relational-expr
- */
- int equality_expr()
- {
- int type = relational_expr();
- while (1) {
- if (accept("==")) {
- binary1(type);
- /* pop %ebx ; cmp %eax,%ebx ; sete %al ; movzbl %al,%eax */
- type = binary2(relational_expr(),
- 9, "\x5b\x39\xc3\x0f\x94\xc0\x0f\xb6\xc0");
- }
- else if (accept("!=")) {
- binary1(type);
- /* pop %ebx ; cmp %eax,%ebx ; setne %al ; movzbl %al,%eax */
- type = binary2(relational_expr(),
- 9, "\x5b\x39\xc3\x0f\x95\xc0\x0f\xb6\xc0");
- }
- else
- return type;
- }
- }
- /*
- * bitwise-and-expr:
- * equality-expr
- * bitwise-and-expr & equality-expr
- */
- int bitwise_and_expr()
- {
- int type = equality_expr();
- while (accept("&")) {
- binary1(type); /* pop %ebx ; and %ebx,%eax */
- type = binary2(equality_expr(), 3, "\x5b\x21\xd8");
- }
- return type;
- }
- /*
- * bitwise-or-expr:
- * bitwise-and-expr
- * bitwise-and-expr | bitwise-or-expr
- */
- int bitwise_or_expr()
- {
- int type = bitwise_and_expr();
- while (accept("|")) {
- binary1(type); /* pop %ebx ; or %ebx,%eax */
- type = binary2(bitwise_and_expr(), 3, "\x5b\x09\xd8");
- }
- return type;
- }
- /*
- * expression:
- * bitwise-or-expr
- * bitwise-or-expr = expression
- */
- int expression()
- {
- int type = bitwise_or_expr();
- if (accept("=")) {
- be_push();
- stack_pos = stack_pos + 1;
- promote(expression());
- if (type == 2)
- emit(3, "\x5b\x89\x03"); /* pop %ebx ; mov %eax,(%ebx) */
- else
- emit(3, "\x5b\x88\x03"); /* pop %ebx ; mov %al,(%ebx) */
- stack_pos = stack_pos - 1;
- type = 3;
- }
- return type;
- }
- /*
- * type-name:
- * char *
- * int
- */
- void type_name()
- {
- get_token();
- while (accept("*")) {
- }
- }
- /*
- * statement:
- * { statement-list-opt }
- * type-name identifier ;
- * type-name identifier = expression;
- * if ( expression ) statement
- * if ( expression ) statement else statement
- * while ( expression ) statement
- * return ;
- * expr ;
- */
- void statement()
- {
- int p1;
- int p2;
- if (accept("{")) {
- int n = table_pos;
- int s = stack_pos;
- while (accept("}") == 0)
- statement();
- table_pos = n;
- be_pop(stack_pos - s);
- stack_pos = s;
- }
- else if (peek("char") | peek("int")) {
- type_name();
- sym_declare(token, 'L', stack_pos);
- get_token();
- if (accept("="))
- promote(expression());
- expect(";");
- be_push();
- stack_pos = stack_pos + 1;
- }
- else if (accept("if")) {
- expect("(");
- promote(expression());
- emit(8, "\x85\xc0\x0f\x84...."); /* test %eax,%eax ; je ... */
- p1 = codepos;
- expect(")");
- statement();
- emit(5, "\xe9...."); /* jmp ... */
- p2 = codepos;
- save_int(code + p1 - 4, codepos - p1);
- if (accept("else"))
- statement();
- save_int(code + p2 - 4, codepos - p2);
- }
- else if (accept("while")) {
- expect("(");
- p1 = codepos;
- promote(expression());
- emit(8, "\x85\xc0\x0f\x84...."); /* test %eax,%eax ; je ... */
- p2 = codepos;
- expect(")");
- statement();
- emit(5, "\xe9...."); /* jmp ... */
- save_int(code + codepos - 4, p1 - codepos);
- save_int(code + p2 - 4, codepos - p2);
- }
- else if (accept("return")) {
- if (peek(";") == 0)
- promote(expression());
- expect(";");
- be_pop(stack_pos);
- emit(1, "\xc3"); /* ret */
- }
- else {
- expression();
- expect(";");
- }
- }
- /*
- * program:
- * declaration
- * declaration program
- *
- * declaration:
- * type-name identifier ;
- * type-name identifier ( parameter-list ) ;
- * type-name identifier ( parameter-list ) statement
- *
- * parameter-list:
- * parameter-declaration
- * parameter-list, parameter-declaration
- *
- * parameter-declaration:
- * type-name identifier-opt
- */
- void program()
- {
- int current_symbol;
- int n;
- while (token[0]) {
- type_name();
- current_symbol = sym_declare_global(token);
- get_token();
- if (accept(";")) {
- sym_define_global(current_symbol);
- emit(4, "\x00\x00\x00\x00");
- }
- else if (accept("(")) {
- n = table_pos;
- number_of_args = 0;
- while (accept(")") == 0) {
- number_of_args = number_of_args + 1;
- type_name();
- if (peek(")") == 0) {
- sym_declare(token, 'A', number_of_args);
- get_token();
- }
- accept(","); /* ignore trailing comma */
- }
- if (accept(";") == 0) {
- sym_define_global(current_symbol);
- statement();
- emit(1, "\xc3"); /* ret */
- }
- table_pos = n;
- }
- else
- error();
- }
- }
- int main1()
- {
- code_offset = 134512640; /* 0x08048000 */
- be_start();
- nextc = getchar();
- get_token();
- program();
- be_finish();
- return 0;
- }
|