// parser_example.c
#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "../sti.h"
  6. enum LexState {
  7. LST_INVALID,
  8. #define PARSER_INCLUDE_ENUMS
  9. #include "./parser_example_generated.h"
  10. #undef PARSER_INCLUDE_ENUMS
  11. LST_MAXVALUE
  12. };
  13. char* state_names[] = {
  14. [LST_INVALID] = "LST_INVALID",
  15. #define PARSER_INCLUDE_ENUM_NAMES
  16. #include "./parser_example_generated.h"
  17. #undef PARSER_INCLUDE_ENUM_NAMES
  18. [LST_MAXVALUE] = "LST_MAXVALUE",
  19. };
  20. #define PARSER_INCLUDE_TERMINAL_DATA_DEFS
  21. #include "./parser_example_generated.h"
  22. #undef PARSER_INCLUDE_TERMINAL_DATA_DEFS
  23. char** state_data[] = {
  24. #define PARSER_INCLUDE_TERMINAL_DATA
  25. #include "./parser_example_generated.h"
  26. #undef PARSER_INCLUDE_TERMINAL_DATA
  27. };
  28. // this is for the incremental lexing of each token, not the whole stream
  29. struct lexer_state {
  30. enum LexState state;
  31. char* buffer;
  32. int blen;
  33. int balloc;
  34. int linenum;
  35. int charnum;
  36. enum LexState tokenState;
  37. int tokenFinished; // buffer should be consumed and cleaned at this point
  38. };
  39. static int eatchar(struct lexer_state* st, int c) {
  40. #define PARSER_INCLUDE_CSETS
  41. #include "./parser_example_generated.h"
  42. #undef PARSER_INCLUDE_CSETS
  43. #define push_char_id(_state) \
  44. do { \
  45. st->state = _state; \
  46. goto PUSH_CHAR_RET; \
  47. } while(0)
  48. #define discard_char_id(_state) \
  49. do { \
  50. st->state = _state; \
  51. return 1; \
  52. } while(0)
  53. #define retry_as(_state) \
  54. do { \
  55. st->state = _state; \
  56. goto RETRY; \
  57. } while(0);
  58. #define done_zero_move(_state) \
  59. do { \
  60. st->state = _state; \
  61. goto TOKEN_DONE; \
  62. } while(0);
  63. #define push_char_done(_state) \
  64. do { \
  65. st->state = _state; \
  66. goto PUSH_CHAR_DONE; \
  67. } while(0);
  68. #define charset_has(cs, c) (c <= cs##_len && !!cs[c])
  69. // hopefully this works
  70. st->charnum++;
  71. if(c == '\n') {
  72. st->linenum++;
  73. st->charnum = 0;
  74. }
  75. RETRY:
  76. switch(st->state) {
  77. #define PARSER_INCLUDE_SWITCH
  78. #include "./parser_example_generated.h"
  79. #undef PARSER_INCLUDE_SWITCH
  80. default:
  81. printf("Lexer reached default: %d\n", st->state);
  82. st->state = LST_NULL;
  83. return 0;
  84. }
  85. assert(0);
  86. // never gets here
  87. ERROR:
  88. printf("Lexer error at line %d:%d: state %d(%s) %d='%c' \n", st->linenum, st->charnum, st->state, state_names[st->state], c, c);
  89. st->state = LST_NULL;
  90. st->blen = 0;
  91. return 1;
  92. TOKEN_DONE:
  93. st->tokenFinished = 1;
  94. st->tokenState = st->state;
  95. return 0;
  96. PUSH_CHAR_RET:
  97. st->buffer[st->blen] = c;
  98. st->blen++;
  99. return 1;
  100. PUSH_CHAR_DONE:
  101. st->buffer[st->blen] = c;
  102. st->blen++;
  103. st->tokenFinished = 1;
  104. st->tokenState = st->state;
  105. return 1;
  106. }
  107. int main(int argc, char* argv[]) {
  108. // char* source = "func(int x, 123 -123 0.123 .123 0123 0x123 1.23e4 1.23E-45 /*yd*fg*/) // error z;\nfoo";
  109. if(argc < 2) {
  110. fprintf(stderr, "test file expected as first argument.\n");
  111. exit(1);
  112. }
  113. char* source = readWholeFile(argv[1], NULL);
  114. struct lexer_state ls = {
  115. .state = 0,
  116. .balloc = 256,
  117. .blen = 0,
  118. .buffer = calloc(1, 256),
  119. .state = LST_NULL,
  120. .linenum = 0,
  121. .charnum = 0,
  122. .tokenState = 0,
  123. .tokenFinished = 0,
  124. };
  125. for(int i = 0; source[i];) {
  126. int ret;
  127. // printf(" eating char: %c\n", line[i]);
  128. ret = eatchar(&ls, source[i]);
  129. // printf(" post-state: %s\n", stateNames[ls.state]);
  130. if(ls.tokenFinished) {
  131. // token is ready
  132. printf("got token: #%d (%s) '%.*s'\n", ls.tokenState, state_names[ls.tokenState], ls.blen, ls.buffer);
  133. // reset the lex state when done reading
  134. ls.tokenFinished = 0;
  135. ls.state = LST_NULL;
  136. ls.blen = 0;
  137. }
  138. if(ret) {
  139. i++; // advance on ret == 1
  140. }
  141. }
  142. printf("last token: #%d (%s) '%.*s'\n", ls.tokenState, state_names[ls.tokenState], ls.blen, ls.buffer);
  143. return 0;
  144. }