cc_reader.c 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488
  1. /* Copyright (C) 2016 Jeremiah Orians
  2. * Copyright (C) 2021 Andrius Štikonas <andrius@stikonas.eu>
  3. * This file is part of M2-Planet.
  4. *
  5. * M2-Planet is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * M2-Planet is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>.
  17. */
  18. #include "cc.h"
/* Declared here; change_filename uses it to parse the line number of a #FILENAME directive */
int strtoint(char *a);
/* Globals */
/* Stream that grab_byte reads from; set by read_all_tokens */
FILE* input;
/* Most recently created token; new_token links each new token in front of it */
struct token_list* token;
/* Current line number; grab_byte increments it for every newline read */
int line;
/* Name of the file being read; stored in every token created by new_token */
char* file;
#define DEFINE_STATE_NONE 0
//CONSTANT DEFINE_STATE_NONE 0
#define DEFINE_STATE_DEFINE 1
//CONSTANT DEFINE_STATE_DEFINE 1
/* Defines require knowing about whitespace to differentiate a function-like macro
 * #define FUNCTION_LIKE_MACRO(x)
 * and a regular macro that starts with an open parenthesis
 * #define REGULAR_MACRO (x)
 * */
/* Set to DEFINE_STATE_DEFINE by get_token right after it reads "#define";
 * reset to DEFINE_STATE_NONE once the following identifier has been read */
int define_state;
  35. int grab_byte(void)
  36. {
  37. int c = fgetc(input);
  38. if(10 == c) line = line + 1;
  39. return c;
  40. }
  41. int clearWhiteSpace(int c)
  42. {
  43. if((32 == c) || (9 == c)) return clearWhiteSpace(grab_byte());
  44. return c;
  45. }
  46. int consume_byte(int c)
  47. {
  48. hold_string[string_index] = c;
  49. string_index = string_index + 1;
  50. require(MAX_STRING > string_index, "Token exceeded MAX_STRING char limit\nuse --max-string number to increase\n");
  51. return grab_byte();
  52. }
  53. int preserve_string(int c)
  54. {
  55. int frequent = c;
  56. int escape = FALSE;
  57. do
  58. {
  59. if(!escape && '\\' == c ) escape = TRUE;
  60. else escape = FALSE;
  61. c = consume_byte(c);
  62. require(EOF != c, "Unterminated string\n");
  63. } while(escape || (c != frequent));
  64. return grab_byte();
  65. }
  66. int copy_string(char* target, char* source, int max)
  67. {
  68. int i = 0;
  69. while(0 != source[i])
  70. {
  71. target[i] = source[i];
  72. i = i + 1;
  73. if(i == max) break;
  74. }
  75. return i;
  76. }
  77. int string_length(char* a)
  78. {
  79. int i = 0;
  80. while(0 != a[i]) i = i + 1;
  81. return i;
  82. }
  83. void fixup_label(void)
  84. {
  85. int hold = ':';
  86. int prev;
  87. int i = 0;
  88. do
  89. {
  90. prev = hold;
  91. hold = hold_string[i];
  92. hold_string[i] = prev;
  93. i = i + 1;
  94. } while(0 != hold);
  95. }
  96. int preserve_keyword(int c, char* S)
  97. {
  98. while(in_set(c, S))
  99. {
  100. c = consume_byte(c);
  101. }
  102. return c;
  103. }
  104. void reset_hold_string(void)
  105. {
  106. int i = MAX_STRING;
  107. while(0 <= i)
  108. {
  109. hold_string[i] = 0;
  110. i = i - 1;
  111. }
  112. string_index = 0;
  113. }
  114. /* note if this is the first token in the list, head needs fixing up */
  115. struct token_list* eat_token(struct token_list* token)
  116. {
  117. if(NULL != token->prev)
  118. {
  119. token->prev->next = token->next;
  120. }
  121. /* update backlinks */
  122. if(NULL != token->next)
  123. {
  124. token->next->prev = token->prev;
  125. }
  126. return token->next;
  127. }
  128. struct token_list* eat_until_newline(struct token_list* head)
  129. {
  130. while (NULL != head)
  131. {
  132. if('\n' == head->s[0])
  133. {
  134. return head;
  135. }
  136. else
  137. {
  138. head = eat_token(head);
  139. }
  140. }
  141. return NULL;
  142. }
  143. struct token_list* remove_line_comments(struct token_list* head)
  144. {
  145. struct token_list* first = NULL;
  146. while (NULL != head)
  147. {
  148. if(match("//", head->s))
  149. {
  150. head = eat_until_newline(head);
  151. }
  152. else
  153. {
  154. if(NULL == first)
  155. {
  156. first = head;
  157. }
  158. head = head->next;
  159. }
  160. }
  161. return first;
  162. }
  163. struct token_list* remove_line_comment_tokens(struct token_list* head)
  164. {
  165. struct token_list* first = NULL;
  166. while (NULL != head)
  167. {
  168. if(match("//", head->s))
  169. {
  170. head = eat_token(head);
  171. }
  172. else
  173. {
  174. if(NULL == first)
  175. {
  176. first = head;
  177. }
  178. head = head->next;
  179. }
  180. }
  181. return first;
  182. }
  183. struct token_list* remove_preprocessor_directives(struct token_list* head)
  184. {
  185. struct token_list* first = NULL;
  186. while (NULL != head)
  187. {
  188. if('#' == head->s[0])
  189. {
  190. head = eat_until_newline(head);
  191. }
  192. else
  193. {
  194. if(NULL == first)
  195. {
  196. first = head;
  197. }
  198. head = head->next;
  199. }
  200. }
  201. return first;
  202. }
  203. void new_token(char* s, int size)
  204. {
  205. struct token_list* current = calloc(1, sizeof(struct token_list));
  206. require(NULL != current, "Exhausted memory while getting token\n");
  207. /* More efficiently allocate memory for string */
  208. current->s = calloc(size, sizeof(char));
  209. require(NULL != current->s, "Exhausted memory while trying to copy a token\n");
  210. copy_string(current->s, s, MAX_STRING);
  211. current->prev = token;
  212. current->next = token;
  213. current->linenumber = line;
  214. current->filename = file;
  215. token = current;
  216. }
  217. int get_token(int c)
  218. {
  219. struct token_list* current = calloc(1, sizeof(struct token_list));
  220. require(NULL != current, "Exhausted memory while getting token\n");
  221. reset:
  222. reset_hold_string();
  223. string_index = 0;
  224. c = clearWhiteSpace(c);
  225. if(c == EOF)
  226. {
  227. free(current);
  228. return c;
  229. }
  230. else if('#' == c)
  231. {
  232. c = consume_byte(c);
  233. c = preserve_keyword(c, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_");
  234. if(match(hold_string, "#define"))
  235. {
  236. define_state = DEFINE_STATE_DEFINE;
  237. }
  238. }
  239. else if(in_set(c, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"))
  240. {
  241. c = preserve_keyword(c, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_");
  242. if(':' == c)
  243. {
  244. fixup_label();
  245. c = ' ';
  246. }
  247. else if(define_state == DEFINE_STATE_DEFINE)
  248. {
  249. if(c != '(')
  250. {
  251. define_state = DEFINE_STATE_NONE;
  252. new_token(hold_string, string_index + 2);
  253. new_token(" ", 2);
  254. return c;
  255. }
  256. }
  257. define_state = DEFINE_STATE_NONE;
  258. }
  259. else if(in_set(c, "<=>|&!^%"))
  260. {
  261. c = preserve_keyword(c, "<=>|&!^%");
  262. }
  263. else if(in_set(c, "'\""))
  264. {
  265. c = preserve_string(c);
  266. }
  267. else if(c == '/')
  268. {
  269. c = consume_byte(c);
  270. if(c == '*')
  271. {
  272. c = grab_byte();
  273. while(c != '/')
  274. {
  275. while(c != '*')
  276. {
  277. c = grab_byte();
  278. require(EOF != c, "Hit EOF inside of block comment\n");
  279. }
  280. c = grab_byte();
  281. require(EOF != c, "Hit EOF inside of block comment\n");
  282. }
  283. c = grab_byte();
  284. goto reset;
  285. }
  286. else if(c == '/')
  287. {
  288. c = consume_byte(c);
  289. }
  290. else if(c == '=')
  291. {
  292. c = consume_byte(c);
  293. }
  294. }
  295. else if (c == '\n')
  296. {
  297. c = consume_byte(c);
  298. }
  299. else if(c == '*')
  300. {
  301. c = consume_byte(c);
  302. if(c == '=')
  303. {
  304. c = consume_byte(c);
  305. }
  306. }
  307. else if(c == '+')
  308. {
  309. c = consume_byte(c);
  310. if(c == '=')
  311. {
  312. c = consume_byte(c);
  313. }
  314. if(c == '+')
  315. {
  316. c = consume_byte(c);
  317. }
  318. }
  319. else if(c == '-')
  320. {
  321. c = consume_byte(c);
  322. if(c == '=')
  323. {
  324. c = consume_byte(c);
  325. }
  326. if(c == '>')
  327. {
  328. c = consume_byte(c);
  329. }
  330. if(c == '-')
  331. {
  332. c = consume_byte(c);
  333. }
  334. }
  335. else if(c == '\\')
  336. {
  337. c = consume_byte(c);
  338. if(c == '\n')
  339. {
  340. c = consume_byte(c);
  341. goto reset;
  342. }
  343. }
  344. else
  345. {
  346. c = consume_byte(c);
  347. }
  348. new_token(hold_string, string_index + 2);
  349. return c;
  350. }
  351. int consume_filename(int c)
  352. {
  353. reset_hold_string();
  354. int done = FALSE;
  355. while(!done)
  356. {
  357. if(c == EOF)
  358. {
  359. fputs("we don't support EOF as a filename in #FILENAME statements\n", stderr);
  360. exit(EXIT_FAILURE);
  361. }
  362. else if((32 == c) || (9 == c) || (c == '\n'))
  363. {
  364. c = grab_byte();
  365. }
  366. else
  367. {
  368. do
  369. {
  370. c = consume_byte(c);
  371. require(EOF != c, "Unterminated filename in #FILENAME\n");
  372. } while((32 != c) && (9 != c) && ('\n' != c));
  373. done = TRUE;
  374. }
  375. }
  376. /* with just a little extra to put in the matching at the end */
  377. new_token(hold_string, string_index + 3);
  378. return c;
  379. }
  380. int change_filename(int ch)
  381. {
  382. require(EOF != ch, "#FILENAME failed to receive filename\n");
  383. /* Remove the #FILENAME */
  384. token = token->next;
  385. /* Get new filename */
  386. ch = consume_filename(ch);
  387. file = token->s;
  388. /* Remove it from the processing list */
  389. token = token->next;
  390. require(EOF != ch, "#FILENAME failed to receive filename\n");
  391. /* Get new line number */
  392. define_state = DEFINE_STATE_NONE;
  393. ch = get_token(ch);
  394. line = strtoint(token->s);
  395. if(0 == line)
  396. {
  397. if('0' != token->s[0])
  398. {
  399. fputs("non-line number: ", stderr);
  400. fputs(token->s, stderr);
  401. fputs(" provided to #FILENAME\n", stderr);
  402. exit(EXIT_FAILURE);
  403. }
  404. }
  405. /* Remove it from the processing list */
  406. token = token->next;
  407. return ch;
  408. }
  409. struct token_list* reverse_list(struct token_list* head)
  410. {
  411. struct token_list* root = NULL;
  412. struct token_list* next;
  413. while(NULL != head)
  414. {
  415. next = head->next;
  416. head->next = root;
  417. root = head;
  418. head = next;
  419. }
  420. return root;
  421. }
  422. struct token_list* read_all_tokens(FILE* a, struct token_list* current, char* filename)
  423. {
  424. input = a;
  425. line = 1;
  426. file = filename;
  427. token = current;
  428. int ch = grab_byte();
  429. define_state = DEFINE_STATE_NONE;
  430. while(EOF != ch)
  431. {
  432. ch = get_token(ch);
  433. require(NULL != token, "Empty files don't need to be compiled\n");
  434. if(match("#FILENAME", token->s)) ch = change_filename(ch);
  435. }
  436. return token;
  437. }