cc_strings.c 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. /* Copyright (C) 2016 Jeremiah Orians
  2. * Copyright (C) 2018 Jan (janneke) Nieuwenhuizen <janneke@gnu.org>
  3. * This file is part of M2-Planet.
  4. *
  5. * M2-Planet is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * M2-Planet is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>.
  17. */
  18. #include "cc.h"
  19. #include <stdint.h>
  20. struct token_list* emit(char *s, struct token_list* head);
  21. void require(int bool, char* error);
  /*
   * Convert a lowercase ASCII letter to its uppercase form.
   *
   * a: the character to convert.
   * Returns the uppercase letter when a is in 'a'..'z'; every other value
   * is returned unchanged.
   */
  char upcase(char a)
  {
      /* Direct range test (the same form char2hex and digit_is_octal use)
       * rather than scanning a 26-character set with in_set. */
      if(a >= 'a' && a <= 'z')
      {
          /* ASCII lowercase and uppercase letters are 32 apart */
          a = a - 32;
      }
      return a;
  }
  /*
   * Translate one hexadecimal digit character into its numeric value.
   *
   * c: character code to decode.
   * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F'; -1 for anything else.
   */
  int char2hex(int c)
  {
      if('0' <= c && '9' >= c)
      {
          return c - '0';
      }
      if('a' <= c && 'f' >= c)
      {
          return (c - 'a') + 10;
      }
      if('A' <= c && 'F' >= c)
      {
          return (c - 'A') + 10;
      }
      /* Not a hexadecimal digit */
      return -1;
  }
  /*
   * Convert a hexadecimal digit into its contribution to a byte value.
   *
   * c:    the hex digit character.
   * high: non-zero to place the digit in the upper nibble (shifted left
   *       by 4); zero to leave it in the lower nibble.
   * Returns the resulting value. When char2hex rejects c, an error is
   * written to stderr and the process exits with EXIT_FAILURE.
   */
  int hexify(int c, int high)
  {
      int nibble = char2hex(c);
      if(nibble < 0)
      {
          fputs("Tried to print non-hex number\n", stderr);
          exit(EXIT_FAILURE);
      }
      if(!high)
      {
          return nibble;
      }
      return nibble << 4;
  }
  /*
   * Report whether a character is an octal digit.
   *
   * digit: the character to test.
   * Returns 1 when digit is in '0'..'7', otherwise 0.
   */
  int digit_is_octal(char digit)
  {
      if(digit < '0')
      {
          return 0;
      }
      if(digit > '7')
      {
          return 0;
      }
      return 1;
  }
  /*
   * Decode the digits of an octal escape sequence.
   *
   * digits: points at the first octal digit (the character after the
   *         backslash).
   * Consumes at most three consecutive octal digits and returns their
   * value. Values above 127 are brought into range by subtracting 256.
   */
  int parse_octal_escape_code(char* digits)
  {
      int value = 0;
      int count = 0;
      while(digit_is_octal(digits[count]) && count < 3)
      {
          value = (value * 8) + (digits[count] - '0');
          count = count + 1;
      }
      /* Implementation-defined behavior: octals above 0177 (127) wrap
       * around in two's complement; the original author notes this is the
       * same as GCC. */
      while(value > 127)
      {
          value = value - 256;
      }
      return value;
  }
  /*
   * Count how many characters after the backslash belong to an escape.
   *
   * string: points at the backslash; string[1] selects the escape kind.
   * Returns 3 for a hex escape (the 'x' plus two digits), the number of
   * octal digits (1 to 3) for an octal escape, and 1 for any other escape.
   */
  int amount_of_escaped_chars_to_skip(char* string)
  {
      int i;
      if('x' == string[1])
      {
          return 3;
      }
      if(!digit_is_octal(string[1]))
      {
          return 1;
      }
      /* string[1] is already known to be octal; look at up to two more */
      i = 2;
      while(digit_is_octal(string[i]) && i < 4)
      {
          i = i + 1;
      }
      return i - 1;
  }
  86. int escape_lookup(char* c);
  87. int weird(char* string)
  88. {
  89. int c;
  90. string = string + 1;
  91. weird_reset:
  92. c = string[0];
  93. if(0 == c) return FALSE;
  94. if('\\' == c)
  95. {
  96. c = escape_lookup(string);
  97. string = string + amount_of_escaped_chars_to_skip(string);
  98. }
  99. if(!in_set(c, "\t\n !#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~")) return TRUE;
  100. if(in_set(c, " \t\n\r") && (':' == string[1])) return TRUE;
  101. string = string + 1;
  102. goto weird_reset;
  103. }
  104. /* Lookup escape values */
  105. int escape_lookup(char* c)
  106. {
  107. if('\\' != c[0]) return c[0];
  108. if(c[1] == 'x')
  109. {
  110. int t1 = hexify(c[2], TRUE);
  111. int t2 = hexify(c[3], FALSE);
  112. return t1 + t2;
  113. }
  114. else if(c[1] == 'a') return 7;
  115. else if(c[1] == 'b') return 8;
  116. else if(c[1] == 't') return 9;
  117. else if(c[1] == 'n') return 10;
  118. else if(c[1] == 'v') return 11;
  119. else if(c[1] == 'f') return 12;
  120. else if(c[1] == 'r') return 13;
  121. else if(c[1] == 'e') return 27;
  122. else if(c[1] == '"') return 34;
  123. else if(c[1] == '\'') return 39;
  124. else if(c[1] == '\\') return 92;
  125. else if(c[1] == '?') return 63;
  126. else if(digit_is_octal(c[1])) return parse_octal_escape_code(c + 1);
  127. fputs("Unknown escape received: ", stderr);
  128. fputs(c, stderr);
  129. fputs(" Unable to process\n", stderr);
  130. exit(EXIT_FAILURE);
  131. }
  132. /* Deal with human strings */
  133. char* collect_regular_string(char* string)
  134. {
  135. string_index = 0;
  136. collect_regular_string_reset:
  137. require((MAX_STRING - 3) > string_index, "Attempt at parsing regular string exceeds max length\n");
  138. if(string[0] == '\\')
  139. {
  140. hold_string[string_index] = escape_lookup(string);
  141. string = string + amount_of_escaped_chars_to_skip(string) + 1;
  142. }
  143. else
  144. {
  145. hold_string[string_index] = string[0];
  146. string = string + 1;
  147. }
  148. string_index = string_index + 1;
  149. if(string[0] != 0) goto collect_regular_string_reset;
  150. hold_string[string_index] = '"';
  151. hold_string[string_index + 1] = '\n';
  152. char* message = calloc(string_index + 3, sizeof(char));
  153. require(NULL != message, "Exhausted memory while storing regular string\n");
  154. copy_string(message, hold_string, string_index + 2);
  155. reset_hold_string();
  156. return message;
  157. }
  158. /* Deal with non-human strings */
  159. char* collect_weird_string(char* string)
  160. {
  161. string_index = 1;
  162. int temp;
  163. char* table = "0123456789ABCDEF";
  164. hold_string[0] = '\'';
  165. collect_weird_string_reset:
  166. require((MAX_STRING - 6) > string_index, "Attempt at parsing weird string exceeds max length\n");
  167. string = string + 1;
  168. hold_string[string_index] = ' ';
  169. temp = escape_lookup(string) & 0xFF;
  170. hold_string[string_index + 1] = table[(temp >> 4)];
  171. hold_string[string_index + 2] = table[(temp & 15)];
  172. if(string[0] == '\\')
  173. {
  174. string = string + amount_of_escaped_chars_to_skip(string);
  175. }
  176. string_index = string_index + 3;
  177. if(string[1] != 0) goto collect_weird_string_reset;
  178. hold_string[string_index] = ' ';
  179. hold_string[string_index + 1] = '0';
  180. hold_string[string_index + 2] = '0';
  181. hold_string[string_index + 3] = '\'';
  182. hold_string[string_index + 4] = '\n';
  183. char* hold = calloc(string_index + 6, sizeof(char));
  184. require(NULL != hold, "Exhausted available memory while attempting to collect a weird string\n");
  185. copy_string(hold, hold_string, string_index + 5);
  186. reset_hold_string();
  187. return hold;
  188. }
  /*
   * Parse a string token, choosing the output form it needs.
   *
   * string: the string token to convert.
   * Returns the result of collect_weird_string when weird() flags the
   * string, otherwise the result of collect_regular_string.
   */
  char* parse_string(char* string)
  {
      if(!weird(string))
      {
          return collect_regular_string(string);
      }
      return collect_weird_string(string);
  }