/* -*- c-file-style: "linux";indent-tabs-mode:t -*- */
/* Copyright (C) 2016 Jeremiah Orians
 * Copyright (C) 2017 Jan Nieuwenhuizen <janneke@gnu.org>
 * This file is part of mescc-tools.
 *
 * mescc-tools is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * mescc-tools is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with mescc-tools. If not, see <http://www.gnu.org/licenses/>.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <getopt.h>

/* Every #define below is preceded by a "CONSTANT" comment repeating the same
 * name and value.
 * NOTE(review): presumably those comments are consumed by a toolchain that
 * does not process #define -- confirm before editing or removing them. */

/* Upper bound on the length of a single token; store_atom and store_string
 * allocate max_string + 1 bytes per token */
//CONSTANT max_string 4096
#define max_string 4096

/* Values stored in Token.type; a freshly allocated token has type 0 */
//CONSTANT MACRO 1
#define MACRO 1
//CONSTANT STR 2
#define STR 2
//CONSTANT NEWLINE 3
#define NEWLINE 3

/* Boolean values used for the flag variables */
//CONSTANT TRUE 1
#define TRUE 1
//CONSTANT FALSE 0
#define FALSE 0

/* Values stored in the global Architecture (selected with --architecture) */
// CONSTANT KNIGHT 0
#define KNIGHT 0
// CONSTANT X86 1
#define X86 1
// CONSTANT AMD64 2
#define AMD64 2
// CONSTANT ARMV7L 40
#define ARMV7L 40
// CONSTANT AARM64 80
#define AARM64 80
/* Helper routines called throughout this file. Only their prototypes appear
 * here.
 * NOTE(review): the definitions are presumably supplied by a separate source
 * file; the behaviour of each helper should be confirmed there. */
void file_print(char* s, FILE* f);
int match(char* a, char* b);
int string_length(char* a);
char* numerate_number(int a);
int numerate_string(char *a);
int hex2char(int c);
int in_set(int c, char* s);
/* Input currently being tokenized; opened by main for every -f option */
FILE* source_file;
/* Where print_hex writes; stdout unless -o/--output is given */
FILE* destination_file;
/* TRUE by default; --LittleEndian sets it to FALSE, which makes
 * express_number pass its digits through LittleEndian() */
int BigEndian;
/* Set to TRUE in main and never changed by any option in this file;
 * consulted by LittleEndian() and express_number() */
int BigBitEndian;
/* Output base: 16 by default, 8 with --octal, 2 with --binary */
int ByteMode;
/* One of KNIGHT, X86, AMD64, ARMV7L or AARM64 */
int Architecture;
/* Line counter for the file being tokenized: reset to 1 by Tokenize_Line
 * and incremented by append_newline */
int linenumber;
  59. void line_error(char* filename, int linenumber)
  60. {
  61. file_print(filename, stderr);
  62. file_print(":", stderr);
  63. file_print(numerate_number(linenumber), stderr);
  64. file_print(" :", stderr);
  65. }
/* One word of input, kept in a singly linked list */
struct Token
{
	/* Following token in the list; NULL at the end */
	struct Token* next;
	/* 0 for an ordinary atom (newToken zero-fills), otherwise MACRO, STR or NEWLINE */
	int type;
	/* Text of the token as read; for STR tokens it starts with the opening quote */
	char* Text;
	/* What print_hex writes for this token; NULL until one of the passes fills it in */
	char* Expression;
	/* Name of the file the token came from, used by line_error */
	char* filename;
	/* Value of the global line counter when the token was created */
	int linenumber;
};
  75. struct Token* newToken(char* filename, int linenumber)
  76. {
  77. struct Token* p;
  78. p = calloc (1, sizeof (struct Token));
  79. if (NULL == p)
  80. {
  81. file_print("calloc failed.\n", stderr);
  82. exit (EXIT_FAILURE);
  83. }
  84. p->filename = filename;
  85. p->linenumber = linenumber;
  86. return p;
  87. }
  88. struct Token* reverse_list(struct Token* head)
  89. {
  90. struct Token* root = NULL;
  91. while(NULL != head)
  92. {
  93. struct Token* next = head->next;
  94. head->next = root;
  95. root = head;
  96. head = next;
  97. }
  98. return root;
  99. }
  100. void purge_lineComment()
  101. {
  102. int c = fgetc(source_file);
  103. while(!in_set(c, "\n\r"))
  104. {
  105. if(EOF == c) break;
  106. c = fgetc(source_file);
  107. }
  108. }
  109. struct Token* append_newline(struct Token* head, char* filename)
  110. {
  111. linenumber = linenumber + 1;
  112. if(NULL == head) return NULL;
  113. if(NEWLINE == head->type)
  114. {/* Don't waste whitespace*/
  115. return head;
  116. }
  117. struct Token* lf = newToken(filename, linenumber);
  118. lf->type = NEWLINE;
  119. lf->next = head;
  120. lf->Text = "\n";
  121. lf->Expression = lf->Text;
  122. return lf;
  123. }
  124. struct Token* store_atom(struct Token* head, char c, char* filename)
  125. {
  126. char* store = calloc(max_string + 1, sizeof(char));
  127. if(NULL == store)
  128. {
  129. file_print("Exhusted available memory\n", stderr);
  130. exit(EXIT_FAILURE);
  131. }
  132. int ch = c;
  133. int i = 0;
  134. do
  135. {
  136. store[i] = ch;
  137. ch = fgetc(source_file);
  138. i = i + 1;
  139. } while (!in_set(ch, "\t\n ") && (i <= max_string));
  140. head->Text = store;
  141. if('\n' == ch)
  142. {
  143. return append_newline(head, filename);
  144. }
  145. return head;
  146. }
  147. char* store_string(char c, char* filename)
  148. {
  149. char* store = calloc(max_string + 1, sizeof(char));
  150. if(NULL == store)
  151. {
  152. file_print("Exhusted available memory\n", stderr);
  153. exit(EXIT_FAILURE);
  154. }
  155. int ch = c;
  156. int i = 0;
  157. do
  158. {
  159. store[i] = ch;
  160. i = i + 1;
  161. ch = fgetc(source_file);
  162. if(-1 == ch)
  163. {
  164. line_error(filename, linenumber);
  165. file_print("Unmatched \"!\n", stderr);
  166. exit(EXIT_FAILURE);
  167. }
  168. if(max_string == i)
  169. {
  170. line_error(filename, linenumber);
  171. file_print("String: ", stderr);
  172. file_print(store, stderr);
  173. file_print(" exceeds max string size\n", stderr);
  174. exit(EXIT_FAILURE);
  175. }
  176. } while(ch != c);
  177. return store;
  178. }
  179. struct Token* Tokenize_Line(struct Token* head, char* filename)
  180. {
  181. int c;
  182. struct Token* p;
  183. linenumber = 1;
  184. do
  185. {
  186. restart:
  187. c = fgetc(source_file);
  188. if(in_set(c, ";#"))
  189. {
  190. purge_lineComment();
  191. head = append_newline(head, filename);
  192. goto restart;
  193. }
  194. if(in_set(c, "\t "))
  195. {
  196. goto restart;
  197. }
  198. if('\n' == c)
  199. {
  200. head = append_newline(head, filename);
  201. goto restart;
  202. }
  203. if(EOF == c)
  204. {
  205. head = append_newline(head, filename);
  206. goto done;
  207. }
  208. p = newToken(filename, linenumber);
  209. p->next = head;
  210. if(in_set(c, "'\""))
  211. {
  212. p->Text = store_string(c, filename);
  213. p->type = STR;
  214. }
  215. else
  216. {
  217. p = store_atom(p, c, filename);
  218. }
  219. head = p;
  220. } while(TRUE);
  221. done:
  222. return head;
  223. }
  224. void setExpression(struct Token* p, char *c, char *Exp)
  225. {
  226. struct Token* i;
  227. for(i = p; NULL != i; i = i->next)
  228. {
  229. /* Leave macros alone */
  230. if(MACRO == i->type)
  231. {
  232. if(match(i->Text, c))
  233. {
  234. line_error(i->filename, i->linenumber);
  235. file_print("Multiple definitions for macro ", stderr);
  236. file_print(c, stderr);
  237. file_print("\n", stderr);
  238. exit(EXIT_FAILURE);
  239. }
  240. continue;
  241. }
  242. else if(match(i->Text, c))
  243. { /* Only if there is an exact match replace */
  244. i->Expression = Exp;
  245. }
  246. }
  247. }
  248. void identify_macros(struct Token* p)
  249. {
  250. struct Token* i;
  251. for(i = p; NULL != i; i = i->next)
  252. {
  253. if(match(i->Text, "DEFINE"))
  254. {
  255. i->type = MACRO;
  256. i->Text = i->next->Text;
  257. if(STR == i->next->next->type)
  258. {
  259. i->Expression = i->next->next->Text + 1;
  260. }
  261. else
  262. {
  263. i->Expression = i->next->next->Text;
  264. }
  265. i->next = i->next->next->next;
  266. }
  267. }
  268. }
  269. void line_macro(struct Token* p)
  270. {
  271. struct Token* i;
  272. for(i = p; NULL != i; i = i->next)
  273. {
  274. if(MACRO == i->type)
  275. {
  276. setExpression(i->next, i->Text, i->Expression);
  277. }
  278. }
  279. }
  280. void hexify_string(struct Token* p)
  281. {
  282. char* table = "0123456789ABCDEF";
  283. int i = string_length(p->Text);
  284. char* d = calloc(((((i >> 2) + 1) << 3) + 1), sizeof(char));
  285. p->Expression = d;
  286. char* S = p->Text;
  287. if(KNIGHT == Architecture)
  288. {
  289. i = ((((i - 1) >> 2) + 1) << 3);
  290. while( 0 < i)
  291. {
  292. i = i - 1;
  293. d[i] = '0';
  294. }
  295. }
  296. while( 0 != S[0])
  297. {
  298. S = S + 1;
  299. d[0] = table[S[0] >> 4];
  300. d[1] = table[S[0] & 0xF];
  301. d = d + 2;
  302. }
  303. }
  304. void process_string(struct Token* p)
  305. {
  306. struct Token* i;
  307. for(i = p; NULL != i; i = i->next)
  308. {
  309. if(STR == i->type)
  310. {
  311. if('\'' == i->Text[0])
  312. {
  313. i->Expression = i->Text + 1;
  314. }
  315. else if('"' == i->Text[0])
  316. {
  317. hexify_string(i);
  318. }
  319. }
  320. }
  321. }
  322. char* pad_nulls(int size, char* nil)
  323. {
  324. if(0 == size) return nil;
  325. size = size * 2;
  326. char* s = calloc(size + 1, sizeof(char));
  327. int i = 0;
  328. while(i < size)
  329. {
  330. s[i] = '0';
  331. i = i + 1;
  332. }
  333. return s;
  334. }
  335. void preserve_other(struct Token* p)
  336. {
  337. struct Token* i;
  338. for(i = p; NULL != i; i = i->next)
  339. {
  340. if((NULL == i->Expression) && !(i->type & MACRO))
  341. {
  342. char c = i->Text[0];
  343. if(in_set(c, "!@$~%&:^"))
  344. {
  345. i->Expression = i->Text;
  346. }
  347. else if('<' == c)
  348. {
  349. i->Expression = pad_nulls(numerate_string(i->Text + 1), i->Text);
  350. }
  351. else
  352. {
  353. line_error(i->filename, i->linenumber);
  354. file_print("Received invalid other; ", stderr);
  355. file_print(i->Text, stderr);
  356. file_print("\n", stderr);
  357. exit(EXIT_FAILURE);
  358. }
  359. }
  360. }
  361. }
  362. void bound_values(int displacement, int number_of_bytes, int low, int high)
  363. {
  364. if((high < displacement) || (displacement < low))
  365. {
  366. file_print("A displacement of ", stderr);
  367. file_print(numerate_number(displacement), stderr);
  368. file_print(" does not fit in ", stderr);
  369. file_print(numerate_number(number_of_bytes), stderr);
  370. file_print(" bytes\n", stderr);
  371. exit(EXIT_FAILURE);
  372. }
  373. }
  374. void range_check(int displacement, int number_of_bytes)
  375. {
  376. if(4 == number_of_bytes) return;
  377. else if(3 == number_of_bytes)
  378. {
  379. bound_values(displacement, number_of_bytes, -8388608, 16777216);
  380. return;
  381. }
  382. else if(2 == number_of_bytes)
  383. {
  384. bound_values(displacement, number_of_bytes, -32768, 65535);
  385. return;
  386. }
  387. else if(1 == number_of_bytes)
  388. {
  389. bound_values(displacement, number_of_bytes, -128, 255);
  390. return;
  391. }
  392. file_print("Received an invalid number of bytes in range_check\n", stderr);
  393. exit(EXIT_FAILURE);
  394. }
  395. void reverseBitOrder(char* c)
  396. {
  397. if(NULL == c) return;
  398. if(0 == c[1]) return;
  399. int hold = c[0];
  400. if(16 == ByteMode)
  401. {
  402. c[0] = c[1];
  403. c[1] = hold;
  404. reverseBitOrder(c+2);
  405. }
  406. else if(8 == ByteMode)
  407. {
  408. c[0] = c[2];
  409. c[2] = hold;
  410. reverseBitOrder(c+3);
  411. }
  412. else if(2 == ByteMode)
  413. {
  414. c[0] = c[7];
  415. c[7] = hold;
  416. hold = c[1];
  417. c[1] = c[6];
  418. c[6] = hold;
  419. hold = c[2];
  420. c[2] = c[5];
  421. c[5] = hold;
  422. hold = c[3];
  423. c[3] = c[4];
  424. c[4] = hold;
  425. reverseBitOrder(c+8);
  426. }
  427. }
  428. void LittleEndian(char* start)
  429. {
  430. char* end = start;
  431. char* c = start;
  432. while(0 != end[0]) end = end + 1;
  433. int hold;
  434. for(end = end - 1; start < end; start = start + 1)
  435. {
  436. hold = start[0];
  437. start[0] = end[0];
  438. end[0] = hold;
  439. end = end - 1;
  440. }
  441. if(BigBitEndian) reverseBitOrder(c);
  442. }
  443. int stringify(char* s, int digits, int divisor, int value, int shift)
  444. {
  445. int i = value;
  446. if(digits > 1)
  447. {
  448. i = stringify(s+1, (digits - 1), divisor, value, shift);
  449. }
  450. s[0] = hex2char(i & (divisor - 1));
  451. return (i >> shift);
  452. }
  453. char* express_number(int value, char c)
  454. {
  455. char* ch = calloc(42, sizeof(char));
  456. int size;
  457. int number_of_bytes;
  458. int shift;
  459. if('!' == c)
  460. {
  461. number_of_bytes = 1;
  462. value = value & 0xFF;
  463. }
  464. else if('@' == c)
  465. {
  466. number_of_bytes = 2;
  467. value = value & 0xFFFF;
  468. }
  469. else if('~' == c)
  470. {
  471. number_of_bytes = 3;
  472. value = value & 0xFFFFFF;
  473. }
  474. else if('%' == c)
  475. {
  476. number_of_bytes = 4;
  477. value = value & 0xFFFFFFFF;
  478. }
  479. else
  480. {
  481. file_print("Given symbol ", stderr);
  482. fputc(c, stderr);
  483. file_print(" to express immediate value ", stderr);
  484. file_print(numerate_number(value), stderr);
  485. fputc('\n', stderr);
  486. exit(EXIT_FAILURE);
  487. }
  488. range_check(value, number_of_bytes);
  489. if(16 == ByteMode)
  490. {
  491. size = number_of_bytes * 2;
  492. shift = 4;
  493. }
  494. else if(8 == ByteMode)
  495. {
  496. size = number_of_bytes * 3;
  497. shift = 3;
  498. }
  499. else if(2 == ByteMode)
  500. {
  501. size = number_of_bytes * 8;
  502. shift = 1;
  503. }
  504. else
  505. {
  506. file_print("Got invalid ByteMode in express_number\n", stderr);
  507. exit(EXIT_FAILURE);
  508. }
  509. stringify(ch, size, ByteMode, value, shift);
  510. if(!BigEndian) LittleEndian(ch);
  511. else if(!BigBitEndian) reverseBitOrder(ch);
  512. return ch;
  513. }
  514. void eval_immediates(struct Token* p)
  515. {
  516. struct Token* i;
  517. for(i = p; NULL != i; i = i->next)
  518. {
  519. if(MACRO == i->type) continue;
  520. else if(NEWLINE == i->type) continue;
  521. else if('<' == i->Text[0]) continue;
  522. else if(NULL == i->Expression)
  523. {
  524. int value;
  525. if((X86 == Architecture) || (AMD64 == Architecture) || (ARMV7L == Architecture) || (AARM64 == Architecture))
  526. {
  527. value = numerate_string(i->Text + 1);
  528. if(('0' == i->Text[1]) || (0 != value))
  529. {
  530. i->Expression = express_number(value, i->Text[0]);
  531. }
  532. }
  533. else if(KNIGHT == Architecture)
  534. {
  535. value = numerate_string(i->Text);
  536. if(('0' == i->Text[0]) || (0 != value))
  537. {
  538. i->Expression = express_number(value, '@');
  539. }
  540. }
  541. else
  542. {
  543. file_print("Unknown architecture received in eval_immediates\n", stderr);
  544. exit(EXIT_FAILURE);
  545. }
  546. }
  547. }
  548. }
  549. void print_hex(struct Token* p)
  550. {
  551. struct Token* i;
  552. for(i = p; NULL != i; i = i->next)
  553. {
  554. if(NEWLINE == i->type)
  555. {
  556. if(NULL == i->next) fputc('\n', destination_file);
  557. else if((NEWLINE != i->next->type) && (MACRO != i->next->type)) fputc('\n', destination_file);
  558. }
  559. else if(i->type != MACRO)
  560. {
  561. file_print(i->Expression, destination_file);
  562. if(NEWLINE != i->next->type) fputc(' ', destination_file);
  563. }
  564. }
  565. }
  566. /* Standard C main program */
  567. int main(int argc, char **argv)
  568. {
  569. BigEndian = TRUE;
  570. struct Token* head = NULL;
  571. Architecture = KNIGHT;
  572. destination_file = stdout;
  573. BigBitEndian = TRUE;
  574. ByteMode = 16;
  575. char* filename;
  576. char* arch;
  577. int option_index = 1;
  578. while(option_index <= argc)
  579. {
  580. if(NULL == argv[option_index])
  581. {
  582. option_index = option_index + 1;
  583. }
  584. else if(match(argv[option_index], "--BigEndian"))
  585. {
  586. BigEndian = TRUE;
  587. option_index = option_index + 1;
  588. }
  589. else if(match(argv[option_index], "--LittleEndian"))
  590. {
  591. BigEndian = FALSE;
  592. option_index = option_index + 1;
  593. }
  594. else if(match(argv[option_index], "-A") || match(argv[option_index], "--architecture"))
  595. {
  596. arch = argv[option_index + 1];
  597. if(match("knight-native", arch) || match("knight-posix", arch)) Architecture = KNIGHT;
  598. else if(match("x86", arch)) Architecture = X86;
  599. else if(match("amd64", arch)) Architecture = AMD64;
  600. else if(match("armv7l", arch)) Architecture = ARMV7L;
  601. else if(match("aarch64", arch)) Architecture = AARM64;
  602. else
  603. {
  604. file_print("Unknown architecture: ", stderr);
  605. file_print(arch, stderr);
  606. file_print(" know values are: knight-native, knight-posix, x86, amd64, armv7l and aarch64", stderr);
  607. exit(EXIT_FAILURE);
  608. }
  609. option_index = option_index + 2;
  610. }
  611. else if(match(argv[option_index], "-b") || match(argv[option_index], "--binary"))
  612. {
  613. ByteMode = 2;
  614. option_index = option_index + 1;
  615. }
  616. else if(match(argv[option_index], "-h") || match(argv[option_index], "--help"))
  617. {
  618. file_print("Usage: ", stderr);
  619. file_print(argv[0], stderr);
  620. file_print(" -f FILENAME1 {-f FILENAME2} (--BigEndian|--LittleEndian) ", stderr);
  621. file_print("[--architecture name]\nArchitectures: knight-native, knight-posix, x86, amd64 and armv7\n", stderr);
  622. file_print("To leverage octal or binary output: --octal, --binary\n", stderr);
  623. exit(EXIT_SUCCESS);
  624. }
  625. else if(match(argv[option_index], "-f") || match(argv[option_index], "--file"))
  626. {
  627. filename = argv[option_index + 1];
  628. source_file = fopen(filename, "r");
  629. if(NULL == source_file)
  630. {
  631. file_print("The file: ", stderr);
  632. file_print(argv[option_index + 1], stderr);
  633. file_print(" can not be opened!\n", stderr);
  634. exit(EXIT_FAILURE);
  635. }
  636. head = Tokenize_Line(head, filename);
  637. option_index = option_index + 2;
  638. }
  639. else if(match(argv[option_index], "-o") || match(argv[option_index], "--output"))
  640. {
  641. destination_file = fopen(argv[option_index + 1], "w");
  642. if(NULL == destination_file)
  643. {
  644. file_print("The file: ", stderr);
  645. file_print(argv[option_index + 1], stderr);
  646. file_print(" can not be opened!\n", stderr);
  647. exit(EXIT_FAILURE);
  648. }
  649. option_index = option_index + 2;
  650. }
  651. else if(match(argv[option_index], "-O") || match(argv[option_index], "--octal"))
  652. {
  653. ByteMode = 8;
  654. option_index = option_index + 1;
  655. }
  656. else if(match(argv[option_index], "-V") || match(argv[option_index], "--version"))
  657. {
  658. file_print("M1 0.6.0\n", stdout);
  659. exit(EXIT_SUCCESS);
  660. }
  661. else
  662. {
  663. file_print("Unknown option\n", stderr);
  664. exit(EXIT_FAILURE);
  665. }
  666. }
  667. if(NULL == head)
  668. {
  669. file_print("Either no input files were given or they were empty\n", stderr);
  670. exit(EXIT_FAILURE);
  671. }
  672. head = reverse_list(head);
  673. identify_macros(head);
  674. line_macro(head);
  675. process_string(head);
  676. eval_immediates(head);
  677. preserve_other(head);
  678. print_hex(head);
  679. return EXIT_SUCCESS;
  680. }