/* tokens.c - token row utilities: allocation, comparison, insertion, copying and output */
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <string.h>
  4. #include "cpp.h"
  5. static char wbuf[2*OBS];
  6. static char *wbp = wbuf;
  /*
   * Looked up by token type.  A 1 marks a token that doesn't need
   * whitespace when it gets inserted by macro expansion.  The label in
   * front of each value names the token type that slot is listed for.
   */
  static const char wstab[] = {
      /* END     */ 0, /* UNCLASS */ 0, /* NAME    */ 0, /* NUMBER  */ 0,
      /* STRING  */ 0, /* CCON    */ 0, /* NL      */ 1, /* WS      */ 0,
      /* DSHARP  */ 0, /* EQ      */ 0, /* NEQ     */ 0, /* LEQ     */ 0,
      /* GEQ     */ 0, /* LSH     */ 0, /* RSH     */ 0, /* LAND    */ 0,
      /* LOR     */ 0, /* PPLUS   */ 0, /* MMINUS  */ 0, /* ARROW   */ 0,
      /* SBRA    */ 1, /* SKET    */ 1, /* LP      */ 1, /* RP      */ 1,
      /* DOT     */ 0, /* AND     */ 0, /* STAR    */ 0, /* PLUS    */ 0,
      /* MINUS   */ 0, /* TILDE   */ 0, /* NOT     */ 0, /* SLASH   */ 0,
      /* PCT     */ 0, /* LT      */ 0, /* GT      */ 0, /* CIRC    */ 0,
      /* OR      */ 0, /* QUEST   */ 0, /* COLON   */ 0, /* ASGN    */ 0,
      /* COMMA   */ 1, /* SHARP   */ 0, /* SEMIC   */ 1, /* CBRA    */ 1,
      /* CKET    */ 1, /* ASPLUS  */ 0, /* ASMINUS */ 0, /* ASSTAR  */ 0,
      /* ASSLASH */ 0, /* ASPCT   */ 0, /* ASCIRC  */ 0, /* ASLSH   */ 0,
      /* ASRSH   */ 0, /* ASOR    */ 0, /* ASAND   */ 0, /* ELLIPS  */ 0,
      /* DSHARP1 */ 0, /* NAME1   */ 0, /* DEFINED */ 0, /* UMINUS  */ 0,
  };
  73. void
  74. maketokenrow(int size, Tokenrow *trp)
  75. {
  76. trp->max = size;
  77. if (size>0)
  78. trp->bp = (Token *)domalloc(size*sizeof(Token));
  79. else
  80. trp->bp = NULL;
  81. trp->tp = trp->bp;
  82. trp->lp = trp->bp;
  83. }
  84. Token *
  85. growtokenrow(Tokenrow *trp)
  86. {
  87. int ncur = trp->tp - trp->bp;
  88. int nlast = trp->lp - trp->bp;
  89. trp->max = 3*trp->max/2 + 1;
  90. trp->bp = (Token *)realloc(trp->bp, trp->max*sizeof(Token));
  91. if (trp->bp == NULL)
  92. error(FATAL, "Out of memory from realloc");
  93. trp->lp = &trp->bp[nlast];
  94. trp->tp = &trp->bp[ncur];
  95. return trp->lp;
  96. }
  97. /*
  98. * Compare a row of tokens, ignoring the content of WS; return !=0 if different
  99. */
  100. int
  101. comparetokens(Tokenrow *tr1, Tokenrow *tr2)
  102. {
  103. Token *tp1, *tp2;
  104. tp1 = tr1->tp;
  105. tp2 = tr2->tp;
  106. if (tr1->lp-tp1 != tr2->lp-tp2)
  107. return 1;
  108. for (; tp1<tr1->lp ; tp1++, tp2++) {
  109. if (tp1->type != tp2->type
  110. || (tp1->wslen==0) != (tp2->wslen==0)
  111. || tp1->len != tp2->len
  112. || strncmp((char*)tp1->t, (char*)tp2->t, tp1->len)!=0)
  113. return 1;
  114. }
  115. return 0;
  116. }
  117. /*
  118. * replace ntok tokens starting at dtr->tp with the contents of str.
  119. * tp ends up pointing just beyond the replacement.
  120. * Canonical whitespace is assured on each side.
  121. */
  122. void
  123. insertrow(Tokenrow *dtr, int ntok, Tokenrow *str)
  124. {
  125. int nrtok = rowlen(str);
  126. dtr->tp += ntok;
  127. adjustrow(dtr, nrtok-ntok);
  128. dtr->tp -= ntok;
  129. movetokenrow(dtr, str);
  130. makespace(dtr);
  131. dtr->tp += nrtok;
  132. makespace(dtr);
  133. }
  134. /*
  135. * make sure there is WS before trp->tp, if tokens might merge in the output
  136. */
  137. void
  138. makespace(Tokenrow *trp)
  139. {
  140. uchar *tt;
  141. Token *tp = trp->tp;
  142. if (tp >= trp->lp)
  143. return;
  144. if (tp->wslen) {
  145. if (tp->flag&XPWS
  146. && (wstab[tp->type] || trp->tp>trp->bp && wstab[(tp-1)->type])) {
  147. tp->wslen = 0;
  148. return;
  149. }
  150. tp->t[-1] = ' ';
  151. return;
  152. }
  153. if (wstab[tp->type] || trp->tp>trp->bp && wstab[(tp-1)->type])
  154. return;
  155. tt = newstring(tp->t, tp->len, 1);
  156. *tt++ = ' ';
  157. tp->t = tt;
  158. tp->wslen = 1;
  159. tp->flag |= XPWS;
  160. }
  161. /*
  162. * Copy an entire tokenrow into another, at tp.
  163. * It is assumed that there is enough space.
  164. * Not strictly conforming.
  165. */
  166. void
  167. movetokenrow(Tokenrow *dtr, Tokenrow *str)
  168. {
  169. int nby;
  170. /* nby = sizeof(Token) * (str->lp - str->bp); */
  171. nby = (char *)str->lp - (char *)str->bp;
  172. memmove(dtr->tp, str->bp, nby);
  173. }
  174. /*
  175. * Move the tokens in a row, starting at tr->tp, rightward by nt tokens;
  176. * nt may be negative (left move).
  177. * The row may need to be grown.
  178. * Non-strictly conforming because of the (char *), but easily fixed
  179. */
  180. void
  181. adjustrow(Tokenrow *trp, int nt)
  182. {
  183. int nby, size;
  184. if (nt==0)
  185. return;
  186. size = (trp->lp - trp->bp) + nt;
  187. while (size > trp->max)
  188. growtokenrow(trp);
  189. /* nby = sizeof(Token) * (trp->lp - trp->tp); */
  190. nby = (char *)trp->lp - (char *)trp->tp;
  191. if (nby)
  192. memmove(trp->tp+nt, trp->tp, nby);
  193. trp->lp += nt;
  194. }
  195. /*
  196. * Copy a row of tokens into the destination holder, allocating
  197. * the space for the contents. Return the destination.
  198. */
  199. Tokenrow *
  200. copytokenrow(Tokenrow *dtr, Tokenrow *str)
  201. {
  202. int len = rowlen(str);
  203. maketokenrow(len, dtr);
  204. movetokenrow(dtr, str);
  205. dtr->lp += len;
  206. return dtr;
  207. }
  208. /*
  209. * Produce a copy of a row of tokens. Start at trp->tp.
  210. * The value strings are copied as well. The first token
  211. * has WS available.
  212. */
  213. Tokenrow *
  214. normtokenrow(Tokenrow *trp)
  215. {
  216. Token *tp;
  217. Tokenrow *ntrp = new(Tokenrow);
  218. int len;
  219. len = trp->lp - trp->tp;
  220. if (len<=0)
  221. len = 1;
  222. maketokenrow(len, ntrp);
  223. for (tp=trp->tp; tp < trp->lp; tp++) {
  224. *ntrp->lp = *tp;
  225. if (tp->len) {
  226. ntrp->lp->t = newstring(tp->t, tp->len, 1);
  227. *ntrp->lp->t++ = ' ';
  228. if (tp->wslen)
  229. ntrp->lp->wslen = 1;
  230. }
  231. ntrp->lp++;
  232. }
  233. if (ntrp->lp > ntrp->bp)
  234. ntrp->bp->wslen = 0;
  235. return ntrp;
  236. }
  237. /*
  238. * Debugging
  239. */
  240. void
  241. peektokens(Tokenrow *trp, char *str)
  242. {
  243. Token *tp;
  244. tp = trp->tp;
  245. flushout();
  246. if (str)
  247. fprintf(stderr, "%s ", str);
  248. if (tp<trp->bp || tp>trp->lp)
  249. fprintf(stderr, "(tp offset %d) ", tp-trp->bp);
  250. for (tp=trp->bp; tp<trp->lp && tp<trp->bp+32; tp++) {
  251. if (tp->type!=NL) {
  252. int c = tp->t[tp->len];
  253. tp->t[tp->len] = 0;
  254. fprintf(stderr, "%s", tp->t);
  255. tp->t[tp->len] = c;
  256. }
  257. if (tp->type==NAME) {
  258. fprintf(stderr, tp==trp->tp?"{*":"{");
  259. prhideset(tp->hideset);
  260. fprintf(stderr, "} ");
  261. } else
  262. fprintf(stderr, tp==trp->tp?"{%x*} ":"{%x} ", tp->type);
  263. }
  264. fprintf(stderr, "\n");
  265. fflush(stderr);
  266. }
  267. void
  268. puttokens(Tokenrow *trp)
  269. {
  270. Token *tp;
  271. int len;
  272. uchar *p;
  273. if (verbose)
  274. peektokens(trp, "");
  275. tp = trp->bp;
  276. for (; tp<trp->lp; tp++) {
  277. len = tp->len+tp->wslen;
  278. p = tp->t-tp->wslen;
  279. while (tp<trp->lp-1 && p+len == (tp+1)->t - (tp+1)->wslen) {
  280. tp++;
  281. len += tp->wslen+tp->len;
  282. }
  283. if (len>OBS/2) { /* handle giant token */
  284. if (wbp > wbuf)
  285. write(1, wbuf, wbp-wbuf);
  286. write(1, (char *)p, len);
  287. wbp = wbuf;
  288. } else {
  289. memcpy(wbp, p, len);
  290. wbp += len;
  291. }
  292. if (wbp >= &wbuf[OBS]) {
  293. write(1, wbuf, OBS);
  294. if (wbp > &wbuf[OBS])
  295. memcpy(wbuf, wbuf+OBS, wbp - &wbuf[OBS]);
  296. wbp -= OBS;
  297. }
  298. }
  299. trp->tp = tp;
  300. if (cursource->fd==0)
  301. flushout();
  302. }
  303. void
  304. flushout(void)
  305. {
  306. if (wbp>wbuf) {
  307. write(1, wbuf, wbp-wbuf);
  308. wbp = wbuf;
  309. }
  310. }
  311. /*
  312. * turn a row into just a newline
  313. */
  314. void
  315. setempty(Tokenrow *trp)
  316. {
  317. trp->tp = trp->bp;
  318. trp->lp = trp->bp+1;
  319. *trp->bp = nltoken;
  320. }
  /*
   * Write the decimal digits of n at p, most significant first, and
   * return the position just past the last digit.  No terminator is
   * written.  There is no sign handling: any n below 10 produces exactly
   * one character, n%10 + '0'.
   */
  char *
  outnum(char *p, int n)
  {
      char digits[sizeof(int)*3 + 1];
      int k = 0;

      /* collect digits least significant first ... */
      digits[k++] = n%10 + '0';
      while (n >= 10) {
          n /= 10;
          digits[k++] = n%10 + '0';
      }
      /* ... then emit them in reverse */
      while (k > 0)
          *p++ = digits[--k];
      return p;
  }
  332. /*
  333. * allocate and initialize a new string from s, of length l, at offset o
  334. * Null terminated.
  335. */
  336. uchar *
  337. newstring(uchar *s, int l, int o)
  338. {
  339. uchar *ns = (uchar *)domalloc(l+o+1);
  340. ns[l+o] = '\0';
  341. return (uchar*)strncpy((char*)ns+o, (char*)s, l) - o;
  342. }