input.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512
  1. /* Copyright 2010, 2011, 2012, 2013, 2014, 2015
  2. Free Software Foundation, Inc.
  3. This program is free software: you can redistribute it and/or modify
  4. it under the terms of the GNU General Public License as published by
  5. the Free Software Foundation, either version 3 of the License, or
  6. (at your option) any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU General Public License for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  13. #define _GNU_SOURCE
  14. #include <stdlib.h>
  15. #include <stdio.h>
  16. #include <string.h>
  17. #include <iconv.h>
  18. #include <errno.h>
  19. #include <sys/stat.h>
  20. #include "tree_types.h"
  21. #include "input.h"
  22. #include "text.h"
  23. #include "api.h"
  24. enum input_type { IN_file, IN_text };
  25. enum character_encoding {
  26. ce_latin1,
  27. ce_latin2,
  28. ce_utf8,
  29. ce_shiftjis
  30. };
  31. typedef struct {
  32. enum input_type type;
  33. FILE *file;
  34. char *input_encoding;
  35. LINE_NR line_nr;
  36. char *text; /* Input text to be parsed as Texinfo. */
  37. char *ptext; /* How far we are through 'text'. Used to split 'text'
  38. into lines. */
  39. } INPUT;
  40. static INPUT *input_stack = 0;
  41. int input_number = 0;
  42. int input_space = 0;
  43. /* Current filename and line number. Used for reporting. */
  44. LINE_NR line_nr;
  45. // 1961
  46. /* Collect text from the input sources until a newline is found. This is used
  47. instead of next_text when we need to be sure we get an entire line of
  48. Texinfo input (for example as a line argument to a command), which might not
  49. be the case if the input is the result of a macro expansion.
  50. Return value should not be freed by caller, and becomes invalid after
  51. a subsequent call. */
  52. char *
  53. new_line (void)
  54. {
  55. static TEXT t;
  56. char *new = 0;
  57. t.end = 0;
  58. while (1)
  59. {
  60. new = next_text ();
  61. if (!new)
  62. break;
  63. text_append (&t, new);
  64. free (new);
  65. if (t.text[t.end - 1] == '\n')
  66. break;
  67. }
  68. if (t.end > 0)
  69. return t.text;
  70. else
  71. return 0;
  72. }
  73. /* TODO: integrate with gnulib */
  74. #define ICONV_CONST
  75. static iconv_t iconv_from_latin1 = (iconv_t) 0;
  76. static iconv_t iconv_from_latin2;
  77. static iconv_t iconv_from_shiftjis;
  78. /* Run iconv using text buffer as output buffer. */
  79. size_t
  80. text_buffer_iconv (TEXT *buf, iconv_t iconv_state,
  81. ICONV_CONST char **inbuf, size_t *inbytesleft)
  82. {
  83. size_t out_bytes_left;
  84. char *outptr;
  85. size_t iconv_ret;
  86. outptr = buf->text + buf->end;
  87. if (buf->end == buf->space - 1)
  88. {
  89. errno = E2BIG;
  90. return (size_t) -1;
  91. }
  92. out_bytes_left = buf->space - buf->end - 1;
  93. iconv_ret = iconv (iconv_state, inbuf, inbytesleft,
  94. &outptr, &out_bytes_left);
  95. buf->end = outptr - buf->text;
  96. return iconv_ret;
  97. }
  98. /* Return conversion of S according to ENC. This function frees S. */
  99. static char *
  100. convert_to_utf8 (char *s, char *input_encoding)
  101. {
  102. iconv_t our_iconv;
  103. static TEXT t;
  104. char *inptr; size_t bytes_left;
  105. size_t iconv_ret;
  106. enum character_encoding enc;
  107. /* Convert from @documentencoding to UTF-8.
  108. It might be possible not to convert to UTF-8 and use an 8-bit encoding
  109. throughout, but then we'd have to not set the UTF-8 flag on the Perl
  110. strings in api.c. If multiple character encodings were used in a single
  111. file, then we'd have to keep track of which strings needed the UTF-8 flag
  112. and which didn't. */
  113. /* Could and check for malformed input: see
  114. <http://savannah.gnu.org/bugs/?42896>. */
  115. if (iconv_from_latin1 == (iconv_t) 0)
  116. {
  117. /* Initialize the conversion for the first time. */
  118. iconv_from_latin1 = iconv_open ("UTF-8", "ISO-8859-1");
  119. if (iconv_from_latin1 == (iconv_t) -1)
  120. {
  121. abort ();
  122. /* big trouble. if we do return it unconverted, we will have to
  123. remember not to set the UTF-8 flags on the Perl strings, otherwise
  124. Perl will choke. */
  125. return s;
  126. }
  127. }
  128. if (iconv_from_latin2 == (iconv_t) 0)
  129. {
  130. /* Initialize the conversion for the first time. */
  131. iconv_from_latin2 = iconv_open ("UTF-8", "ISO-8859-2");
  132. if (iconv_from_latin2 == (iconv_t) -1)
  133. iconv_from_latin2 = iconv_from_latin1;
  134. }
  135. if (iconv_from_shiftjis == (iconv_t) 0)
  136. {
  137. /* Initialize the conversion for the first time. */
  138. iconv_from_shiftjis = iconv_open ("UTF-8", "SHIFT-JIS");
  139. if (iconv_from_shiftjis == (iconv_t) -1)
  140. iconv_from_shiftjis = iconv_from_latin1;
  141. }
  142. enc = ce_latin1;
  143. if (!input_encoding)
  144. ;
  145. else if (!strcmp (input_encoding, "utf-8"))
  146. enc = ce_utf8;
  147. else if (!strcmp (input_encoding, "iso-8859-2"))
  148. enc = ce_latin2;
  149. else if (!strcmp (input_encoding, "shift_jis"))
  150. enc = ce_shiftjis;
  151. switch (enc)
  152. {
  153. case ce_utf8:
  154. return s; /* no conversion required. */
  155. break;
  156. case ce_latin1:
  157. our_iconv = iconv_from_latin1;
  158. break;
  159. case ce_latin2:
  160. our_iconv = iconv_from_latin2;
  161. break;
  162. case ce_shiftjis:
  163. our_iconv = iconv_from_shiftjis;
  164. break;
  165. }
  166. t.end = 0;
  167. inptr = s;
  168. bytes_left = strlen (s);
  169. text_alloc (&t, 10);
  170. while (1)
  171. {
  172. iconv_ret = text_buffer_iconv (&t, our_iconv,
  173. &inptr, &bytes_left);
  174. /* Make sure libiconv flushes out the last converted character.
  175. This is required when the conversion is stateful, in which
  176. case libiconv might not output the last character, waiting to
  177. see whether it should be combined with the next one. */
  178. if (iconv_ret != (size_t) -1
  179. && text_buffer_iconv (&t, our_iconv, 0, 0) != (size_t) -1)
  180. /* Success: all of input converted. */
  181. break;
  182. switch (errno)
  183. {
  184. case E2BIG:
  185. text_alloc (&t, t.space + 20);
  186. break;
  187. default:
  188. abort ();
  189. break;
  190. }
  191. }
  192. free (s);
  193. t.text[t.end] = '\0';
  194. //fprintf (stderr, "CONVERTED STRING IS <<%s>>", t.text);
  195. return strdup (t.text);
  196. }
  197. int
  198. expanding_macro (char *macro)
  199. {
  200. int i;
  201. for (i = 0; i < input_number; i++)
  202. {
  203. if (input_stack[i].line_nr.macro
  204. && !strcmp (input_stack[i].line_nr.macro, macro))
  205. {
  206. return 1;
  207. }
  208. }
  209. return 0;
  210. }
  211. /* Return value to be freed by caller. Return null if we are out of input. */
  212. char *
  213. next_text (void)
  214. {
  215. ssize_t status;
  216. char *line = 0;
  217. size_t n;
  218. FILE *input_file;
  219. while (input_number > 0)
  220. {
  221. /* Check for pending input. */
  222. INPUT *i = &input_stack[input_number - 1];
  223. switch (i->type)
  224. {
  225. char *p, *new;
  226. case IN_text:
  227. if (!*i->ptext)
  228. {
  229. /* End of text reached. */
  230. free (i->text);
  231. break;
  232. }
  233. /* Split off a line of input. */
  234. p = strchrnul (i->ptext, '\n');
  235. new = strndup (i->ptext, p - i->ptext + 1);
  236. if (*p)
  237. i->ptext = p + 1;
  238. else
  239. i->ptext = p; /* The next time, we will pop the input source. */
  240. if (!i->line_nr.macro)
  241. i->line_nr.line_nr++;
  242. line_nr = i->line_nr;
  243. return convert_to_utf8 (new, 0); // i->input_encoding);
  244. break;
  245. case IN_file: // 1911
  246. input_file = input_stack[input_number - 1].file;
  247. status = getline (&line, &n, input_file);
  248. while (status != -1)
  249. {
  250. char *comment;
  251. if (feof (input_file))
  252. {
  253. /* Add a newline at the end of the file if one is missing. */
  254. char *line2;
  255. asprintf (&line2, "%s\n", line);
  256. free (line);
  257. line = line2;
  258. }
  259. /* Strip off a comment. */
  260. comment = strchr (line, '\x7F');
  261. if (comment)
  262. *comment = '\0';
  263. // 1920 CPP_LINE_DIRECTIVES
  264. i->line_nr.line_nr++;
  265. line_nr = i->line_nr;
  266. return convert_to_utf8 (line, i->input_encoding);
  267. }
  268. free (line); line = 0;
  269. break;
  270. default:
  271. abort ();
  272. }
  273. /* Top input source failed. Pop it and try the next one. */
  274. if (input_stack[input_number - 1].type == IN_file)
  275. {
  276. FILE *file = input_stack[input_number - 1].file;
  277. if (file != stdin)
  278. {
  279. if (fclose (input_stack[input_number - 1].file) == EOF)
  280. abort (); // error
  281. }
  282. }
  283. input_number--;
  284. }
  285. return 0;
  286. }
  287. void
  288. input_push (char *text, char *macro, char *filename, int line_number)
  289. {
  290. if (input_number == input_space)
  291. {
  292. input_space++; input_space *= 1.5;
  293. input_stack = realloc (input_stack, input_space * sizeof (INPUT));
  294. if (!input_stack)
  295. abort ();
  296. }
  297. input_stack[input_number].type = IN_text;
  298. input_stack[input_number].file = 0;
  299. input_stack[input_number].text = text;
  300. input_stack[input_number].ptext = text;
  301. input_stack[input_number].input_encoding = 0;
  302. if (!macro)
  303. line_number--;
  304. input_stack[input_number].line_nr.line_nr = line_number;
  305. input_stack[input_number].line_nr.file_name = filename;
  306. input_stack[input_number].line_nr.macro = macro;
  307. input_number++;
  308. }
  309. /* Store TEXT as a source for Texinfo content. TEXT will be later free'd
  310. and must be allocated on the heap. MACRO is the name of a macro that
  311. the text came from. */
  312. void
  313. input_push_text (char *text, char *macro)
  314. {
  315. if (text)
  316. input_push (text, macro, 0, line_nr.line_nr);
  317. }
  318. /* Used in tests - like input_push_text, but the lines from the text have
  319. line numbers. */
  320. void
  321. input_push_text_with_line_nos (char *text, int starting)
  322. {
  323. input_push (text, 0, 0, starting);
  324. input_stack[input_number - 1].type = IN_text;
  325. }
  326. void
  327. input_reset_input_stack (void)
  328. {
  329. input_number = 0;
  330. /* TODO: free the memory */
  331. }
  332. int
  333. top_file_index (void)
  334. {
  335. int i = input_number - 1;
  336. while (i >= 0 && input_stack[i].type != IN_file)
  337. i--;
  338. return i;
  339. }
  340. void
  341. set_input_encoding (char *encoding)
  342. {
  343. int i;
  344. /* Set encoding of top file in stack. */
  345. i = top_file_index ();
  346. if (i >= 0)
  347. input_stack[i].input_encoding = encoding;
  348. }
  349. static char **include_dirs;
  350. static size_t include_dirs_number;
  351. static size_t include_dirs_space;
  352. void
  353. add_include_directory (char *filename)
  354. {
  355. int len;
  356. if (include_dirs_number == include_dirs_space)
  357. {
  358. include_dirs = realloc (include_dirs,
  359. sizeof (char *) * (include_dirs_space += 5));
  360. }
  361. filename = strdup (filename);
  362. include_dirs[include_dirs_number++] = filename;
  363. len = strlen (filename);
  364. if (len > 0 && filename[len - 1] == '/')
  365. filename[len - 1] = '\0';
  366. }
  367. char *
  368. locate_include_file (char *filename)
  369. {
  370. char *fullpath;
  371. struct stat dummy;
  372. int i, status;
  373. /* Checks if filename is absolute or relative to current directory.
  374. TODO: Could use macros in top-level config.h for this. */
  375. /* TODO: The Perl code (in Common.pm, 'locate_include_file') handles
  376. a volume in a path (like "A:"), possibly more general treatment
  377. with File::Spec module. */
  378. if (!memcmp (filename, "/", 1)
  379. || !memcmp (filename, "../", 3)
  380. || !memcmp (filename, "./", 2))
  381. {
  382. status = stat (filename, &dummy);
  383. if (status == 0)
  384. return filename;
  385. }
  386. else
  387. {
  388. for (i = 0; i < include_dirs_number; i++)
  389. {
  390. asprintf (&fullpath, "%s/%s", include_dirs[i], filename);
  391. status = stat (fullpath, &dummy);
  392. if (status == 0)
  393. return fullpath;
  394. free (fullpath);
  395. }
  396. }
  397. return 0;
  398. }
  399. /* Try to open a file called FILENAME, looking for it in the list of include
  400. directories. */
  401. int
  402. input_push_file (char *filename)
  403. {
  404. FILE *stream;
  405. stream = fopen (filename, "r");
  406. if (!stream)
  407. return errno;
  408. if (input_number == input_space)
  409. {
  410. input_stack = realloc (input_stack, (input_space += 5) * sizeof (INPUT));
  411. if (!input_stack)
  412. abort ();
  413. }
  414. /* Strip off a leading directory path. */
  415. char *p, *q;
  416. p = 0;
  417. q = strchr (filename, '/');
  418. while (q)
  419. {
  420. p = q;
  421. q = strchr (q + 1, '/');
  422. }
  423. if (p)
  424. filename = strdup (p+1);
  425. input_stack[input_number].type = IN_file;
  426. input_stack[input_number].file = stream;
  427. input_stack[input_number].line_nr.file_name = filename;
  428. input_stack[input_number].line_nr.line_nr = 0;
  429. input_stack[input_number].line_nr.macro = 0;
  430. input_stack[input_number].text = 0;
  431. input_stack[input_number].ptext = 0;
  432. input_stack[input_number].input_encoding = 0;
  433. input_number++;
  434. return 0;
  435. }