read.c 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299
  1. /* Copyright (C) 1995,1996,1997,1999,2000,2001,2003, 2004, 2006, 2007, 2008 Free Software
  2. * Foundation, Inc.
  3. *
  4. * This library is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU Lesser General Public
  6. * License as published by the Free Software Foundation; either
  7. * version 2.1 of the License, or (at your option) any later version.
  8. *
  9. * This library is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * Lesser General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU Lesser General Public
  15. * License along with this library; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17. */
  18. #ifdef HAVE_CONFIG_H
  19. # include <config.h>
  20. #endif
  21. #include <stdio.h>
  22. #include <ctype.h>
  23. #include <string.h>
  24. #ifdef HAVE_STRINGS_H
  25. # include <strings.h>
  26. #endif
  27. #include "libguile/_scm.h"
  28. #include "libguile/chars.h"
  29. #include "libguile/eval.h"
  30. #include "libguile/unif.h"
  31. #include "libguile/keywords.h"
  32. #include "libguile/alist.h"
  33. #include "libguile/srcprop.h"
  34. #include "libguile/hashtab.h"
  35. #include "libguile/hash.h"
  36. #include "libguile/ports.h"
  37. #include "libguile/root.h"
  38. #include "libguile/strings.h"
  39. #include "libguile/strports.h"
  40. #include "libguile/vectors.h"
  41. #include "libguile/validate.h"
  42. #include "libguile/srfi-4.h"
  43. #include "libguile/srfi-13.h"
  44. #include "libguile/read.h"
/* The symbol `.', which the list reader compares against to recognise
   dotted-pair syntax.  */
SCM_GLOBAL_SYMBOL (scm_sym_dot, ".");

/* Symbols the reader compares with the value of the `keywords' read option
   to decide whether `:foo' (prefix) or `foo:' (postfix) denotes a
   keyword.  */
SCM_SYMBOL (scm_keyword_prefix, "prefix");
SCM_SYMBOL (scm_keyword_postfix, "postfix");

/* Table of read options; it is handed to `scm_options ()' by
   `read-options-interface'.  Each entry gives the option type, its name, a
   value and a documentation string.
   NOTE(review): the SCM_*_P accessors used throughout this file presumably
   index this table by position -- confirm against read.h before reordering
   or inserting entries.  */
scm_t_option scm_read_opts[] = {
  { SCM_OPTION_BOOLEAN, "copy", 0,
    "Copy source code expressions." },
  { SCM_OPTION_BOOLEAN, "positions", 0,
    "Record positions of source code expressions." },
  { SCM_OPTION_BOOLEAN, "case-insensitive", 0,
    "Convert symbols to lower case."},
  { SCM_OPTION_SCM, "keywords", SCM_UNPACK (SCM_BOOL_F),
    "Style of keyword recognition: #f, 'prefix or 'postfix."}
#if SCM_ENABLE_ELISP
  ,
  { SCM_OPTION_BOOLEAN, "elisp-vectors", 0,
    "Support Elisp vector syntax, namely `[...]'."},
  { SCM_OPTION_BOOLEAN, "elisp-strings", 0,
    "Support `\\(' and `\\)' in strings."}
#endif
};
  65. /*
  66. Give meaningful error messages for errors
  67. We use the format
  68. FILE:LINE:COL: MESSAGE
  69. This happened in ....
  70. This is not standard GNU format, but the test-suite likes the real
  71. message to be in front.
  72. */
  73. void
  74. scm_i_input_error (char const *function,
  75. SCM port, const char *message, SCM arg)
  76. {
  77. SCM fn = (scm_is_string (SCM_FILENAME(port))
  78. ? SCM_FILENAME(port)
  79. : scm_from_locale_string ("#<unknown port>"));
  80. SCM string_port = scm_open_output_string ();
  81. SCM string = SCM_EOL;
  82. scm_simple_format (string_port,
  83. scm_from_locale_string ("~A:~S:~S: ~A"),
  84. scm_list_4 (fn,
  85. scm_from_long (SCM_LINUM (port) + 1),
  86. scm_from_int (SCM_COL (port) + 1),
  87. scm_from_locale_string (message)));
  88. string = scm_get_output_string (string_port);
  89. scm_close_output_port (string_port);
  90. scm_error_scm (scm_from_locale_symbol ("read-error"),
  91. function? scm_from_locale_string (function) : SCM_BOOL_F,
  92. string,
  93. arg,
  94. SCM_BOOL_F);
  95. }
  96. SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0,
  97. (SCM setting),
  98. "Option interface for the read options. Instead of using\n"
  99. "this procedure directly, use the procedures @code{read-enable},\n"
  100. "@code{read-disable}, @code{read-set!} and @code{read-options}.")
  101. #define FUNC_NAME s_scm_read_options
  102. {
  103. SCM ans = scm_options (setting,
  104. scm_read_opts,
  105. SCM_N_READ_OPTIONS,
  106. FUNC_NAME);
  107. if (SCM_COPY_SOURCE_P)
  108. SCM_RECORD_POSITIONS_P = 1;
  109. return ans;
  110. }
  111. #undef FUNC_NAME
/* An association list mapping extra hash characters to procedures.
   NOTE(review): this is a pointer to an SCM rather than an SCM; where it is
   initialised and how `scm_get_hash_procedure ()' consults it lies outside
   this part of the file -- confirm before relying on it.  */
static SCM *scm_read_hash_procedures;
  114. /* Token readers. */
  115. /* Size of the C buffer used to read symbols and numbers. */
  116. #define READER_BUFFER_SIZE 128
  117. /* Size of the C buffer used to read strings. */
  118. #define READER_STRING_BUFFER_SIZE 512
  119. /* The maximum size of Scheme character names. */
  120. #define READER_CHAR_NAME_MAX_SIZE 50
  121. /* `isblank' is only in C99. */
  122. #define CHAR_IS_BLANK_(_chr) \
  123. (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n') \
  124. || ((_chr) == '\f') || ((_chr) == '\r'))
  125. #ifdef MSDOS
  126. # define CHAR_IS_BLANK(_chr) \
  127. ((CHAR_IS_BLANK_ (chr)) || ((_chr) == 26))
  128. #else
  129. # define CHAR_IS_BLANK CHAR_IS_BLANK_
  130. #endif
  131. /* R5RS one-character delimiters (see section 7.1.1, ``Lexical
  132. structure''). */
  133. #define CHAR_IS_R5RS_DELIMITER(c) \
  134. (CHAR_IS_BLANK (c) \
  135. || (c == ')') || (c == '(') || (c == ';') || (c == '"'))
  136. #define CHAR_IS_DELIMITER CHAR_IS_R5RS_DELIMITER
  137. /* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical
  138. Structure''. */
  139. #define CHAR_IS_EXPONENT_MARKER(_chr) \
  140. (((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f') \
  141. || ((_chr) == 'd') || ((_chr) == 'l'))
  142. /* An inlinable version of `scm_c_downcase ()'. */
  143. #define CHAR_DOWNCASE(_chr) \
  144. (((_chr) <= UCHAR_MAX) ? tolower (_chr) : (_chr))
  145. #ifndef HAVE_DECL_STRNCASECMP
  146. extern int strncasecmp (char const *s1, char const *s2, size_t n);
  147. #endif
  148. #ifndef HAVE_STRNCASECMP
  149. /* XXX: Use Gnulib's `strncasecmp ()'. */
  150. static int
  151. strncasecmp (const char *s1, const char *s2, size_t len2)
  152. {
  153. while (*s1 && *s2 && len2 > 0)
  154. {
  155. int c1 = *s1, c2 = *s2;
  156. if (CHAR_DOWNCASE (c1) != CHAR_DOWNCASE (c2))
  157. return 0;
  158. else
  159. {
  160. ++s1;
  161. ++s2;
  162. --len2;
  163. }
  164. }
  165. return !(*s1 || *s2 || len2 > 0);
  166. }
  167. #endif
/* Read an SCSH block comment.  Declared ahead of its definition because
   `flush_ws ()' below calls it when it meets `#!'.  */
static inline SCM scm_read_scsh_block_comment (int chr, SCM port);
  170. /* Read from PORT until a delimiter (e.g., a whitespace) is read. Return
  171. zero if the whole token fits in BUF, non-zero otherwise. */
  172. static inline int
  173. read_token (SCM port, char *buf, size_t buf_size, size_t *read)
  174. {
  175. *read = 0;
  176. while (*read < buf_size)
  177. {
  178. int chr;
  179. chr = scm_getc (port);
  180. chr = (SCM_CASE_INSENSITIVE_P ? CHAR_DOWNCASE (chr) : chr);
  181. if (chr == EOF)
  182. return 0;
  183. else if (CHAR_IS_DELIMITER (chr))
  184. {
  185. scm_ungetc (chr, port);
  186. return 0;
  187. }
  188. else
  189. {
  190. *buf = (char) chr;
  191. buf++, (*read)++;
  192. }
  193. }
  194. return 1;
  195. }
/* Skip whitespace from PORT and return the first non-whitespace character
   read.  Besides blanks, `;' comments (up to the end of the line) and
   `#! ... !#' block comments are skipped too.

   End-of-file handling: when EOFERR is non-NULL, hitting end-of-file raises
   an "end of file" read error reported on behalf of EOFERR; when it is
   NULL, EOF is returned.  A `#' immediately followed by end-of-file always
   raises the error, reported as "read_sharp".

   When `#' is followed by anything other than `!', that character is pushed
   back and `#' itself is returned.  */
static int
flush_ws (SCM port, const char *eoferr)
{
  register int c;
  while (1)
    switch (c = scm_getc (port))
      {
      case EOF:
      goteof:
        /* Shared end-of-file exit; also reached from the comment and `#'
           cases below, with C equal to EOF.  */
        if (eoferr)
          {
            scm_i_input_error (eoferr,
                               port,
                               "end of file",
                               SCM_EOL);
          }
        return c;
      case ';':
      lp:
        /* Line comment: swallow characters until a line terminator.  */
        switch (c = scm_getc (port))
          {
          case EOF:
            goto goteof;
          default:
            goto lp;
          case SCM_LINE_INCREMENTORS:
            break;
          }
        break;
      case '#':
        switch (c = scm_getc (port))
          {
          case EOF:
            /* Forces an error even when the caller passed a NULL EOFERR.  */
            eoferr = "read_sharp";
            goto goteof;
          case '!':
            scm_read_scsh_block_comment (c, port);
            break;
          default:
            /* Not a comment: leave the character for the `#' reader.  */
            scm_ungetc (c, port);
            return '#';
          }
        break;
      case SCM_LINE_INCREMENTORS:
      case SCM_SINGLE_SPACES:
      case '\t':
        break;
      default:
        return c;
      }
  /* Not reached: the loop above only exits through `return'.  */
  return 0;
}
/* Token readers.  */

/* Forward declarations: the readers below call each other recursively, so
   these must be visible before their definitions.  */
static SCM scm_read_expression (SCM port);
static SCM scm_read_sharp (int chr, SCM port);
static SCM scm_get_hash_procedure (int c);
static SCM recsexpr (SCM obj, long line, int column, SCM filename);
/* Read the remainder of a parenthesized list from PORT, the opening
   parenthesis having already been consumed, and return the resulting list
   (the empty list for `()').  CHR is not examined.

   Dotted notation is handled: `(a . b)' yields an improper list, and
   `( . x)' yields X itself.  A missing closing parenthesis after the dotted
   tail raises a read error; end-of-file inside the list is reported by
   `flush_ws ()'.

   When the `positions' read option is set, source properties (line, column,
   file name) are recorded for the returned list; when `copy' is set as
   well, a parallel list ANS2 is built and stored with them.  */
static SCM
scm_read_sexp (int chr, SCM port)
#define FUNC_NAME "scm_i_lreadparen"
{
  register int c;
  register SCM tmp;
  register SCM tl, ans = SCM_EOL;
  /* NOTE(review): COPY is never assigned after this initialisation, so
     every nested pair is represented by #f in the copied list -- this looks
     unintended; confirm against the history of this function.  */
  SCM tl2 = SCM_EOL, ans2 = SCM_EOL, copy = SCM_BOOL_F;
  static const int terminating_char = ')';

  /* Need to capture line and column numbers here, before reading any
     further moves the port's position.  The column is decremented by one
     because the opening parenthesis has already been read.  */
  long line = SCM_LINUM (port);
  int column = SCM_COL (port) - 1;

  /* Empty list.  */
  c = flush_ws (port, FUNC_NAME);
  if (terminating_char == c)
    return SCM_EOL;

  scm_ungetc (c, port);
  if (scm_is_eq (scm_sym_dot,
                 (tmp = scm_read_expression (port))))
    {
      /* `( . x)': the list is just its tail.  */
      ans = scm_read_expression (port);
      if (terminating_char != (c = flush_ws (port, FUNC_NAME)))
        scm_i_input_error (FUNC_NAME, port, "missing close paren",
                           SCM_EOL);
      return ans;
    }

  /* Build the head of the list structure.  TL always designates the last
     pair of ANS (and TL2 that of ANS2).  */
  ans = tl = scm_cons (tmp, SCM_EOL);

  if (SCM_COPY_SOURCE_P)
    ans2 = tl2 = scm_cons (scm_is_pair (tmp)
                           ? copy
                           : tmp,
                           SCM_EOL);

  while (terminating_char != (c = flush_ws (port, FUNC_NAME)))
    {
      SCM new_tail;

      scm_ungetc (c, port);
      if (scm_is_eq (scm_sym_dot,
                     (tmp = scm_read_expression (port))))
        {
          /* Dotted tail: it becomes the cdr of the last pair.  */
          SCM_SETCDR (tl, tmp = scm_read_expression (port));

          /* NOTE(review): the copy receives the tail wrapped in a fresh
             pair, i.e. a proper list, unlike ANS -- confirm this is
             wanted.  */
          if (SCM_COPY_SOURCE_P)
            SCM_SETCDR (tl2, scm_cons (scm_is_pair (tmp) ? copy : tmp,
                                       SCM_EOL));

          c = flush_ws (port, FUNC_NAME);
          if (terminating_char != c)
            scm_i_input_error (FUNC_NAME, port,
                               "in pair: missing close paren", SCM_EOL);
          goto exit;
        }

      /* Ordinary element: append a new pair.  */
      new_tail = scm_cons (tmp, SCM_EOL);
      SCM_SETCDR (tl, new_tail);
      tl = new_tail;

      if (SCM_COPY_SOURCE_P)
        {
          SCM new_tail2 = scm_cons (scm_is_pair (tmp)
                                    ? copy
                                    : tmp, SCM_EOL);
          SCM_SETCDR (tl2, new_tail2);
          tl2 = new_tail2;
        }
    }

 exit:
  if (SCM_RECORD_POSITIONS_P)
    scm_whash_insert (scm_source_whash,
                      ans,
                      scm_make_srcprops (line, column,
                                         SCM_FILENAME (port),
                                         SCM_COPY_SOURCE_P
                                         ? ans2
                                         : SCM_UNDEFINED,
                                         SCM_EOL));
  return ans;
}
#undef FUNC_NAME
  329. static SCM
  330. scm_read_string (int chr, SCM port)
  331. #define FUNC_NAME "scm_lreadr"
  332. {
  333. /* For strings smaller than C_STR, this function creates only one Scheme
  334. object (the string returned). */
  335. SCM str = SCM_BOOL_F;
  336. char c_str[READER_STRING_BUFFER_SIZE];
  337. unsigned c_str_len = 0;
  338. int c;
  339. while ('"' != (c = scm_getc (port)))
  340. {
  341. if (c == EOF)
  342. str_eof: scm_i_input_error (FUNC_NAME, port,
  343. "end of file in string constant",
  344. SCM_EOL);
  345. if (c_str_len + 1 >= sizeof (c_str))
  346. {
  347. /* Flush the C buffer onto a Scheme string. */
  348. SCM addy;
  349. if (str == SCM_BOOL_F)
  350. str = scm_c_make_string (0, SCM_MAKE_CHAR ('X'));
  351. addy = scm_from_locale_stringn (c_str, c_str_len);
  352. str = scm_string_append_shared (scm_list_2 (str, addy));
  353. c_str_len = 0;
  354. }
  355. if (c == '\\')
  356. switch (c = scm_getc (port))
  357. {
  358. case EOF:
  359. goto str_eof;
  360. case '"':
  361. case '\\':
  362. break;
  363. #if SCM_ENABLE_ELISP
  364. case '(':
  365. case ')':
  366. if (SCM_ESCAPED_PARENS_P)
  367. break;
  368. goto bad_escaped;
  369. #endif
  370. case '\n':
  371. continue;
  372. case '0':
  373. c = '\0';
  374. break;
  375. case 'f':
  376. c = '\f';
  377. break;
  378. case 'n':
  379. c = '\n';
  380. break;
  381. case 'r':
  382. c = '\r';
  383. break;
  384. case 't':
  385. c = '\t';
  386. break;
  387. case 'a':
  388. c = '\007';
  389. break;
  390. case 'v':
  391. c = '\v';
  392. break;
  393. case 'x':
  394. {
  395. int a, b;
  396. a = scm_getc (port);
  397. if (a == EOF) goto str_eof;
  398. b = scm_getc (port);
  399. if (b == EOF) goto str_eof;
  400. if ('0' <= a && a <= '9') a -= '0';
  401. else if ('A' <= a && a <= 'F') a = a - 'A' + 10;
  402. else if ('a' <= a && a <= 'f') a = a - 'a' + 10;
  403. else goto bad_escaped;
  404. if ('0' <= b && b <= '9') b -= '0';
  405. else if ('A' <= b && b <= 'F') b = b - 'A' + 10;
  406. else if ('a' <= b && b <= 'f') b = b - 'a' + 10;
  407. else goto bad_escaped;
  408. c = a * 16 + b;
  409. break;
  410. }
  411. default:
  412. bad_escaped:
  413. scm_i_input_error (FUNC_NAME, port,
  414. "illegal character in escape sequence: ~S",
  415. scm_list_1 (SCM_MAKE_CHAR (c)));
  416. }
  417. c_str[c_str_len++] = c;
  418. }
  419. if (c_str_len > 0)
  420. {
  421. SCM addy;
  422. addy = scm_from_locale_stringn (c_str, c_str_len);
  423. if (str == SCM_BOOL_F)
  424. str = addy;
  425. else
  426. str = scm_string_append_shared (scm_list_2 (str, addy));
  427. }
  428. else
  429. str = (str == SCM_BOOL_F) ? scm_nullstr : str;
  430. return str;
  431. }
  432. #undef FUNC_NAME
  433. static SCM
  434. scm_read_number (int chr, SCM port)
  435. {
  436. SCM result, str = SCM_EOL;
  437. char buffer[READER_BUFFER_SIZE];
  438. size_t read;
  439. int overflow = 0;
  440. scm_ungetc (chr, port);
  441. do
  442. {
  443. overflow = read_token (port, buffer, sizeof (buffer), &read);
  444. if ((overflow) || (scm_is_pair (str)))
  445. str = scm_cons (scm_from_locale_stringn (buffer, read), str);
  446. }
  447. while (overflow);
  448. if (scm_is_pair (str))
  449. {
  450. /* The slow path. */
  451. str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
  452. result = scm_string_to_number (str, SCM_UNDEFINED);
  453. if (!scm_is_true (result))
  454. /* Return a symbol instead of a number. */
  455. result = scm_string_to_symbol (str);
  456. }
  457. else
  458. {
  459. result = scm_c_locale_stringn_to_number (buffer, read, 10);
  460. if (!scm_is_true (result))
  461. /* Return a symbol instead of a number. */
  462. result = scm_from_locale_symboln (buffer, read);
  463. }
  464. return result;
  465. }
  466. static SCM
  467. scm_read_mixed_case_symbol (int chr, SCM port)
  468. {
  469. SCM result, str = SCM_EOL;
  470. int overflow = 0, ends_with_colon = 0;
  471. char buffer[READER_BUFFER_SIZE];
  472. size_t read = 0;
  473. int postfix = scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_postfix);
  474. scm_ungetc (chr, port);
  475. do
  476. {
  477. overflow = read_token (port, buffer, sizeof (buffer), &read);
  478. if (read > 0)
  479. ends_with_colon = (buffer[read - 1] == ':');
  480. if ((overflow) || (scm_is_pair (str)))
  481. str = scm_cons (scm_from_locale_stringn (buffer, read), str);
  482. }
  483. while (overflow);
  484. if (scm_is_pair (str))
  485. {
  486. size_t len;
  487. str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
  488. len = scm_c_string_length (str);
  489. /* Per SRFI-88, `:' alone is an identifier, not a keyword. */
  490. if (postfix && ends_with_colon && (len > 1))
  491. {
  492. /* Strip off colon. */
  493. str = scm_c_substring (str, 0, len-1);
  494. result = scm_string_to_symbol (str);
  495. result = scm_symbol_to_keyword (result);
  496. }
  497. else
  498. result = scm_string_to_symbol (str);
  499. }
  500. else
  501. {
  502. /* For symbols smaller than `sizeof (buffer)', we don't need to recur
  503. to Scheme strings. Therefore, we only create one Scheme object (a
  504. symbol) per symbol read. */
  505. if (postfix && ends_with_colon && (read > 1))
  506. result = scm_from_locale_keywordn (buffer, read - 1);
  507. else
  508. result = scm_from_locale_symboln (buffer, read);
  509. }
  510. return result;
  511. }
  512. static SCM
  513. scm_read_number_and_radix (int chr, SCM port)
  514. #define FUNC_NAME "scm_lreadr"
  515. {
  516. SCM result, str = SCM_EOL;
  517. size_t read;
  518. char buffer[READER_BUFFER_SIZE];
  519. unsigned int radix;
  520. int overflow = 0;
  521. switch (chr)
  522. {
  523. case 'B':
  524. case 'b':
  525. radix = 2;
  526. break;
  527. case 'o':
  528. case 'O':
  529. radix = 8;
  530. break;
  531. case 'd':
  532. case 'D':
  533. radix = 10;
  534. break;
  535. case 'x':
  536. case 'X':
  537. radix = 16;
  538. break;
  539. default:
  540. scm_ungetc (chr, port);
  541. scm_ungetc ('#', port);
  542. radix = 10;
  543. }
  544. do
  545. {
  546. overflow = read_token (port, buffer, sizeof (buffer), &read);
  547. if ((overflow) || (scm_is_pair (str)))
  548. str = scm_cons (scm_from_locale_stringn (buffer, read), str);
  549. }
  550. while (overflow);
  551. if (scm_is_pair (str))
  552. {
  553. str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
  554. result = scm_string_to_number (str, scm_from_uint (radix));
  555. }
  556. else
  557. result = scm_c_locale_stringn_to_number (buffer, read, radix);
  558. if (scm_is_true (result))
  559. return result;
  560. scm_i_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL);
  561. return SCM_BOOL_F;
  562. }
  563. #undef FUNC_NAME
  564. static SCM
  565. scm_read_quote (int chr, SCM port)
  566. {
  567. SCM p;
  568. long line = SCM_LINUM (port);
  569. int column = SCM_COL (port) - 1;
  570. switch (chr)
  571. {
  572. case '`':
  573. p = scm_sym_quasiquote;
  574. break;
  575. case '\'':
  576. p = scm_sym_quote;
  577. break;
  578. case ',':
  579. {
  580. int c;
  581. c = scm_getc (port);
  582. if ('@' == c)
  583. p = scm_sym_uq_splicing;
  584. else
  585. {
  586. scm_ungetc (c, port);
  587. p = scm_sym_unquote;
  588. }
  589. break;
  590. }
  591. default:
  592. fprintf (stderr, "%s: unhandled quote character (%i)\n",
  593. "scm_read_quote", chr);
  594. abort ();
  595. }
  596. p = scm_cons2 (p, scm_read_expression (port), SCM_EOL);
  597. if (SCM_RECORD_POSITIONS_P)
  598. scm_whash_insert (scm_source_whash, p,
  599. scm_make_srcprops (line, column,
  600. SCM_FILENAME (port),
  601. SCM_COPY_SOURCE_P
  602. ? (scm_cons2 (SCM_CAR (p),
  603. SCM_CAR (SCM_CDR (p)),
  604. SCM_EOL))
  605. : SCM_UNDEFINED,
  606. SCM_EOL));
  607. return p;
  608. }
  609. static inline SCM
  610. scm_read_semicolon_comment (int chr, SCM port)
  611. {
  612. int c;
  613. for (c = scm_getc (port);
  614. (c != EOF) && (c != '\n');
  615. c = scm_getc (port));
  616. return SCM_UNSPECIFIED;
  617. }
  618. /* Sharp readers, i.e. readers called after a `#' sign has been read. */
  619. static SCM
  620. scm_read_boolean (int chr, SCM port)
  621. {
  622. switch (chr)
  623. {
  624. case 't':
  625. case 'T':
  626. return SCM_BOOL_T;
  627. case 'f':
  628. case 'F':
  629. return SCM_BOOL_F;
  630. }
  631. return SCM_UNSPECIFIED;
  632. }
  633. static SCM
  634. scm_read_character (int chr, SCM port)
  635. #define FUNC_NAME "scm_lreadr"
  636. {
  637. unsigned c;
  638. char charname[READER_CHAR_NAME_MAX_SIZE];
  639. size_t charname_len;
  640. if (read_token (port, charname, sizeof (charname), &charname_len))
  641. goto char_error;
  642. if (charname_len == 0)
  643. {
  644. chr = scm_getc (port);
  645. if (chr == EOF)
  646. scm_i_input_error (FUNC_NAME, port, "unexpected end of file "
  647. "while reading character", SCM_EOL);
  648. /* CHR must be a token delimiter, like a whitespace. */
  649. return (SCM_MAKE_CHAR (chr));
  650. }
  651. if (charname_len == 1)
  652. return SCM_MAKE_CHAR (charname[0]);
  653. if (*charname >= '0' && *charname < '8')
  654. {
  655. /* Dirk:FIXME:: This type of character syntax is not R5RS
  656. * compliant. Further, it should be verified that the constant
  657. * does only consist of octal digits. Finally, it should be
  658. * checked whether the resulting fixnum is in the range of
  659. * characters. */
  660. SCM p = scm_c_locale_stringn_to_number (charname, charname_len, 8);
  661. if (SCM_I_INUMP (p))
  662. return SCM_MAKE_CHAR (SCM_I_INUM (p));
  663. }
  664. for (c = 0; c < scm_n_charnames; c++)
  665. if (scm_charnames[c]
  666. && (!strncasecmp (scm_charnames[c], charname, charname_len)))
  667. return SCM_MAKE_CHAR (scm_charnums[c]);
  668. char_error:
  669. scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
  670. scm_list_1 (scm_from_locale_stringn (charname,
  671. charname_len)));
  672. return SCM_UNSPECIFIED;
  673. }
  674. #undef FUNC_NAME
  675. static inline SCM
  676. scm_read_keyword (int chr, SCM port)
  677. {
  678. SCM symbol;
  679. /* Read the symbol that comprises the keyword. Doing this instead of
  680. invoking a specific symbol reader function allows `scm_read_keyword ()'
  681. to adapt to the delimiters currently valid of symbols.
  682. XXX: This implementation allows sloppy syntaxes like `#: key'. */
  683. symbol = scm_read_expression (port);
  684. if (!scm_is_symbol (symbol))
  685. scm_i_input_error ("scm_read_keyword", port,
  686. "keyword prefix `~a' not followed by a symbol: ~s",
  687. scm_list_2 (SCM_MAKE_CHAR (chr), symbol));
  688. return (scm_symbol_to_keyword (symbol));
  689. }
  690. static inline SCM
  691. scm_read_vector (int chr, SCM port)
  692. {
  693. /* Note: We call `scm_read_sexp ()' rather than READER here in order to
  694. guarantee that it's going to do what we want. After all, this is an
  695. implementation detail of `scm_read_vector ()', not a desirable
  696. property. */
  697. return (scm_vector (scm_read_sexp (chr, port)));
  698. }
/* Reader for `#s', `#u' and `#f': forwards to `scm_i_read_array ()', with
   CHR being the character that followed the `#'.
   NOTE(review): the call site in `scm_read_sharp ()' says the result may
   be either a boolean or an SRFI-4 vector; presumably `scm_i_read_array ()'
   is what recognises a bare `#f' -- confirm there.  */
static inline SCM
scm_read_srfi4_vector (int chr, SCM port)
{
  return scm_i_read_array (port, chr);
}
  704. static SCM
  705. scm_read_guile_bit_vector (int chr, SCM port)
  706. {
  707. /* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
  708. terribly inefficient but who cares? */
  709. SCM s_bits = SCM_EOL;
  710. for (chr = scm_getc (port);
  711. (chr != EOF) && ((chr == '0') || (chr == '1'));
  712. chr = scm_getc (port))
  713. {
  714. s_bits = scm_cons ((chr == '0') ? SCM_BOOL_F : SCM_BOOL_T, s_bits);
  715. }
  716. if (chr != EOF)
  717. scm_ungetc (chr, port);
  718. return scm_bitvector (scm_reverse_x (s_bits, SCM_EOL));
  719. }
  720. static inline SCM
  721. scm_read_scsh_block_comment (int chr, SCM port)
  722. {
  723. int bang_seen = 0;
  724. for (;;)
  725. {
  726. int c = scm_getc (port);
  727. if (c == EOF)
  728. scm_i_input_error ("skip_block_comment", port,
  729. "unterminated `#! ... !#' comment", SCM_EOL);
  730. if (c == '!')
  731. bang_seen = 1;
  732. else if (c == '#' && bang_seen)
  733. break;
  734. else
  735. bang_seen = 0;
  736. }
  737. return SCM_UNSPECIFIED;
  738. }
  739. static SCM
  740. scm_read_extended_symbol (int chr, SCM port)
  741. {
  742. /* Guile's extended symbol read syntax looks like this:
  743. #{This is all a symbol name}#
  744. So here, CHR is expected to be `{'. */
  745. SCM result;
  746. int saw_brace = 0, finished = 0;
  747. size_t len = 0;
  748. char buf[1024];
  749. result = scm_c_make_string (0, SCM_MAKE_CHAR ('X'));
  750. while ((chr = scm_getc (port)) != EOF)
  751. {
  752. if (saw_brace)
  753. {
  754. if (chr == '#')
  755. {
  756. finished = 1;
  757. break;
  758. }
  759. else
  760. {
  761. saw_brace = 0;
  762. buf[len++] = '}';
  763. buf[len++] = chr;
  764. }
  765. }
  766. else if (chr == '}')
  767. saw_brace = 1;
  768. else
  769. buf[len++] = chr;
  770. if (len >= sizeof (buf) - 2)
  771. {
  772. scm_string_append (scm_list_2 (result,
  773. scm_from_locale_stringn (buf, len)));
  774. len = 0;
  775. }
  776. if (finished)
  777. break;
  778. }
  779. if (len)
  780. result = scm_string_append (scm_list_2
  781. (result,
  782. scm_from_locale_stringn (buf, len)));
  783. return (scm_string_to_symbol (result));
  784. }
  785. /* Top-level token readers, i.e., dispatchers. */
  786. static SCM
  787. scm_read_sharp_extension (int chr, SCM port)
  788. {
  789. SCM proc;
  790. proc = scm_get_hash_procedure (chr);
  791. if (scm_is_true (scm_procedure_p (proc)))
  792. {
  793. long line = SCM_LINUM (port);
  794. int column = SCM_COL (port) - 2;
  795. SCM got;
  796. got = scm_call_2 (proc, SCM_MAKE_CHAR (chr), port);
  797. if (!scm_is_eq (got, SCM_UNSPECIFIED))
  798. {
  799. if (SCM_RECORD_POSITIONS_P)
  800. return (recsexpr (got, line, column,
  801. SCM_FILENAME (port)));
  802. else
  803. return got;
  804. }
  805. }
  806. return SCM_UNSPECIFIED;
  807. }
/* The reader for the sharp `#' character.  It basically dispatches reads
   among the above token readers.

   The CHR passed in is overwritten straight away with the character that
   follows the `#'.  Procedures registered for that character are tried
   first; only when they decline (SCM_UNSPECIFIED) does the built-in syntax
   apply.  SCM_UNSPECIFIED is returned when what was read is a comment;
   an unrecognised character raises a read error.  */
static SCM
scm_read_sharp (int chr, SCM port)
#define FUNC_NAME "scm_lreadr"
{
  SCM result;

  chr = scm_getc (port);

  /* User-supplied readers take precedence over the built-in ones.  */
  result = scm_read_sharp_extension (chr, port);
  if (!scm_is_eq (result, SCM_UNSPECIFIED))
    return result;

  switch (chr)
    {
    case '\\':
      return (scm_read_character (chr, port));
    case '(':
      return (scm_read_vector (chr, port));
    case 's':
    case 'u':
    case 'f':
      /* This one may return either a boolean or an SRFI-4 vector.  */
      return (scm_read_srfi4_vector (chr, port));
    case '*':
      return (scm_read_guile_bit_vector (chr, port));
    case 't':
    case 'T':
    case 'F':
      /* Booleans.  Lower-case `f' never gets here: it is claimed by the
         SRFI-4 case above.  */
      return (scm_read_boolean (chr, port));
    case ':':
      return (scm_read_keyword (chr, port));
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    case '@':
#if SCM_ENABLE_DEPRECATED
      /* See below for 'i' and 'e'. */
    case 'a':
    case 'c':
    case 'y':
    case 'h':
    case 'l':
#endif
      return (scm_i_read_array (port, chr));

    case 'i':
    case 'e':
#if SCM_ENABLE_DEPRECATED
      {
        /* When next char is '(', it really is an old-style
           uniform array. */
        int next_c = scm_getc (port);
        if (next_c != EOF)
          scm_ungetc (next_c, port);
        if (next_c == '(')
          return scm_i_read_array (port, chr);
        /* Fall through. */
      }
#endif
      /* `#i' and `#e' otherwise share the code of the radix prefixes
         below.  */
    case 'b':
    case 'B':
    case 'o':
    case 'O':
    case 'd':
    case 'D':
    case 'x':
    case 'X':
    case 'I':
    case 'E':
      return (scm_read_number_and_radix (chr, port));
    case '{':
      return (scm_read_extended_symbol (chr, port));
    case '!':
      return (scm_read_scsh_block_comment (chr, port));
    default:
      /* NOTE(review): the extension procedure was already called above and
         returned SCM_UNSPECIFIED to get here; this calls it a second time
         -- confirm that is intended.  */
      result = scm_read_sharp_extension (chr, port);
      if (scm_is_eq (result, SCM_UNSPECIFIED))
        scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S",
                           scm_list_1 (SCM_MAKE_CHAR (chr)));
      else
        return result;
    }

  return SCM_UNSPECIFIED;
}
#undef FUNC_NAME
/* Read one expression from PORT and return it, dispatching on its first
   character.  Whitespace and comments in front of the expression are
   skipped.  SCM_EOF_VAL is returned when end-of-file is met before any
   expression; a stray `)' raises a read error.  */
static SCM
scm_read_expression (SCM port)
#define FUNC_NAME "scm_read_expression"
{
  while (1)
    {
      register int chr;

      chr = scm_getc (port);

      switch (chr)
        {
        case SCM_WHITE_SPACES:
        case SCM_LINE_INCREMENTORS:
          break;
        case ';':
          (void) scm_read_semicolon_comment (chr, port);
          break;
        case '(':
          return (scm_read_sexp (chr, port));
        case '"':
          return (scm_read_string (chr, port));
        case '\'':
        case '`':
        case ',':
          return (scm_read_quote (chr, port));
        case '#':
          {
            SCM result;
            result = scm_read_sharp (chr, port);
            if (scm_is_eq (result, SCM_UNSPECIFIED))
              /* We read a comment or some such.  */
              break;
            else
              return result;
          }
        case ')':
          scm_i_input_error (FUNC_NAME, port, "unexpected \")\"", SCM_EOL);
          break;
        case EOF:
          return SCM_EOF_VAL;
        case ':':
          /* With prefix-style keywords, `:' introduces a keyword; otherwise
             it is an ordinary symbol constituent.  */
          if (scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_prefix))
            return scm_symbol_to_keyword (scm_read_expression (port));
          /* Fall through.  */

        default:
          {
            /* A digit, sign or dot may start a number; the number reader
               falls back to a symbol when the token is not numeric.  */
            if (((chr >= '0') && (chr <= '9'))
                || (strchr ("+-.", chr)))
              return (scm_read_number (chr, port));
            else
              return (scm_read_mixed_case_symbol (chr, port));
          }
        }
    }
}
#undef FUNC_NAME
  946. /* Actual reader. */
  947. SCM_DEFINE (scm_read, "read", 0, 1, 0,
  948. (SCM port),
  949. "Read an s-expression from the input port @var{port}, or from\n"
  950. "the current input port if @var{port} is not specified.\n"
  951. "Any whitespace before the next token is discarded.")
  952. #define FUNC_NAME s_scm_read
  953. {
  954. int c;
  955. if (SCM_UNBNDP (port))
  956. port = scm_current_input_port ();
  957. SCM_VALIDATE_OPINPORT (1, port);
  958. c = flush_ws (port, (char *) NULL);
  959. if (EOF == c)
  960. return SCM_EOF_VAL;
  961. scm_ungetc (c, port);
  962. return (scm_read_expression (port));
  963. }
  964. #undef FUNC_NAME
  965. /* Used when recording expressions constructed by `scm_read_sharp ()'. */
  966. static SCM
  967. recsexpr (SCM obj, long line, int column, SCM filename)
  968. {
  969. if (!scm_is_pair(obj)) {
  970. return obj;
  971. } else {
  972. SCM tmp = obj, copy;
  973. /* If this sexpr is visible in the read:sharp source, we want to
  974. keep that information, so only record non-constant cons cells
  975. which haven't previously been read by the reader. */
  976. if (scm_is_false (scm_whash_lookup (scm_source_whash, obj)))
  977. {
  978. if (SCM_COPY_SOURCE_P)
  979. {
  980. copy = scm_cons (recsexpr (SCM_CAR (obj), line, column, filename),
  981. SCM_UNDEFINED);
  982. while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
  983. {
  984. SCM_SETCDR (copy, scm_cons (recsexpr (SCM_CAR (tmp),
  985. line,
  986. column,
  987. filename),
  988. SCM_UNDEFINED));
  989. copy = SCM_CDR (copy);
  990. }
  991. SCM_SETCDR (copy, tmp);
  992. }
  993. else
  994. {
  995. recsexpr (SCM_CAR (obj), line, column, filename);
  996. while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
  997. recsexpr (SCM_CAR (tmp), line, column, filename);
  998. copy = SCM_UNDEFINED;
  999. }
  1000. scm_whash_insert (scm_source_whash,
  1001. obj,
  1002. scm_make_srcprops (line,
  1003. column,
  1004. filename,
  1005. copy,
  1006. SCM_EOL));
  1007. }
  1008. return obj;
  1009. }
  1010. }
  1011. /* Manipulate the read-hash-procedures alist. This could be written in
  1012. Scheme, but maybe it will also be used by C code during initialisation. */
  1013. SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
  1014. (SCM chr, SCM proc),
  1015. "Install the procedure @var{proc} for reading expressions\n"
  1016. "starting with the character sequence @code{#} and @var{chr}.\n"
  1017. "@var{proc} will be called with two arguments: the character\n"
  1018. "@var{chr} and the port to read further data from. The object\n"
  1019. "returned will be the return value of @code{read}.")
  1020. #define FUNC_NAME s_scm_read_hash_extend
  1021. {
  1022. SCM this;
  1023. SCM prev;
  1024. SCM_VALIDATE_CHAR (1, chr);
  1025. SCM_ASSERT (scm_is_false (proc)
  1026. || scm_is_eq (scm_procedure_p (proc), SCM_BOOL_T),
  1027. proc, SCM_ARG2, FUNC_NAME);
  1028. /* Check if chr is already in the alist. */
  1029. this = *scm_read_hash_procedures;
  1030. prev = SCM_BOOL_F;
  1031. while (1)
  1032. {
  1033. if (scm_is_null (this))
  1034. {
  1035. /* not found, so add it to the beginning. */
  1036. if (scm_is_true (proc))
  1037. {
  1038. *scm_read_hash_procedures =
  1039. scm_cons (scm_cons (chr, proc), *scm_read_hash_procedures);
  1040. }
  1041. break;
  1042. }
  1043. if (scm_is_eq (chr, SCM_CAAR (this)))
  1044. {
  1045. /* already in the alist. */
  1046. if (scm_is_false (proc))
  1047. {
  1048. /* remove it. */
  1049. if (scm_is_false (prev))
  1050. {
  1051. *scm_read_hash_procedures =
  1052. SCM_CDR (*scm_read_hash_procedures);
  1053. }
  1054. else
  1055. scm_set_cdr_x (prev, SCM_CDR (this));
  1056. }
  1057. else
  1058. {
  1059. /* replace it. */
  1060. scm_set_cdr_x (SCM_CAR (this), proc);
  1061. }
  1062. break;
  1063. }
  1064. prev = this;
  1065. this = SCM_CDR (this);
  1066. }
  1067. return SCM_UNSPECIFIED;
  1068. }
  1069. #undef FUNC_NAME
  1070. /* Recover the read-hash procedure corresponding to char c. */
  1071. static SCM
  1072. scm_get_hash_procedure (int c)
  1073. {
  1074. SCM rest = *scm_read_hash_procedures;
  1075. while (1)
  1076. {
  1077. if (scm_is_null (rest))
  1078. return SCM_BOOL_F;
  1079. if (SCM_CHAR (SCM_CAAR (rest)) == c)
  1080. return SCM_CDAR (rest);
  1081. rest = SCM_CDR (rest);
  1082. }
  1083. }
  1084. void
  1085. scm_init_read ()
  1086. {
  1087. scm_read_hash_procedures =
  1088. SCM_VARIABLE_LOC (scm_c_define ("read-hash-procedures", SCM_EOL));
  1089. scm_init_opts (scm_read_options, scm_read_opts, SCM_N_READ_OPTIONS);
  1090. #include "libguile/read.x"
  1091. }
  1092. /*
  1093. Local Variables:
  1094. c-file-style: "gnu"
  1095. End:
  1096. */