chars.c 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666
  1. /* Copyright (C) 1995,1996,1998, 2000, 2001, 2004, 2006, 2008, 2009, 2010 Free Software Foundation, Inc.
  2. *
  3. * This library is free software; you can redistribute it and/or
  4. * modify it under the terms of the GNU Lesser General Public License
  5. * as published by the Free Software Foundation; either version 3 of
  6. * the License, or (at your option) any later version.
  7. *
  8. * This library is distributed in the hope that it will be useful, but
  9. * WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. * Lesser General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU Lesser General Public
  14. * License along with this library; if not, write to the Free Software
  15. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  16. * 02110-1301 USA
  17. */
  18. #ifdef HAVE_CONFIG_H
  19. # include <config.h>
  20. #endif
  21. #include <ctype.h>
  22. #include <limits.h>
  23. #include <unicase.h>
  24. #include <unictype.h>
  25. #include "libguile/_scm.h"
  26. #include "libguile/validate.h"
  27. #include "libguile/chars.h"
  28. #include "libguile/srfi-14.h"
  29. SCM_DEFINE (scm_char_p, "char?", 1, 0, 0,
  30. (SCM x),
  31. "Return @code{#t} iff @var{x} is a character, else @code{#f}.")
  32. #define FUNC_NAME s_scm_char_p
  33. {
  34. return scm_from_bool (SCM_CHARP(x));
  35. }
  36. #undef FUNC_NAME
  37. static SCM scm_i_char_eq_p (SCM x, SCM y, SCM rest);
  38. SCM_DEFINE (scm_i_char_eq_p, "char=?", 0, 2, 1,
  39. (SCM x, SCM y, SCM rest),
  40. "Return @code{#t} if the Unicode code point of @var{x} is equal to the\n"
  41. "code point of @var{y}, else @code{#f}.\n")
  42. #define FUNC_NAME s_scm_i_char_eq_p
  43. {
  44. if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
  45. return SCM_BOOL_T;
  46. while (!scm_is_null (rest))
  47. {
  48. if (scm_is_false (scm_char_eq_p (x, y)))
  49. return SCM_BOOL_F;
  50. x = y;
  51. y = scm_car (rest);
  52. rest = scm_cdr (rest);
  53. }
  54. return scm_char_eq_p (x, y);
  55. }
  56. #undef FUNC_NAME
  57. SCM scm_char_eq_p (SCM x, SCM y)
  58. #define FUNC_NAME s_scm_i_char_eq_p
  59. {
  60. SCM_VALIDATE_CHAR (1, x);
  61. SCM_VALIDATE_CHAR (2, y);
  62. return scm_from_bool (scm_is_eq (x, y));
  63. }
  64. #undef FUNC_NAME
  65. static SCM scm_i_char_less_p (SCM x, SCM y, SCM rest);
  66. SCM_DEFINE (scm_i_char_less_p, "char<?", 0, 2, 1,
  67. (SCM x, SCM y, SCM rest),
  68. "Return @code{#t} iff the code point of @var{x} is less than the code\n"
  69. "point of @var{y}, else @code{#f}.")
  70. #define FUNC_NAME s_scm_i_char_less_p
  71. {
  72. if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
  73. return SCM_BOOL_T;
  74. while (!scm_is_null (rest))
  75. {
  76. if (scm_is_false (scm_char_less_p (x, y)))
  77. return SCM_BOOL_F;
  78. x = y;
  79. y = scm_car (rest);
  80. rest = scm_cdr (rest);
  81. }
  82. return scm_char_less_p (x, y);
  83. }
  84. #undef FUNC_NAME
  85. SCM scm_char_less_p (SCM x, SCM y)
  86. #define FUNC_NAME s_scm_i_char_less_p
  87. {
  88. SCM_VALIDATE_CHAR (1, x);
  89. SCM_VALIDATE_CHAR (2, y);
  90. return scm_from_bool (SCM_CHAR(x) < SCM_CHAR(y));
  91. }
  92. #undef FUNC_NAME
  93. static SCM scm_i_char_leq_p (SCM x, SCM y, SCM rest);
  94. SCM_DEFINE (scm_i_char_leq_p, "char<=?", 0, 2, 1,
  95. (SCM x, SCM y, SCM rest),
  96. "Return @code{#t} if the Unicode code point of @var{x} is less than or\n"
  97. "equal to the code point of @var{y}, else @code{#f}.")
  98. #define FUNC_NAME s_scm_i_char_leq_p
  99. {
  100. if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
  101. return SCM_BOOL_T;
  102. while (!scm_is_null (rest))
  103. {
  104. if (scm_is_false (scm_char_leq_p (x, y)))
  105. return SCM_BOOL_F;
  106. x = y;
  107. y = scm_car (rest);
  108. rest = scm_cdr (rest);
  109. }
  110. return scm_char_leq_p (x, y);
  111. }
  112. #undef FUNC_NAME
  113. SCM scm_char_leq_p (SCM x, SCM y)
  114. #define FUNC_NAME s_scm_i_char_leq_p
  115. {
  116. SCM_VALIDATE_CHAR (1, x);
  117. SCM_VALIDATE_CHAR (2, y);
  118. return scm_from_bool (SCM_CHAR(x) <= SCM_CHAR(y));
  119. }
  120. #undef FUNC_NAME
  121. static SCM scm_i_char_gr_p (SCM x, SCM y, SCM rest);
  122. SCM_DEFINE (scm_i_char_gr_p, "char>?", 0, 2, 1,
  123. (SCM x, SCM y, SCM rest),
  124. "Return @code{#t} if the Unicode code point of @var{x} is greater than\n"
  125. "the code point of @var{y}, else @code{#f}.")
  126. #define FUNC_NAME s_scm_i_char_gr_p
  127. {
  128. if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
  129. return SCM_BOOL_T;
  130. while (!scm_is_null (rest))
  131. {
  132. if (scm_is_false (scm_char_gr_p (x, y)))
  133. return SCM_BOOL_F;
  134. x = y;
  135. y = scm_car (rest);
  136. rest = scm_cdr (rest);
  137. }
  138. return scm_char_gr_p (x, y);
  139. }
  140. #undef FUNC_NAME
  141. SCM scm_char_gr_p (SCM x, SCM y)
  142. #define FUNC_NAME s_scm_i_char_gr_p
  143. {
  144. SCM_VALIDATE_CHAR (1, x);
  145. SCM_VALIDATE_CHAR (2, y);
  146. return scm_from_bool (SCM_CHAR(x) > SCM_CHAR(y));
  147. }
  148. #undef FUNC_NAME
  149. static SCM scm_i_char_geq_p (SCM x, SCM y, SCM rest);
  150. SCM_DEFINE (scm_i_char_geq_p, "char>=?", 0, 2, 1,
  151. (SCM x, SCM y, SCM rest),
  152. "Return @code{#t} if the Unicode code point of @var{x} is greater than\n"
  153. "or equal to the code point of @var{y}, else @code{#f}.")
  154. #define FUNC_NAME s_scm_i_char_geq_p
  155. {
  156. if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
  157. return SCM_BOOL_T;
  158. while (!scm_is_null (rest))
  159. {
  160. if (scm_is_false (scm_char_geq_p (x, y)))
  161. return SCM_BOOL_F;
  162. x = y;
  163. y = scm_car (rest);
  164. rest = scm_cdr (rest);
  165. }
  166. return scm_char_geq_p (x, y);
  167. }
  168. #undef FUNC_NAME
  169. SCM scm_char_geq_p (SCM x, SCM y)
  170. #define FUNC_NAME s_scm_i_char_geq_p
  171. {
  172. SCM_VALIDATE_CHAR (1, x);
  173. SCM_VALIDATE_CHAR (2, y);
  174. return scm_from_bool (SCM_CHAR(x) >= SCM_CHAR(y));
  175. }
  176. #undef FUNC_NAME
  177. /* FIXME?: R6RS specifies that these comparisons are case-folded.
  178. This is the same thing as comparing the uppercase characters in
  179. practice, but, not in theory. Unicode has table containing their
  180. definition of case-folded character mappings. A more correct
  181. implementation would be to use that table and make a char-foldcase
  182. function. */
  183. static SCM scm_i_char_ci_eq_p (SCM x, SCM y, SCM rest);
  184. SCM_DEFINE (scm_i_char_ci_eq_p, "char-ci=?", 0, 2, 1,
  185. (SCM x, SCM y, SCM rest),
  186. "Return @code{#t} if the case-folded Unicode code point of @var{x} is\n"
  187. "the same as the case-folded code point of @var{y}, else @code{#f}.")
  188. #define FUNC_NAME s_scm_i_char_ci_eq_p
  189. {
  190. if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
  191. return SCM_BOOL_T;
  192. while (!scm_is_null (rest))
  193. {
  194. if (scm_is_false (scm_char_ci_eq_p (x, y)))
  195. return SCM_BOOL_F;
  196. x = y;
  197. y = scm_car (rest);
  198. rest = scm_cdr (rest);
  199. }
  200. return scm_char_ci_eq_p (x, y);
  201. }
  202. #undef FUNC_NAME
  203. SCM scm_char_ci_eq_p (SCM x, SCM y)
  204. #define FUNC_NAME s_scm_i_char_ci_eq_p
  205. {
  206. SCM_VALIDATE_CHAR (1, x);
  207. SCM_VALIDATE_CHAR (2, y);
  208. return scm_from_bool (scm_c_upcase(SCM_CHAR(x))==scm_c_upcase(SCM_CHAR(y)));
  209. }
  210. #undef FUNC_NAME
  211. static SCM scm_i_char_ci_less_p (SCM x, SCM y, SCM rest);
  212. SCM_DEFINE (scm_i_char_ci_less_p, "char-ci<?", 0, 2, 1,
  213. (SCM x, SCM y, SCM rest),
  214. "Return @code{#t} if the case-folded Unicode code point of @var{x} is\n"
  215. "less than the case-folded code point of @var{y}, else @code{#f}.")
  216. #define FUNC_NAME s_scm_i_char_ci_less_p
  217. {
  218. if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
  219. return SCM_BOOL_T;
  220. while (!scm_is_null (rest))
  221. {
  222. if (scm_is_false (scm_char_ci_less_p (x, y)))
  223. return SCM_BOOL_F;
  224. x = y;
  225. y = scm_car (rest);
  226. rest = scm_cdr (rest);
  227. }
  228. return scm_char_ci_less_p (x, y);
  229. }
  230. #undef FUNC_NAME
  231. SCM scm_char_ci_less_p (SCM x, SCM y)
  232. #define FUNC_NAME s_scm_i_char_ci_less_p
  233. {
  234. SCM_VALIDATE_CHAR (1, x);
  235. SCM_VALIDATE_CHAR (2, y);
  236. return scm_from_bool ((scm_c_upcase(SCM_CHAR(x))) < scm_c_upcase(SCM_CHAR(y)));
  237. }
  238. #undef FUNC_NAME
  239. static SCM scm_i_char_ci_leq_p (SCM x, SCM y, SCM rest);
  240. SCM_DEFINE (scm_i_char_ci_leq_p, "char-ci<=?", 0, 2, 1,
  241. (SCM x, SCM y, SCM rest),
  242. "Return @code{#t} iff the case-folded Unicode code point of @var{x} is\n"
  243. "less than or equal to the case-folded code point of @var{y}, else\n"
  244. "@code{#f}")
  245. #define FUNC_NAME s_scm_i_char_ci_leq_p
  246. {
  247. if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
  248. return SCM_BOOL_T;
  249. while (!scm_is_null (rest))
  250. {
  251. if (scm_is_false (scm_char_ci_leq_p (x, y)))
  252. return SCM_BOOL_F;
  253. x = y;
  254. y = scm_car (rest);
  255. rest = scm_cdr (rest);
  256. }
  257. return scm_char_ci_leq_p (x, y);
  258. }
  259. #undef FUNC_NAME
  260. SCM scm_char_ci_leq_p (SCM x, SCM y)
  261. #define FUNC_NAME s_scm_i_char_ci_leq_p
  262. {
  263. SCM_VALIDATE_CHAR (1, x);
  264. SCM_VALIDATE_CHAR (2, y);
  265. return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) <= scm_c_upcase(SCM_CHAR(y)));
  266. }
  267. #undef FUNC_NAME
  268. static SCM scm_i_char_ci_gr_p (SCM x, SCM y, SCM rest);
  269. SCM_DEFINE (scm_i_char_ci_gr_p, "char-ci>?", 0, 2, 1,
  270. (SCM x, SCM y, SCM rest),
  271. "Return @code{#t} iff the case-folded code point of @var{x} is greater\n"
  272. "than the case-folded code point of @var{y}, else @code{#f}.")
  273. #define FUNC_NAME s_scm_i_char_ci_gr_p
  274. {
  275. if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
  276. return SCM_BOOL_T;
  277. while (!scm_is_null (rest))
  278. {
  279. if (scm_is_false (scm_char_ci_gr_p (x, y)))
  280. return SCM_BOOL_F;
  281. x = y;
  282. y = scm_car (rest);
  283. rest = scm_cdr (rest);
  284. }
  285. return scm_char_ci_gr_p (x, y);
  286. }
  287. #undef FUNC_NAME
  288. SCM scm_char_ci_gr_p (SCM x, SCM y)
  289. #define FUNC_NAME s_scm_i_char_ci_gr_p
  290. {
  291. SCM_VALIDATE_CHAR (1, x);
  292. SCM_VALIDATE_CHAR (2, y);
  293. return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) > scm_c_upcase(SCM_CHAR(y)));
  294. }
  295. #undef FUNC_NAME
  296. static SCM scm_i_char_ci_geq_p (SCM x, SCM y, SCM rest);
  297. SCM_DEFINE (scm_i_char_ci_geq_p, "char-ci>=?", 0, 2, 1,
  298. (SCM x, SCM y, SCM rest),
  299. "Return @code{#t} iff the case-folded Unicode code point of @var{x} is\n"
  300. "greater than or equal to the case-folded code point of @var{y}, else\n"
  301. "@code{#f}.")
  302. #define FUNC_NAME s_scm_i_char_ci_geq_p
  303. {
  304. if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
  305. return SCM_BOOL_T;
  306. while (!scm_is_null (rest))
  307. {
  308. if (scm_is_false (scm_char_ci_geq_p (x, y)))
  309. return SCM_BOOL_F;
  310. x = y;
  311. y = scm_car (rest);
  312. rest = scm_cdr (rest);
  313. }
  314. return scm_char_ci_geq_p (x, y);
  315. }
  316. #undef FUNC_NAME
  317. SCM scm_char_ci_geq_p (SCM x, SCM y)
  318. #define FUNC_NAME s_scm_i_char_ci_geq_p
  319. {
  320. SCM_VALIDATE_CHAR (1, x);
  321. SCM_VALIDATE_CHAR (2, y);
  322. return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) >= scm_c_upcase(SCM_CHAR(y)));
  323. }
  324. #undef FUNC_NAME
  325. SCM_DEFINE (scm_char_alphabetic_p, "char-alphabetic?", 1, 0, 0,
  326. (SCM chr),
  327. "Return @code{#t} iff @var{chr} is alphabetic, else @code{#f}.\n")
  328. #define FUNC_NAME s_scm_char_alphabetic_p
  329. {
  330. return scm_char_set_contains_p (scm_char_set_letter, chr);
  331. }
  332. #undef FUNC_NAME
  333. SCM_DEFINE (scm_char_numeric_p, "char-numeric?", 1, 0, 0,
  334. (SCM chr),
  335. "Return @code{#t} iff @var{chr} is numeric, else @code{#f}.\n")
  336. #define FUNC_NAME s_scm_char_numeric_p
  337. {
  338. return scm_char_set_contains_p (scm_char_set_digit, chr);
  339. }
  340. #undef FUNC_NAME
  341. SCM_DEFINE (scm_char_whitespace_p, "char-whitespace?", 1, 0, 0,
  342. (SCM chr),
  343. "Return @code{#t} iff @var{chr} is whitespace, else @code{#f}.\n")
  344. #define FUNC_NAME s_scm_char_whitespace_p
  345. {
  346. return scm_char_set_contains_p (scm_char_set_whitespace, chr);
  347. }
  348. #undef FUNC_NAME
  349. SCM_DEFINE (scm_char_upper_case_p, "char-upper-case?", 1, 0, 0,
  350. (SCM chr),
  351. "Return @code{#t} iff @var{chr} is uppercase, else @code{#f}.\n")
  352. #define FUNC_NAME s_scm_char_upper_case_p
  353. {
  354. return scm_char_set_contains_p (scm_char_set_upper_case, chr);
  355. }
  356. #undef FUNC_NAME
  357. SCM_DEFINE (scm_char_lower_case_p, "char-lower-case?", 1, 0, 0,
  358. (SCM chr),
  359. "Return @code{#t} iff @var{chr} is lowercase, else @code{#f}.\n")
  360. #define FUNC_NAME s_scm_char_lower_case_p
  361. {
  362. return scm_char_set_contains_p (scm_char_set_lower_case, chr);
  363. }
  364. #undef FUNC_NAME
  365. SCM_DEFINE (scm_char_is_both_p, "char-is-both?", 1, 0, 0,
  366. (SCM chr),
  367. "Return @code{#t} iff @var{chr} is either uppercase or lowercase, else\n"
  368. "@code{#f}.\n")
  369. #define FUNC_NAME s_scm_char_is_both_p
  370. {
  371. if (scm_is_true (scm_char_set_contains_p (scm_char_set_lower_case, chr)))
  372. return SCM_BOOL_T;
  373. return scm_char_set_contains_p (scm_char_set_upper_case, chr);
  374. }
  375. #undef FUNC_NAME
  376. SCM_DEFINE (scm_char_to_integer, "char->integer", 1, 0, 0,
  377. (SCM chr),
  378. "Return the Unicode code point of @var{chr}.")
  379. #define FUNC_NAME s_scm_char_to_integer
  380. {
  381. SCM_VALIDATE_CHAR (1, chr);
  382. return scm_from_uint32 (SCM_CHAR(chr));
  383. }
  384. #undef FUNC_NAME
  385. SCM_DEFINE (scm_integer_to_char, "integer->char", 1, 0, 0,
  386. (SCM n),
  387. "Return the character that has Unicode code point @var{n}. The integer\n"
  388. "@var{n} must be a valid code point. Valid code points are in the\n"
  389. "ranges 0 to @code{#xD7FF} inclusive or @code{#xE000} to\n"
  390. "@code{#x10FFFF} inclusive.")
  391. #define FUNC_NAME s_scm_integer_to_char
  392. {
  393. scm_t_wchar cn;
  394. cn = scm_to_wchar (n);
  395. /* Avoid the surrogates. */
  396. if (!SCM_IS_UNICODE_CHAR (cn))
  397. scm_out_of_range (FUNC_NAME, n);
  398. return SCM_MAKE_CHAR (cn);
  399. }
  400. #undef FUNC_NAME
  401. SCM_DEFINE (scm_char_upcase, "char-upcase", 1, 0, 0,
  402. (SCM chr),
  403. "Return the uppercase character version of @var{chr}.")
  404. #define FUNC_NAME s_scm_char_upcase
  405. {
  406. SCM_VALIDATE_CHAR (1, chr);
  407. return SCM_MAKE_CHAR (scm_c_upcase (SCM_CHAR (chr)));
  408. }
  409. #undef FUNC_NAME
  410. SCM_DEFINE (scm_char_downcase, "char-downcase", 1, 0, 0,
  411. (SCM chr),
  412. "Return the lowercase character version of @var{chr}.")
  413. #define FUNC_NAME s_scm_char_downcase
  414. {
  415. SCM_VALIDATE_CHAR (1, chr);
  416. return SCM_MAKE_CHAR (scm_c_downcase (SCM_CHAR(chr)));
  417. }
  418. #undef FUNC_NAME
  419. SCM_DEFINE (scm_char_titlecase, "char-titlecase", 1, 0, 0,
  420. (SCM chr),
  421. "Return the titlecase character version of @var{chr}.")
  422. #define FUNC_NAME s_scm_char_titlecase
  423. {
  424. SCM_VALIDATE_CHAR (1, chr);
  425. return SCM_MAKE_CHAR (scm_c_titlecase (SCM_CHAR(chr)));
  426. }
  427. #undef FUNC_NAME
  428. SCM_DEFINE (scm_char_general_category, "char-general-category", 1, 0, 0,
  429. (SCM chr),
  430. "Return a symbol representing the Unicode general category of "
  431. "@var{chr} or @code{#f} if a named category cannot be found.")
  432. #define FUNC_NAME s_scm_char_general_category
  433. {
  434. const char *sym;
  435. uc_general_category_t cat;
  436. SCM_VALIDATE_CHAR (1, chr);
  437. cat = uc_general_category (SCM_CHAR (chr));
  438. sym = uc_general_category_name (cat);
  439. if (sym != NULL)
  440. return scm_from_locale_symbol (sym);
  441. return SCM_BOOL_F;
  442. }
  443. #undef FUNC_NAME
  444. /*
  445. TODO: change name to scm_i_.. ? --hwn
  446. */
  447. scm_t_wchar
  448. scm_c_upcase (scm_t_wchar c)
  449. {
  450. return uc_toupper ((int) c);
  451. }
  452. scm_t_wchar
  453. scm_c_downcase (scm_t_wchar c)
  454. {
  455. return uc_tolower ((int) c);
  456. }
  457. scm_t_wchar
  458. scm_c_titlecase (scm_t_wchar c)
  459. {
  460. return uc_totitle ((int) c);
  461. }
  462. /* There are a few sets of character names: R5RS, Guile
  463. extensions for control characters, and leftover Guile extensions.
  464. They are listed in order of precedence. */
  465. static const char *const scm_r5rs_charnames[] = {
  466. "space", "newline"
  467. };
  468. static const scm_t_uint32 const scm_r5rs_charnums[] = {
  469. 0x20, 0x0a
  470. };
  471. #define SCM_N_R5RS_CHARNAMES (sizeof (scm_r5rs_charnames) / sizeof (char *))
  472. static const char *const scm_r6rs_charnames[] = {
  473. "nul", "alarm", "backspace", "tab", "linefeed", "vtab", "page",
  474. "return", "esc", "delete"
  475. /* 'space' and 'newline' are already included from the R5RS list. */
  476. };
  477. static const scm_t_uint32 const scm_r6rs_charnums[] = {
  478. 0x00, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c,
  479. 0x0d, 0x1b, 0x7f
  480. };
  481. #define SCM_N_R6RS_CHARNAMES (sizeof (scm_r6rs_charnames) / sizeof (char *))
  482. /* The abbreviated names for control characters. */
  483. static const char *const scm_C0_control_charnames[] = {
  484. /* C0 controls */
  485. "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
  486. "bs", "ht", "lf", "vt", "ff", "cr", "so", "si",
  487. "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
  488. "can", "em", "sub", "esc", "fs", "gs", "rs", "us",
  489. "sp", "del"
  490. };
  491. static const scm_t_uint32 const scm_C0_control_charnums[] = {
  492. 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  493. 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  494. 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  495. 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  496. 0x20, 0x7f
  497. };
  498. #define SCM_N_C0_CONTROL_CHARNAMES (sizeof (scm_C0_control_charnames) / sizeof (char *))
  499. static const char *const scm_alt_charnames[] = {
  500. "null", "nl", "np"
  501. };
  502. static const scm_t_uint32 const scm_alt_charnums[] = {
  503. 0x00, 0x0a, 0x0c
  504. };
  505. #define SCM_N_ALT_CHARNAMES (sizeof (scm_alt_charnames) / sizeof (char *))
  506. /* Returns the string charname for a character if it exists, or NULL
  507. otherwise. */
  508. const char *
  509. scm_i_charname (SCM chr)
  510. {
  511. size_t c;
  512. scm_t_uint32 i = SCM_CHAR (chr);
  513. for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
  514. if (scm_r5rs_charnums[c] == i)
  515. return scm_r5rs_charnames[c];
  516. for (c = 0; c < SCM_N_R6RS_CHARNAMES; c++)
  517. if (scm_r6rs_charnums[c] == i)
  518. return scm_r6rs_charnames[c];
  519. for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
  520. if (scm_C0_control_charnums[c] == i)
  521. return scm_C0_control_charnames[c];
  522. /* Since the characters in scm_alt_charnums is a subset of
  523. scm_C0_control_charnums, this code is never reached. */
  524. for (c = 0; c < SCM_N_ALT_CHARNAMES; c++)
  525. if (scm_alt_charnums[c] == i)
  526. return scm_alt_charnames[c];
  527. return NULL;
  528. }
  529. /* Return a character from a string charname. */
  530. SCM
  531. scm_i_charname_to_char (const char *charname, size_t charname_len)
  532. {
  533. size_t c;
  534. /* The R5RS charnames. These are supposed to be case insensitive. */
  535. for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
  536. if ((strlen (scm_r5rs_charnames[c]) == charname_len)
  537. && (!strncasecmp (scm_r5rs_charnames[c], charname, charname_len)))
  538. return SCM_MAKE_CHAR (scm_r5rs_charnums[c]);
  539. /* The R6RS charnames. R6RS says that these should be case-sensitive. They
  540. are left as case-insensitive to avoid confusion. */
  541. for (c = 0; c < SCM_N_R6RS_CHARNAMES; c++)
  542. if ((strlen (scm_r6rs_charnames[c]) == charname_len)
  543. && (!strncasecmp (scm_r6rs_charnames[c], charname, charname_len)))
  544. return SCM_MAKE_CHAR (scm_r6rs_charnums[c]);
  545. /* Then come the controls. By Guile convention, these are not case
  546. sensitive. */
  547. for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
  548. if ((strlen (scm_C0_control_charnames[c]) == charname_len)
  549. && (!strncasecmp (scm_C0_control_charnames[c], charname, charname_len)))
  550. return SCM_MAKE_CHAR (scm_C0_control_charnums[c]);
  551. /* Lastly are some old names carried over for compatibility. */
  552. for (c = 0; c < SCM_N_ALT_CHARNAMES; c++)
  553. if ((strlen (scm_alt_charnames[c]) == charname_len)
  554. && (!strncasecmp (scm_alt_charnames[c], charname, charname_len)))
  555. return SCM_MAKE_CHAR (scm_alt_charnums[c]);
  556. return SCM_BOOL_F;
  557. }
  558. void
  559. scm_init_chars ()
  560. {
  561. #include "libguile/chars.x"
  562. }
  563. /*
  564. Local Variables:
  565. c-file-style: "gnu"
  566. End:
  567. */