strings.c 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112
  1. /* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008 Free Software Foundation, Inc.
  2. *
  3. * This library is free software; you can redistribute it and/or
  4. * modify it under the terms of the GNU Lesser General Public
  5. * License as published by the Free Software Foundation; either
  6. * version 2.1 of the License, or (at your option) any later version.
  7. *
  8. * This library is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. * Lesser General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU Lesser General Public
  14. * License along with this library; if not, write to the Free Software
  15. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  16. */
  17. #ifdef HAVE_CONFIG_H
  18. # include <config.h>
  19. #endif
  20. #include <string.h>
  21. #include <stdio.h>
  22. #include "libguile/_scm.h"
  23. #include "libguile/chars.h"
  24. #include "libguile/root.h"
  25. #include "libguile/strings.h"
  26. #include "libguile/deprecation.h"
  27. #include "libguile/validate.h"
  28. #include "libguile/dynwind.h"
  29. /* {Strings}
  30. */
  31. /* Stringbufs
  32. *
  33. * XXX - keeping an accurate refcount during GC seems to be quite
  34. * tricky, so we just keep score of whether a stringbuf might be
  35. * shared, not wether it definitely is.
  36. *
  37. * The scheme I (mvo) tried to keep an accurate reference count would
  38. * recount all strings that point to a stringbuf during the mark-phase
  39. * of the GC. This was done since one cannot access the stringbuf of
  40. * a string when that string is freed (in order to decrease the
  41. * reference count). The memory of the stringbuf might have been
  42. * reused already for something completely different.
  43. *
  44. * This recounted worked for a small number of threads beating on
  45. * cow-strings, but it failed randomly with more than 10 threads, say.
  46. * I couldn't figure out what went wrong, so I used the conservative
  47. * approach implemented below.
  48. *
  49. * A stringbuf needs to know its length, but only so that it can be
  50. * reported when the stringbuf is freed.
  51. *
  52. * Stringbufs (and strings) are not stored very compactly: a stringbuf
  53. * has room for about 2*sizeof(scm_t_bits)-1 bytes additional
  54. * information. As a compensation, the code below is made more
  55. * complicated by storing small strings inline in the double cell of a
  56. * stringbuf. So we have fixstrings and bigstrings...
  57. */
  58. #define STRINGBUF_F_SHARED 0x100
  59. #define STRINGBUF_F_INLINE 0x200
  60. #define STRINGBUF_TAG scm_tc7_stringbuf
  61. #define STRINGBUF_SHARED(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED)
  62. #define STRINGBUF_INLINE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_INLINE)
  63. #define STRINGBUF_OUTLINE_CHARS(buf) ((char *)SCM_CELL_WORD_1(buf))
  64. #define STRINGBUF_OUTLINE_LENGTH(buf) (SCM_CELL_WORD_2(buf))
  65. #define STRINGBUF_INLINE_CHARS(buf) ((char *)SCM_CELL_OBJECT_LOC(buf,1))
  66. #define STRINGBUF_INLINE_LENGTH(buf) (((size_t)SCM_CELL_WORD_0(buf))>>16)
  67. #define STRINGBUF_CHARS(buf) (STRINGBUF_INLINE (buf) \
  68. ? STRINGBUF_INLINE_CHARS (buf) \
  69. : STRINGBUF_OUTLINE_CHARS (buf))
  70. #define STRINGBUF_LENGTH(buf) (STRINGBUF_INLINE (buf) \
  71. ? STRINGBUF_INLINE_LENGTH (buf) \
  72. : STRINGBUF_OUTLINE_LENGTH (buf))
  73. #define STRINGBUF_MAX_INLINE_LEN (3*sizeof(scm_t_bits))
  74. #define SET_STRINGBUF_SHARED(buf) \
  75. (SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED))
  76. #if SCM_DEBUG
  77. static size_t lenhist[1001];
  78. #endif
  79. static SCM
  80. make_stringbuf (size_t len)
  81. {
  82. /* XXX - for the benefit of SCM_STRING_CHARS, SCM_SYMBOL_CHARS and
  83. scm_i_symbol_chars, all stringbufs are null-terminated. Once
  84. SCM_STRING_CHARS and SCM_SYMBOL_CHARS are removed and the code
  85. has been changed for scm_i_symbol_chars, this null-termination
  86. can be dropped.
  87. */
  88. #if SCM_DEBUG
  89. if (len < 1000)
  90. lenhist[len]++;
  91. else
  92. lenhist[1000]++;
  93. #endif
  94. if (len <= STRINGBUF_MAX_INLINE_LEN-1)
  95. {
  96. return scm_double_cell (STRINGBUF_TAG | STRINGBUF_F_INLINE | (len << 16),
  97. 0, 0, 0);
  98. }
  99. else
  100. {
  101. char *mem = scm_gc_malloc (len+1, "string");
  102. mem[len] = '\0';
  103. return scm_double_cell (STRINGBUF_TAG, (scm_t_bits) mem,
  104. (scm_t_bits) len, (scm_t_bits) 0);
  105. }
  106. }
  107. /* Return a new stringbuf whose underlying storage consists of the LEN+1
  108. octets pointed to by STR (the last octet is zero). */
  109. SCM
  110. scm_i_take_stringbufn (char *str, size_t len)
  111. {
  112. scm_gc_register_collectable_memory (str, len + 1, "stringbuf");
  113. return scm_double_cell (STRINGBUF_TAG, (scm_t_bits) str,
  114. (scm_t_bits) len, (scm_t_bits) 0);
  115. }
  116. SCM
  117. scm_i_stringbuf_mark (SCM buf)
  118. {
  119. return SCM_BOOL_F;
  120. }
  121. void
  122. scm_i_stringbuf_free (SCM buf)
  123. {
  124. if (!STRINGBUF_INLINE (buf))
  125. scm_gc_free (STRINGBUF_OUTLINE_CHARS (buf),
  126. STRINGBUF_OUTLINE_LENGTH (buf) + 1, "string");
  127. }
  128. scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
  129. /* Copy-on-write strings.
  130. */
  131. #define STRING_TAG scm_tc7_string
  132. #define STRING_STRINGBUF(str) (SCM_CELL_OBJECT_1(str))
  133. #define STRING_START(str) ((size_t)SCM_CELL_WORD_2(str))
  134. #define STRING_LENGTH(str) ((size_t)SCM_CELL_WORD_3(str))
  135. #define SET_STRING_STRINGBUF(str,buf) (SCM_SET_CELL_OBJECT_1(str,buf))
  136. #define SET_STRING_START(str,start) (SCM_SET_CELL_WORD_2(str,start))
  137. #define IS_STRING(str) (SCM_NIMP(str) && SCM_TYP7(str) == STRING_TAG)
  138. /* Read-only strings.
  139. */
  140. #define RO_STRING_TAG (scm_tc7_string + 0x200)
  141. #define IS_RO_STRING(str) (SCM_CELL_TYPE(str)==RO_STRING_TAG)
  142. /* Mutation-sharing substrings
  143. */
  144. #define SH_STRING_TAG (scm_tc7_string + 0x100)
  145. #define SH_STRING_STRING(sh) (SCM_CELL_OBJECT_1(sh))
  146. /* START and LENGTH as for STRINGs. */
  147. #define IS_SH_STRING(str) (SCM_CELL_TYPE(str)==SH_STRING_TAG)
  148. SCM
  149. scm_i_make_string (size_t len, char **charsp)
  150. {
  151. SCM buf = make_stringbuf (len);
  152. SCM res;
  153. if (charsp)
  154. *charsp = STRINGBUF_CHARS (buf);
  155. res = scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
  156. (scm_t_bits)0, (scm_t_bits) len);
  157. return res;
  158. }
  159. static void
  160. validate_substring_args (SCM str, size_t start, size_t end)
  161. {
  162. if (!IS_STRING (str))
  163. scm_wrong_type_arg_msg (NULL, 0, str, "string");
  164. if (start > STRING_LENGTH (str))
  165. scm_out_of_range (NULL, scm_from_size_t (start));
  166. if (end > STRING_LENGTH (str) || end < start)
  167. scm_out_of_range (NULL, scm_from_size_t (end));
  168. }
  169. static inline void
  170. get_str_buf_start (SCM *str, SCM *buf, size_t *start)
  171. {
  172. *start = STRING_START (*str);
  173. if (IS_SH_STRING (*str))
  174. {
  175. *str = SH_STRING_STRING (*str);
  176. *start += STRING_START (*str);
  177. }
  178. *buf = STRING_STRINGBUF (*str);
  179. }
  180. SCM
  181. scm_i_substring (SCM str, size_t start, size_t end)
  182. {
  183. SCM buf;
  184. size_t str_start;
  185. get_str_buf_start (&str, &buf, &str_start);
  186. scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
  187. SET_STRINGBUF_SHARED (buf);
  188. scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
  189. return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
  190. (scm_t_bits)str_start + start,
  191. (scm_t_bits) end - start);
  192. }
  193. SCM
  194. scm_i_substring_read_only (SCM str, size_t start, size_t end)
  195. {
  196. SCM buf;
  197. size_t str_start;
  198. get_str_buf_start (&str, &buf, &str_start);
  199. scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
  200. SET_STRINGBUF_SHARED (buf);
  201. scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
  202. return scm_double_cell (RO_STRING_TAG, SCM_UNPACK(buf),
  203. (scm_t_bits)str_start + start,
  204. (scm_t_bits) end - start);
  205. }
  206. SCM
  207. scm_i_substring_copy (SCM str, size_t start, size_t end)
  208. {
  209. size_t len = end - start;
  210. SCM buf, my_buf;
  211. size_t str_start;
  212. get_str_buf_start (&str, &buf, &str_start);
  213. my_buf = make_stringbuf (len);
  214. memcpy (STRINGBUF_CHARS (my_buf),
  215. STRINGBUF_CHARS (buf) + str_start + start, len);
  216. scm_remember_upto_here_1 (buf);
  217. return scm_double_cell (STRING_TAG, SCM_UNPACK(my_buf),
  218. (scm_t_bits)0, (scm_t_bits) len);
  219. }
  220. SCM
  221. scm_i_substring_shared (SCM str, size_t start, size_t end)
  222. {
  223. if (start == 0 && end == STRING_LENGTH (str))
  224. return str;
  225. else
  226. {
  227. size_t len = end - start;
  228. if (IS_SH_STRING (str))
  229. {
  230. start += STRING_START (str);
  231. str = SH_STRING_STRING (str);
  232. }
  233. return scm_double_cell (SH_STRING_TAG, SCM_UNPACK(str),
  234. (scm_t_bits)start, (scm_t_bits) len);
  235. }
  236. }
  237. SCM
  238. scm_c_substring (SCM str, size_t start, size_t end)
  239. {
  240. validate_substring_args (str, start, end);
  241. return scm_i_substring (str, start, end);
  242. }
  243. SCM
  244. scm_c_substring_read_only (SCM str, size_t start, size_t end)
  245. {
  246. validate_substring_args (str, start, end);
  247. return scm_i_substring_read_only (str, start, end);
  248. }
  249. SCM
  250. scm_c_substring_copy (SCM str, size_t start, size_t end)
  251. {
  252. validate_substring_args (str, start, end);
  253. return scm_i_substring_copy (str, start, end);
  254. }
  255. SCM
  256. scm_c_substring_shared (SCM str, size_t start, size_t end)
  257. {
  258. validate_substring_args (str, start, end);
  259. return scm_i_substring_shared (str, start, end);
  260. }
  261. SCM
  262. scm_i_string_mark (SCM str)
  263. {
  264. if (IS_SH_STRING (str))
  265. return SH_STRING_STRING (str);
  266. else
  267. return STRING_STRINGBUF (str);
  268. }
  269. void
  270. scm_i_string_free (SCM str)
  271. {
  272. }
  273. /* Internal accessors
  274. */
  275. size_t
  276. scm_i_string_length (SCM str)
  277. {
  278. return STRING_LENGTH (str);
  279. }
  280. const char *
  281. scm_i_string_chars (SCM str)
  282. {
  283. SCM buf;
  284. size_t start;
  285. get_str_buf_start (&str, &buf, &start);
  286. return STRINGBUF_CHARS (buf) + start;
  287. }
  288. char *
  289. scm_i_string_writable_chars (SCM orig_str)
  290. {
  291. SCM buf, str = orig_str;
  292. size_t start;
  293. get_str_buf_start (&str, &buf, &start);
  294. if (IS_RO_STRING (str))
  295. scm_misc_error (NULL, "string is read-only: ~s", scm_list_1 (orig_str));
  296. scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
  297. if (STRINGBUF_SHARED (buf))
  298. {
  299. /* Clone stringbuf. For this, we put all threads to sleep.
  300. */
  301. size_t len = STRING_LENGTH (str);
  302. SCM new_buf;
  303. scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
  304. new_buf = make_stringbuf (len);
  305. memcpy (STRINGBUF_CHARS (new_buf),
  306. STRINGBUF_CHARS (buf) + STRING_START (str), len);
  307. scm_i_thread_put_to_sleep ();
  308. SET_STRING_STRINGBUF (str, new_buf);
  309. start -= STRING_START (str);
  310. SET_STRING_START (str, 0);
  311. scm_i_thread_wake_up ();
  312. buf = new_buf;
  313. scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
  314. }
  315. return STRINGBUF_CHARS (buf) + start;
  316. }
  317. void
  318. scm_i_string_stop_writing (void)
  319. {
  320. scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
  321. }
  322. /* Symbols.
  323. Basic symbol creation and accessing is done here, the rest is in
  324. symbols.[hc]. This has been done to keep stringbufs and the
  325. internals of strings and string-like objects confined to this file.
  326. */
  327. #define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
  328. SCM
  329. scm_i_make_symbol (SCM name, scm_t_bits flags,
  330. unsigned long hash, SCM props)
  331. {
  332. SCM buf;
  333. size_t start = STRING_START (name);
  334. size_t length = STRING_LENGTH (name);
  335. if (IS_SH_STRING (name))
  336. {
  337. name = SH_STRING_STRING (name);
  338. start += STRING_START (name);
  339. }
  340. buf = SYMBOL_STRINGBUF (name);
  341. if (start == 0 && length == STRINGBUF_LENGTH (buf))
  342. {
  343. /* reuse buf. */
  344. scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
  345. SET_STRINGBUF_SHARED (buf);
  346. scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
  347. }
  348. else
  349. {
  350. /* make new buf. */
  351. SCM new_buf = make_stringbuf (length);
  352. memcpy (STRINGBUF_CHARS (new_buf),
  353. STRINGBUF_CHARS (buf) + start, length);
  354. buf = new_buf;
  355. }
  356. return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
  357. (scm_t_bits) hash, SCM_UNPACK (props));
  358. }
  359. SCM
  360. scm_i_c_make_symbol (const char *name, size_t len,
  361. scm_t_bits flags, unsigned long hash, SCM props)
  362. {
  363. SCM buf = make_stringbuf (len);
  364. memcpy (STRINGBUF_CHARS (buf), name, len);
  365. return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
  366. (scm_t_bits) hash, SCM_UNPACK (props));
  367. }
  368. /* Return a new symbol that uses the LEN bytes pointed to by NAME as its
  369. underlying storage. */
  370. SCM
  371. scm_i_c_take_symbol (char *name, size_t len,
  372. scm_t_bits flags, unsigned long hash, SCM props)
  373. {
  374. SCM buf = scm_i_take_stringbufn (name, len);
  375. return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
  376. (scm_t_bits) hash, SCM_UNPACK (props));
  377. }
  378. size_t
  379. scm_i_symbol_length (SCM sym)
  380. {
  381. return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
  382. }
  383. size_t
  384. scm_c_symbol_length (SCM sym)
  385. #define FUNC_NAME "scm_c_symbol_length"
  386. {
  387. SCM_VALIDATE_SYMBOL (1, sym);
  388. return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
  389. }
  390. #undef FUNC_NAME
  391. const char *
  392. scm_i_symbol_chars (SCM sym)
  393. {
  394. SCM buf = SYMBOL_STRINGBUF (sym);
  395. return STRINGBUF_CHARS (buf);
  396. }
  397. SCM
  398. scm_i_symbol_mark (SCM sym)
  399. {
  400. scm_gc_mark (SYMBOL_STRINGBUF (sym));
  401. return SCM_CELL_OBJECT_3 (sym);
  402. }
  403. void
  404. scm_i_symbol_free (SCM sym)
  405. {
  406. }
  407. SCM
  408. scm_i_symbol_substring (SCM sym, size_t start, size_t end)
  409. {
  410. SCM buf = SYMBOL_STRINGBUF (sym);
  411. scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
  412. SET_STRINGBUF_SHARED (buf);
  413. scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
  414. return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf),
  415. (scm_t_bits)start, (scm_t_bits) end - start);
  416. }
  417. /* Debugging
  418. */
  419. #if SCM_DEBUG
  420. SCM scm_sys_string_dump (SCM);
  421. SCM scm_sys_symbol_dump (SCM);
  422. SCM scm_sys_stringbuf_hist (void);
  423. SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0,
  424. (SCM str),
  425. "")
  426. #define FUNC_NAME s_scm_sys_string_dump
  427. {
  428. SCM_VALIDATE_STRING (1, str);
  429. fprintf (stderr, "%p:\n", str);
  430. fprintf (stderr, " start: %u\n", STRING_START (str));
  431. fprintf (stderr, " len: %u\n", STRING_LENGTH (str));
  432. if (IS_SH_STRING (str))
  433. {
  434. fprintf (stderr, " string: %p\n", SH_STRING_STRING (str));
  435. fprintf (stderr, "\n");
  436. scm_sys_string_dump (SH_STRING_STRING (str));
  437. }
  438. else
  439. {
  440. SCM buf = STRING_STRINGBUF (str);
  441. fprintf (stderr, " buf: %p\n", buf);
  442. fprintf (stderr, " chars: %p\n", STRINGBUF_CHARS (buf));
  443. fprintf (stderr, " length: %u\n", STRINGBUF_LENGTH (buf));
  444. fprintf (stderr, " flags: %x\n", (SCM_CELL_WORD_0 (buf) & 0x300));
  445. }
  446. return SCM_UNSPECIFIED;
  447. }
  448. #undef FUNC_NAME
  449. SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0,
  450. (SCM sym),
  451. "")
  452. #define FUNC_NAME s_scm_sys_symbol_dump
  453. {
  454. SCM_VALIDATE_SYMBOL (1, sym);
  455. fprintf (stderr, "%p:\n", sym);
  456. fprintf (stderr, " hash: %lu\n", scm_i_symbol_hash (sym));
  457. {
  458. SCM buf = SYMBOL_STRINGBUF (sym);
  459. fprintf (stderr, " buf: %p\n", buf);
  460. fprintf (stderr, " chars: %p\n", STRINGBUF_CHARS (buf));
  461. fprintf (stderr, " length: %u\n", STRINGBUF_LENGTH (buf));
  462. fprintf (stderr, " shared: %u\n", STRINGBUF_SHARED (buf));
  463. }
  464. return SCM_UNSPECIFIED;
  465. }
  466. #undef FUNC_NAME
  467. SCM_DEFINE (scm_sys_stringbuf_hist, "%stringbuf-hist", 0, 0, 0,
  468. (void),
  469. "")
  470. #define FUNC_NAME s_scm_sys_stringbuf_hist
  471. {
  472. int i;
  473. for (i = 0; i < 1000; i++)
  474. if (lenhist[i])
  475. fprintf (stderr, " %3d: %u\n", i, lenhist[i]);
  476. fprintf (stderr, ">999: %u\n", lenhist[1000]);
  477. return SCM_UNSPECIFIED;
  478. }
  479. #undef FUNC_NAME
  480. #endif
  481. SCM_DEFINE (scm_string_p, "string?", 1, 0, 0,
  482. (SCM obj),
  483. "Return @code{#t} if @var{obj} is a string, else @code{#f}.")
  484. #define FUNC_NAME s_scm_string_p
  485. {
  486. return scm_from_bool (IS_STRING (obj));
  487. }
  488. #undef FUNC_NAME
  489. SCM_REGISTER_PROC (s_scm_list_to_string, "list->string", 1, 0, 0, scm_string);
  490. SCM_DEFINE (scm_string, "string", 0, 0, 1,
  491. (SCM chrs),
  492. "@deffnx {Scheme Procedure} list->string chrs\n"
  493. "Return a newly allocated string composed of the arguments,\n"
  494. "@var{chrs}.")
  495. #define FUNC_NAME s_scm_string
  496. {
  497. SCM result;
  498. size_t len;
  499. char *data;
  500. {
  501. long i = scm_ilength (chrs);
  502. SCM_ASSERT (i >= 0, chrs, SCM_ARG1, FUNC_NAME);
  503. len = i;
  504. }
  505. result = scm_i_make_string (len, &data);
  506. while (len > 0 && scm_is_pair (chrs))
  507. {
  508. SCM elt = SCM_CAR (chrs);
  509. SCM_VALIDATE_CHAR (SCM_ARGn, elt);
  510. *data++ = SCM_CHAR (elt);
  511. chrs = SCM_CDR (chrs);
  512. len--;
  513. }
  514. if (len > 0)
  515. scm_misc_error (NULL, "list changed while constructing string", SCM_EOL);
  516. if (!scm_is_null (chrs))
  517. scm_wrong_type_arg_msg (NULL, 0, chrs, "proper list");
  518. return result;
  519. }
  520. #undef FUNC_NAME
  521. SCM_DEFINE (scm_make_string, "make-string", 1, 1, 0,
  522. (SCM k, SCM chr),
  523. "Return a newly allocated string of\n"
  524. "length @var{k}. If @var{chr} is given, then all elements of\n"
  525. "the string are initialized to @var{chr}, otherwise the contents\n"
  526. "of the @var{string} are unspecified.")
  527. #define FUNC_NAME s_scm_make_string
  528. {
  529. return scm_c_make_string (scm_to_size_t (k), chr);
  530. }
  531. #undef FUNC_NAME
  532. SCM
  533. scm_c_make_string (size_t len, SCM chr)
  534. #define FUNC_NAME NULL
  535. {
  536. char *dst;
  537. SCM res = scm_i_make_string (len, &dst);
  538. if (!SCM_UNBNDP (chr))
  539. {
  540. SCM_VALIDATE_CHAR (0, chr);
  541. memset (dst, SCM_CHAR (chr), len);
  542. }
  543. return res;
  544. }
  545. #undef FUNC_NAME
  546. SCM_DEFINE (scm_string_length, "string-length", 1, 0, 0,
  547. (SCM string),
  548. "Return the number of characters in @var{string}.")
  549. #define FUNC_NAME s_scm_string_length
  550. {
  551. SCM_VALIDATE_STRING (1, string);
  552. return scm_from_size_t (STRING_LENGTH (string));
  553. }
  554. #undef FUNC_NAME
  555. size_t
  556. scm_c_string_length (SCM string)
  557. {
  558. if (!IS_STRING (string))
  559. scm_wrong_type_arg_msg (NULL, 0, string, "string");
  560. return STRING_LENGTH (string);
  561. }
  562. SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0,
  563. (SCM str, SCM k),
  564. "Return character @var{k} of @var{str} using zero-origin\n"
  565. "indexing. @var{k} must be a valid index of @var{str}.")
  566. #define FUNC_NAME s_scm_string_ref
  567. {
  568. size_t len;
  569. unsigned long idx;
  570. SCM_VALIDATE_STRING (1, str);
  571. len = scm_i_string_length (str);
  572. if (SCM_LIKELY (len > 0))
  573. idx = scm_to_unsigned_integer (k, 0, len - 1);
  574. else
  575. scm_out_of_range (NULL, k);
  576. return SCM_MAKE_CHAR (scm_i_string_chars (str)[idx]);
  577. }
  578. #undef FUNC_NAME
  579. SCM
  580. scm_c_string_ref (SCM str, size_t p)
  581. {
  582. if (p >= scm_i_string_length (str))
  583. scm_out_of_range (NULL, scm_from_size_t (p));
  584. return SCM_MAKE_CHAR (scm_i_string_chars (str)[p]);
  585. }
  586. SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0,
  587. (SCM str, SCM k, SCM chr),
  588. "Store @var{chr} in element @var{k} of @var{str} and return\n"
  589. "an unspecified value. @var{k} must be a valid index of\n"
  590. "@var{str}.")
  591. #define FUNC_NAME s_scm_string_set_x
  592. {
  593. size_t len;
  594. unsigned long idx;
  595. SCM_VALIDATE_STRING (1, str);
  596. len = scm_i_string_length (str);
  597. if (SCM_LIKELY (len > 0))
  598. idx = scm_to_unsigned_integer (k, 0, len - 1);
  599. else
  600. scm_out_of_range (NULL, k);
  601. SCM_VALIDATE_CHAR (3, chr);
  602. {
  603. char *dst = scm_i_string_writable_chars (str);
  604. dst[idx] = SCM_CHAR (chr);
  605. scm_i_string_stop_writing ();
  606. }
  607. return SCM_UNSPECIFIED;
  608. }
  609. #undef FUNC_NAME
  610. void
  611. scm_c_string_set_x (SCM str, size_t p, SCM chr)
  612. {
  613. if (p >= scm_i_string_length (str))
  614. scm_out_of_range (NULL, scm_from_size_t (p));
  615. {
  616. char *dst = scm_i_string_writable_chars (str);
  617. dst[p] = SCM_CHAR (chr);
  618. scm_i_string_stop_writing ();
  619. }
  620. }
  621. SCM_DEFINE (scm_substring, "substring", 2, 1, 0,
  622. (SCM str, SCM start, SCM end),
  623. "Return a newly allocated string formed from the characters\n"
  624. "of @var{str} beginning with index @var{start} (inclusive) and\n"
  625. "ending with index @var{end} (exclusive).\n"
  626. "@var{str} must be a string, @var{start} and @var{end} must be\n"
  627. "exact integers satisfying:\n\n"
  628. "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
  629. #define FUNC_NAME s_scm_substring
  630. {
  631. size_t len, from, to;
  632. SCM_VALIDATE_STRING (1, str);
  633. len = scm_i_string_length (str);
  634. from = scm_to_unsigned_integer (start, 0, len);
  635. if (SCM_UNBNDP (end))
  636. to = len;
  637. else
  638. to = scm_to_unsigned_integer (end, from, len);
  639. return scm_i_substring (str, from, to);
  640. }
  641. #undef FUNC_NAME
  642. SCM_DEFINE (scm_substring_read_only, "substring/read-only", 2, 1, 0,
  643. (SCM str, SCM start, SCM end),
  644. "Return a newly allocated string formed from the characters\n"
  645. "of @var{str} beginning with index @var{start} (inclusive) and\n"
  646. "ending with index @var{end} (exclusive).\n"
  647. "@var{str} must be a string, @var{start} and @var{end} must be\n"
  648. "exact integers satisfying:\n"
  649. "\n"
  650. "0 <= @var{start} <= @var{end} <= (string-length @var{str}).\n"
  651. "\n"
  652. "The returned string is read-only.\n")
  653. #define FUNC_NAME s_scm_substring_read_only
  654. {
  655. size_t len, from, to;
  656. SCM_VALIDATE_STRING (1, str);
  657. len = scm_i_string_length (str);
  658. from = scm_to_unsigned_integer (start, 0, len);
  659. if (SCM_UNBNDP (end))
  660. to = len;
  661. else
  662. to = scm_to_unsigned_integer (end, from, len);
  663. return scm_i_substring_read_only (str, from, to);
  664. }
  665. #undef FUNC_NAME
  666. SCM_DEFINE (scm_substring_copy, "substring/copy", 2, 1, 0,
  667. (SCM str, SCM start, SCM end),
  668. "Return a newly allocated string formed from the characters\n"
  669. "of @var{str} beginning with index @var{start} (inclusive) and\n"
  670. "ending with index @var{end} (exclusive).\n"
  671. "@var{str} must be a string, @var{start} and @var{end} must be\n"
  672. "exact integers satisfying:\n\n"
  673. "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
  674. #define FUNC_NAME s_scm_substring_copy
  675. {
  676. /* For the Scheme version, START is mandatory, but for the C
  677. version, it is optional. See scm_string_copy in srfi-13.c for a
  678. rationale.
  679. */
  680. size_t from, to;
  681. SCM_VALIDATE_STRING (1, str);
  682. scm_i_get_substring_spec (scm_i_string_length (str),
  683. start, &from, end, &to);
  684. return scm_i_substring_copy (str, from, to);
  685. }
  686. #undef FUNC_NAME
  687. SCM_DEFINE (scm_substring_shared, "substring/shared", 2, 1, 0,
  688. (SCM str, SCM start, SCM end),
  689. "Return string that indirectly refers to the characters\n"
  690. "of @var{str} beginning with index @var{start} (inclusive) and\n"
  691. "ending with index @var{end} (exclusive).\n"
  692. "@var{str} must be a string, @var{start} and @var{end} must be\n"
  693. "exact integers satisfying:\n\n"
  694. "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
  695. #define FUNC_NAME s_scm_substring_shared
  696. {
  697. size_t len, from, to;
  698. SCM_VALIDATE_STRING (1, str);
  699. len = scm_i_string_length (str);
  700. from = scm_to_unsigned_integer (start, 0, len);
  701. if (SCM_UNBNDP (end))
  702. to = len;
  703. else
  704. to = scm_to_unsigned_integer (end, from, len);
  705. return scm_i_substring_shared (str, from, to);
  706. }
  707. #undef FUNC_NAME
  708. SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
  709. (SCM args),
  710. "Return a newly allocated string whose characters form the\n"
  711. "concatenation of the given strings, @var{args}.")
  712. #define FUNC_NAME s_scm_string_append
  713. {
  714. SCM res;
  715. size_t i = 0;
  716. SCM l, s;
  717. char *data;
  718. SCM_VALIDATE_REST_ARGUMENT (args);
  719. for (l = args; !scm_is_null (l); l = SCM_CDR (l))
  720. {
  721. s = SCM_CAR (l);
  722. SCM_VALIDATE_STRING (SCM_ARGn, s);
  723. i += scm_i_string_length (s);
  724. }
  725. res = scm_i_make_string (i, &data);
  726. for (l = args; !scm_is_null (l); l = SCM_CDR (l))
  727. {
  728. size_t len;
  729. s = SCM_CAR (l);
  730. SCM_VALIDATE_STRING (SCM_ARGn, s);
  731. len = scm_i_string_length (s);
  732. memcpy (data, scm_i_string_chars (s), len);
  733. data += len;
  734. scm_remember_upto_here_1 (s);
  735. }
  736. return res;
  737. }
  738. #undef FUNC_NAME
  739. int
  740. scm_is_string (SCM obj)
  741. {
  742. return IS_STRING (obj);
  743. }
  744. SCM
  745. scm_from_locale_stringn (const char *str, size_t len)
  746. {
  747. SCM res;
  748. char *dst;
  749. if (len == (size_t)-1)
  750. len = strlen (str);
  751. res = scm_i_make_string (len, &dst);
  752. memcpy (dst, str, len);
  753. return res;
  754. }
  755. SCM
  756. scm_from_locale_string (const char *str)
  757. {
  758. return scm_from_locale_stringn (str, -1);
  759. }
  760. SCM
  761. scm_take_locale_stringn (char *str, size_t len)
  762. {
  763. SCM buf, res;
  764. if (len == (size_t)-1)
  765. len = strlen (str);
  766. else
  767. {
  768. /* Ensure STR is null terminated. A realloc for 1 extra byte should
  769. often be satisfied from the alignment padding after the block, with
  770. no actual data movement. */
  771. str = scm_realloc (str, len+1);
  772. str[len] = '\0';
  773. }
  774. buf = scm_i_take_stringbufn (str, len);
  775. res = scm_double_cell (STRING_TAG,
  776. SCM_UNPACK (buf),
  777. (scm_t_bits) 0, (scm_t_bits) len);
  778. return res;
  779. }
  780. SCM
  781. scm_take_locale_string (char *str)
  782. {
  783. return scm_take_locale_stringn (str, -1);
  784. }
  785. char *
  786. scm_to_locale_stringn (SCM str, size_t *lenp)
  787. {
  788. char *res;
  789. size_t len;
  790. if (!scm_is_string (str))
  791. scm_wrong_type_arg_msg (NULL, 0, str, "string");
  792. len = scm_i_string_length (str);
  793. res = scm_malloc (len + ((lenp==NULL)? 1 : 0));
  794. memcpy (res, scm_i_string_chars (str), len);
  795. if (lenp == NULL)
  796. {
  797. res[len] = '\0';
  798. if (strlen (res) != len)
  799. {
  800. free (res);
  801. scm_misc_error (NULL,
  802. "string contains #\\nul character: ~S",
  803. scm_list_1 (str));
  804. }
  805. }
  806. else
  807. *lenp = len;
  808. scm_remember_upto_here_1 (str);
  809. return res;
  810. }
  811. char *
  812. scm_to_locale_string (SCM str)
  813. {
  814. return scm_to_locale_stringn (str, NULL);
  815. }
  816. size_t
  817. scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len)
  818. {
  819. size_t len;
  820. if (!scm_is_string (str))
  821. scm_wrong_type_arg_msg (NULL, 0, str, "string");
  822. len = scm_i_string_length (str);
  823. memcpy (buf, scm_i_string_chars (str), (len > max_len)? max_len : len);
  824. scm_remember_upto_here_1 (str);
  825. return len;
  826. }
  827. /* converts C scm_array of strings to SCM scm_list of strings. */
  828. /* If argc < 0, a null terminated scm_array is assumed. */
  829. SCM
  830. scm_makfromstrs (int argc, char **argv)
  831. {
  832. int i = argc;
  833. SCM lst = SCM_EOL;
  834. if (0 > i)
  835. for (i = 0; argv[i]; i++);
  836. while (i--)
  837. lst = scm_cons (scm_from_locale_string (argv[i]), lst);
  838. return lst;
  839. }
  840. /* Return a newly allocated array of char pointers to each of the strings
  841. in args, with a terminating NULL pointer. */
  842. char **
  843. scm_i_allocate_string_pointers (SCM list)
  844. {
  845. char **result;
  846. int len = scm_ilength (list);
  847. int i;
  848. if (len < 0)
  849. scm_wrong_type_arg_msg (NULL, 0, list, "proper list");
  850. scm_dynwind_begin (0);
  851. result = (char **) scm_malloc ((len + 1) * sizeof (char *));
  852. result[len] = NULL;
  853. scm_dynwind_unwind_handler (free, result, 0);
  854. /* The list might be have been modified in another thread, so
  855. we check LIST before each access.
  856. */
  857. for (i = 0; i < len && scm_is_pair (list); i++)
  858. {
  859. result[i] = scm_to_locale_string (SCM_CAR (list));
  860. list = SCM_CDR (list);
  861. }
  862. scm_dynwind_end ();
  863. return result;
  864. }
  865. void
  866. scm_i_free_string_pointers (char **pointers)
  867. {
  868. int i;
  869. for (i = 0; pointers[i]; i++)
  870. free (pointers[i]);
  871. free (pointers);
  872. }
  873. void
  874. scm_i_get_substring_spec (size_t len,
  875. SCM start, size_t *cstart,
  876. SCM end, size_t *cend)
  877. {
  878. if (SCM_UNBNDP (start))
  879. *cstart = 0;
  880. else
  881. *cstart = scm_to_unsigned_integer (start, 0, len);
  882. if (SCM_UNBNDP (end))
  883. *cend = len;
  884. else
  885. *cend = scm_to_unsigned_integer (end, *cstart, len);
  886. }
  887. #if SCM_ENABLE_DEPRECATED
  888. /* When these definitions are removed, it becomes reasonable to use
  889. read-only strings for string literals. For that, change the reader
  890. to create string literals with scm_c_substring_read_only instead of
  891. with scm_c_substring_copy.
  892. */
  893. int
  894. scm_i_deprecated_stringp (SCM str)
  895. {
  896. scm_c_issue_deprecation_warning
  897. ("SCM_STRINGP is deprecated. Use scm_is_string instead.");
  898. return scm_is_string (str);
  899. }
  900. char *
  901. scm_i_deprecated_string_chars (SCM str)
  902. {
  903. char *chars;
  904. scm_c_issue_deprecation_warning
  905. ("SCM_STRING_CHARS is deprecated. See the manual for alternatives.");
  906. /* We don't accept shared substrings here since they are not
  907. null-terminated.
  908. */
  909. if (IS_SH_STRING (str))
  910. scm_misc_error (NULL,
  911. "SCM_STRING_CHARS does not work with shared substrings.",
  912. SCM_EOL);
  913. /* We explicitely test for read-only strings to produce a better
  914. error message.
  915. */
  916. if (IS_RO_STRING (str))
  917. scm_misc_error (NULL,
  918. "SCM_STRING_CHARS does not work with read-only strings.",
  919. SCM_EOL);
  920. /* The following is still wrong, of course...
  921. */
  922. chars = scm_i_string_writable_chars (str);
  923. scm_i_string_stop_writing ();
  924. return chars;
  925. }
  926. size_t
  927. scm_i_deprecated_string_length (SCM str)
  928. {
  929. scm_c_issue_deprecation_warning
  930. ("SCM_STRING_LENGTH is deprecated. Use scm_c_string_length instead.");
  931. return scm_c_string_length (str);
  932. }
  933. #endif
  934. void
  935. scm_init_strings ()
  936. {
  937. scm_nullstr = scm_i_make_string (0, NULL);
  938. #include "libguile/strings.x"
  939. }
  940. /*
  941. Local Variables:
  942. c-file-style: "gnu"
  943. End:
  944. */