srfi-13.scm 79 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121
  1. ;;; SRFI 13 string library reference implementation -*- Scheme -*-
  2. ;;; Olin Shivers 7/2000
  3. ;;;
  4. ;;; Copyright (c) 1988-1994 Massachusetts Institute of Technology.
  5. ;;; Copyright (c) 1998, 1999, 2000 Olin Shivers. All rights reserved.
  6. ;;; The details of the copyrights appear at the end of the file. Short
  7. ;;; summary: BSD-style open source.
  8. ;;; Exports:
  9. ;;; string-map string-map!
  10. ;;; string-fold string-unfold
  11. ;;; string-fold-right string-unfold-right
  12. ;;; string-tabulate string-for-each string-for-each-index
  13. ;;; string-every string-any
  14. ;;; string-hash string-hash-ci
  15. ;;; string-compare string-compare-ci
  16. ;;; string= string< string> string<= string>= string<>
  17. ;;; string-ci= string-ci< string-ci> string-ci<= string-ci>= string-ci<>
  18. ;;; string-downcase string-upcase string-titlecase
  19. ;;; string-downcase! string-upcase! string-titlecase!
  20. ;;; string-take string-take-right
  21. ;;; string-drop string-drop-right
  22. ;;; string-pad string-pad-right
  23. ;;; string-trim string-trim-right string-trim-both
  24. ;;; string-filter string-delete
  25. ;;; string-index string-index-right
  26. ;;; string-skip string-skip-right
  27. ;;; string-count
  28. ;;; string-prefix-length string-prefix-length-ci
  29. ;;; string-suffix-length string-suffix-length-ci
  30. ;;; string-prefix? string-prefix-ci?
  31. ;;; string-suffix? string-suffix-ci?
  32. ;;; string-contains string-contains-ci
  33. ;;; string-copy! substring/shared
  34. ;;; string-reverse string-reverse! reverse-list->string
  35. ;;; string-concatenate string-concatenate/shared string-concatenate-reverse
  36. ;;; string-append/shared
  37. ;;; xsubstring string-xcopy!
  38. ;;; string-null?
  39. ;;; string-join
  40. ;;; string-tokenize
  41. ;;; string-replace
  42. ;;;
  43. ;;; R5RS extended:
  44. ;;; string->list string-copy string-fill!
  45. ;;;
  46. ;;; R5RS re-exports:
  47. ;;; string? make-string string-length string-ref string-set!
  48. ;;;
  49. ;;; R5RS re-exports (also defined here but commented-out):
  50. ;;; string string-append list->string
  51. ;;;
  52. ;;; Low-level routines:
  53. ;;; make-kmp-restart-vector string-kmp-partial-search kmp-step
  54. ;;; string-parse-start+end
  55. ;;; string-parse-final-start+end
  56. ;;; let-string-start+end
  57. ;;; check-substring-spec
  58. ;;; substring-spec-ok?
  59. ;;; Imports
  60. ;;; This is a fairly large library. While it was written for portability, you
  61. ;;; must be aware of its dependencies in order to run it in a given scheme
  62. ;;; implementation. Here is a complete list of the dependencies it has and the
  63. ;;; assumptions it makes beyond stock R5RS Scheme:
  64. ;;;
  65. ;;; This code has the following non-R5RS dependencies:
  66. ;;; - (RECEIVE (var ...) mv-exp body ...) multiple-value binding macro;
  67. ;;;
  68. ;;; - Various imports from the char-set library for the routines that can
  69. ;;; take char-set arguments;
  70. ;;;
  71. ;;; - An ASSERTION-VIOLATION procedure;
  72. ;;;
  73. ;;; - BITWISE-AND for the hash functions;
  74. ;;;
  75. ;;; - A simple CHECK-ARG procedure for checking parameter values; it is
  76. ;;; (lambda (pred val proc)
  77. ;;; (if (pred val) val (assertion-violation 'check-arg "Bad arg" val pred proc)))
  78. ;;;
  79. ;;; - :OPTIONAL and LET-OPTIONALS* macros for parsing, defaulting &
  80. ;;; type-checking optional parameters from a rest argument;
  81. ;;;
  82. ;;; - CHAR-CASED? and CHAR-TITLECASE for the STRING-TITLECASE &
  83. ;;; STRING-TITLECASE! procedures. The former returns true iff a character is
  84. ;;; one that has case distinctions; in ASCII it returns true on a-z and A-Z.
  85. ;;; CHAR-TITLECASE is analagous to CHAR-UPCASE and CHAR-DOWNCASE. In ASCII &
  86. ;;; Latin-1, it is the same as CHAR-UPCASE.
  87. ;;;
  88. ;;; The code depends upon a small set of core string primitives from R5RS:
  89. ;;; MAKE-STRING STRING-REF STRING-SET! STRING? STRING-LENGTH SUBSTRING
  90. ;;; (Actually, SUBSTRING is not a primitive, but we assume that an
  91. ;;; implementation's native version is probably faster than one we could
  92. ;;; define, so we import it from R5RS.)
  93. ;;;
  94. ;;; The code depends upon a small set of R5RS character primitives:
  95. ;;; char? char=? char-ci=? char<? char-ci<?
  96. ;;; char-upcase char-downcase
  97. ;;; char->integer (for the hash functions)
  98. ;;;
  99. ;;; We assume the following:
  100. ;;; - CHAR-DOWNCASE o CHAR-UPCASE = CHAR-DOWNCASE
  101. ;;; - CHAR-CI=? is equivalent to
  102. ;;; (lambda (c1 c2) (char=? (char-downcase (char-upcase c1))
  103. ;;; (char-downcase (char-upcase c2))))
  104. ;;; - CHAR-UPCASE, CHAR-DOWNCASE and CHAR-TITLECASE are locale-insensitive
  105. ;;; and consistent with Unicode's 1-1 char-mapping spec.
  106. ;;; These things are typically true, but if not, you would need to modify
  107. ;;; the case-mapping and case-insensitive routines.
  108. ;;; Enough introductory blather. On to the source code. (But see the end of
  109. ;;; the file for further notes on porting & performance tuning.)
  110. ; Start S48 additions
  111. (define (check-arg pred val caller)
  112. (if (not (pred val))
  113. (assertion-violation caller "invalid argument" val))
  114. val)
  115. (define-syntax :optional
  116. (syntax-rules ()
  117. ((:optional rest default-exp)
  118. (let ((maybe-arg rest))
  119. (if (pair? maybe-arg)
  120. (if (null? (cdr maybe-arg)) (car maybe-arg)
  121. (apply assertion-violation ':optional
  122. "too many optional arguments" maybe-arg))
  123. default-exp)))
  124. ((:optional rest default-exp arg-test)
  125. (let ((maybe-arg rest))
  126. (if (pair? maybe-arg)
  127. (if (null? (cdr maybe-arg))
  128. (let ((val (car maybe-arg)))
  129. (if (arg-test val)
  130. val
  131. (assertion-violation ':optional
  132. "Optional argument failed test"
  133. arg-test val)))
  134. (apply assertion-violation ':optional
  135. "too many optional arguments"
  136. maybe-arg))
  137. default-exp)))))
  138. (define-syntax let-optionals*
  139. (syntax-rules ()
  140. ((let-optionals* arg (opt-clause ...) body ...)
  141. (let ((rest arg))
  142. (%let-optionals* rest (opt-clause ...) body ...)))))
  143. (define-syntax %let-optionals*
  144. (syntax-rules ()
  145. ((%let-optionals* arg (((var ...) xparser) opt-clause ...) body ...)
  146. (call-with-values (lambda () (xparser arg))
  147. (lambda (rest var ...)
  148. (%let-optionals* rest (opt-clause ...) body ...))))
  149. ((%let-optionals* arg ((var default) opt-clause ...) body ...)
  150. (call-with-values (lambda () (if (null? arg) (values default '())
  151. (values (car arg) (cdr arg))))
  152. (lambda (var rest)
  153. (%let-optionals* rest (opt-clause ...) body ...))))
  154. ((%let-optionals* arg ((var default test) opt-clause ...) body ...)
  155. (call-with-values (lambda ()
  156. (if (null? arg) (values default '())
  157. (let ((var (car arg)))
  158. (if test (values var (cdr arg))
  159. (assertion-violation 'let-opt
  160. "arg failed LET-OPT test" var)))))
  161. (lambda (var rest)
  162. (%let-optionals* rest (opt-clause ...) body ...))))
  163. ((%let-optionals* arg ((var default test supplied?) opt-clause ...) body ...)
  164. (call-with-values (lambda ()
  165. (if (null? arg) (values default #f '())
  166. (let ((var (car arg)))
  167. (if test (values var #t (cdr arg))
  168. (assertion-violation 'let-opt
  169. "arg failed LET-OPT test" var)))))
  170. (lambda (var supplied? rest)
  171. (%let-optionals* rest (opt-clause ...) body ...))))
  172. ((%let-optionals* arg (rest) body ...)
  173. (let ((rest arg)) body ...))
  174. ((%let-optionals* arg () body ...)
  175. (if (null? arg) (begin body ...)
  176. (assertion-violation 'let-opt "Too many arguments in let-opt" arg)))))
  177. (define (char-cased? c)
  178. (or (char-lower-case? c)
  179. (char-upper-case? c)
  180. (char-title-case? c)))
  181. ; End S48 additions
  182. ;;; Support for START/END substring specs
  183. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  184. ;;; This macro parses optional start/end arguments from arg lists, defaulting
  185. ;;; them to 0/(string-length s), and checks them for correctness.
  186. (define-syntax let-string-start+end
  187. (syntax-rules ()
  188. ((let-string-start+end (start end) proc s-exp args-exp body ...)
  189. (receive (start end) (string-parse-final-start+end 'proc s-exp args-exp)
  190. body ...))
  191. ((let-string-start+end (start end rest) proc s-exp args-exp body ...)
  192. (receive (rest start end) (string-parse-start+end 'proc s-exp args-exp)
  193. body ...))))
  194. ;;; This one parses out a *pair* of final start/end indices.
  195. ;;; Not exported; for internal use.
  196. (define-syntax let-string-start+end2
  197. (syntax-rules ()
  198. ((l-s-s+e2 (start1 end1 start2 end2) proc s1 s2 args body ...)
  199. (let ((procv proc)) ; Make sure PROC is only evaluated once.
  200. (let-string-start+end (start1 end1 rest) procv s1 args
  201. (let-string-start+end (start2 end2) procv s2 rest
  202. body ...))))))
  203. ;;; Returns three values: rest start end
  204. (define (string-parse-start+end proc s args)
  205. (if (not (string? s)) (assertion-violation proc "Non-string value" s))
  206. (let ((slen (string-length s)))
  207. (if (pair? args)
  208. (let ((start (car args))
  209. (args (cdr args)))
  210. (if (and (integer? start) (exact? start) (>= start 0))
  211. (receive (end args)
  212. (if (pair? args)
  213. (let ((end (car args))
  214. (args (cdr args)))
  215. (if (and (integer? end) (exact? end) (<= end slen))
  216. (values end args)
  217. (assertion-violation proc "Illegal substring END spec"
  218. end s)))
  219. (values slen args))
  220. (if (<= start end) (values args start end)
  221. (assertion-violation proc "Illegal substring START/END spec"
  222. start end s)))
  223. (assertion-violation proc "Illegal substring START spec" start s)))
  224. (values '() 0 slen))))
  225. (define (string-parse-final-start+end proc s args)
  226. (receive (rest start end) (string-parse-start+end proc s args)
  227. (if (pair? rest)
  228. (assertion-violation proc "Extra arguments to procedure" rest)
  229. (values start end))))
  230. (define (substring-spec-ok? s start end)
  231. (and (string? s)
  232. (integer? start)
  233. (exact? start)
  234. (integer? end)
  235. (exact? end)
  236. (<= 0 start)
  237. (<= start end)
  238. (<= end (string-length s))))
  239. (define (check-substring-spec proc s start end)
  240. (if (not (substring-spec-ok? s start end))
  241. (assertion-violation proc "Illegal substring spec." s start end)))
  242. ;;; Defined by R5RS, so commented out here.
  243. ;(define (string . chars)
  244. ; (let* ((len (length chars))
  245. ; (ans (make-string len)))
  246. ; (do ((i 0 (+ i 1))
  247. ; (chars chars (cdr chars)))
  248. ; ((>= i len))
  249. ; (string-set! ans i (car chars)))
  250. ; ans))
  251. ;
  252. ;(define (string . chars) (string-unfold null? car cdr chars))
  253. ;;; substring/shared S START [END]
  254. ;;; string-copy S [START END]
  255. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  256. ;;; All this goop is just arg parsing & checking surrounding a call to the
  257. ;;; actual primitive, %SUBSTRING/SHARED.
  258. (define (substring/shared s start . maybe-end)
  259. (check-arg string? s 'substring/shared)
  260. (let ((slen (string-length s)))
  261. (check-arg (lambda (start) (and (integer? start) (exact? start) (<= 0 start)))
  262. start 'substring/shared)
  263. (%substring/shared s start
  264. (:optional maybe-end slen
  265. (lambda (end) (and (integer? end)
  266. (exact? end)
  267. (<= start end)
  268. (<= end slen)))))))
  269. ;;; Split out so that other routines in this library can avoid arg-parsing
  270. ;;; overhead for END parameter.
  271. (define (%substring/shared s start end)
  272. (if (and (zero? start) (= end (string-length s))) s
  273. (substring s start end)))
  274. (define (string-copy s . maybe-start+end)
  275. (let-string-start+end (start end) string-copy s maybe-start+end
  276. (substring s start end)))
  277. ;This library uses the R5RS SUBSTRING, but doesn't export it.
  278. ;Here is a definition, just for completeness.
  279. ;(define (substring s start end)
  280. ; (check-substring-spec 'substring s start end)
  281. ; (let* ((slen (- end start))
  282. ; (ans (make-string slen)))
  283. ; (do ((i 0 (+ i 1))
  284. ; (j start (+ j 1)))
  285. ; ((>= i slen) ans)
  286. ; (string-set! ans i (string-ref s j)))))
  287. ;;; Basic iterators and other higher-order abstractions
  288. ;;; (string-map proc s [start end])
  289. ;;; (string-map! proc s [start end])
  290. ;;; (string-fold kons knil s [start end])
  291. ;;; (string-fold-right kons knil s [start end])
  292. ;;; (string-unfold p f g seed [base make-final])
  293. ;;; (string-unfold-right p f g seed [base make-final])
  294. ;;; (string-for-each proc s [start end])
  295. ;;; (string-for-each-index proc s [start end])
  296. ;;; (string-every char-set/char/pred s [start end])
  297. ;;; (string-any char-set/char/pred s [start end])
  298. ;;; (string-tabulate proc len)
  299. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  300. ;;; You want compiler support for high-level transforms on fold and unfold ops.
  301. ;;; You'd at least like a lot of inlining for clients of these procedures.
  302. ;;; Don't hold your breath.
  303. (define (string-map proc s . maybe-start+end)
  304. (check-arg procedure? proc 'string-map)
  305. (let-string-start+end (start end) string-map s maybe-start+end
  306. (%string-map proc s start end)))
  307. (define (%string-map proc s start end) ; Internal utility
  308. (let* ((len (- end start))
  309. (ans (make-string len)))
  310. (do ((i (- end 1) (- i 1))
  311. (j (- len 1) (- j 1)))
  312. ((< j 0))
  313. (string-set! ans j (proc (string-ref s i))))
  314. ans))
  315. (define (string-map! proc s . maybe-start+end)
  316. (check-arg procedure? proc 'string-map!)
  317. (let-string-start+end (start end) string-map! s maybe-start+end
  318. (%string-map! proc s start end)))
  319. (define (%string-map! proc s start end)
  320. (do ((i (- end 1) (- i 1)))
  321. ((< i start))
  322. (string-set! s i (proc (string-ref s i)))))
  323. (define (string-fold kons knil s . maybe-start+end)
  324. (check-arg procedure? kons 'string-fold)
  325. (let-string-start+end (start end) string-fold s maybe-start+end
  326. (let lp ((v knil) (i start))
  327. (if (< i end) (lp (kons (string-ref s i) v) (+ i 1))
  328. v))))
  329. (define (string-fold-right kons knil s . maybe-start+end)
  330. (check-arg procedure? kons 'string-fold-right)
  331. (let-string-start+end (start end) string-fold-right s maybe-start+end
  332. (let lp ((v knil) (i (- end 1)))
  333. (if (>= i start) (lp (kons (string-ref s i) v) (- i 1))
  334. v))))
  335. ;;; (string-unfold p f g seed [base make-final])
  336. ;;; This is the fundamental constructor for strings.
  337. ;;; - G is used to generate a series of "seed" values from the initial seed:
  338. ;;; SEED, (G SEED), (G^2 SEED), (G^3 SEED), ...
  339. ;;; - P tells us when to stop -- when it returns true when applied to one
  340. ;;; of these seed values.
  341. ;;; - F maps each seed value to the corresponding character
  342. ;;; in the result string. These chars are assembled into the
  343. ;;; string in a left-to-right order.
  344. ;;; - BASE is the optional initial/leftmost portion of the constructed string;
  345. ;;; it defaults to the empty string "".
  346. ;;; - MAKE-FINAL is applied to the terminal seed value (on which P returns
  347. ;;; true) to produce the final/rightmost portion of the constructed string.
  348. ;;; It defaults to (LAMBDA (X) "").
  349. ;;;
  350. ;;; In other words, the following (simple, inefficient) definition holds:
  351. ;;; (define (string-unfold p f g seed base make-final)
  352. ;;; (string-append base
  353. ;;; (let recur ((seed seed))
  354. ;;; (if (p seed) (make-final seed)
  355. ;;; (string-append (string (f seed))
  356. ;;; (recur (g seed)))))))
  357. ;;;
  358. ;;; STRING-UNFOLD is a fairly powerful constructor -- you can use it to
  359. ;;; reverse a string, copy a string, convert a list to a string, read
  360. ;;; a port into a string, and so forth. Examples:
  361. ;;; (port->string port) =
  362. ;;; (string-unfold (compose eof-object? peek-char)
  363. ;;; read-char values port)
  364. ;;;
  365. ;;; (list->string lis) = (string-unfold null? car cdr lis)
  366. ;;;
  367. ;;; (tabulate-string f size) = (string-unfold (lambda (i) (= i size)) f add1 0)
  368. ;;; A problem with the following simple formulation is that it pushes one
  369. ;;; stack frame for every char in the result string -- an issue if you are
  370. ;;; using it to read a 100kchar string. So we don't use it -- but I include
  371. ;;; it to give a clear, straightforward description of what the function
  372. ;;; does.
  373. ;(define (string-unfold p f g seed base make-final)
  374. ; (let ((ans (let recur ((seed seed) (i (string-length base)))
  375. ; (if (p seed)
  376. ; (let* ((final (make-final seed))
  377. ; (ans (make-string (+ i (string-length final)))))
  378. ; (string-copy! ans i final)
  379. ; ans)
  380. ;
  381. ; (let* ((c (f seed))
  382. ; (s (recur (g seed) (+ i 1))))
  383. ; (string-set! s i c)
  384. ; s)))))
  385. ; (string-copy! ans 0 base)
  386. ; ans))
  387. ;;; The strategy is to allocate a series of chunks into which we stash the
  388. ;;; chars as we generate them. Chunk size goes up in powers of two starting
  389. ;;; with 40 and levelling out at 4k, i.e.
  390. ;;; 40 40 80 160 320 640 1280 2560 4096 4096 4096 4096 4096...
  391. ;;; This should work pretty well for short strings, 1-line (80 char) strings,
  392. ;;; and longer ones. When done, we allocate an answer string and copy the
  393. ;;; chars over from the chunk buffers.
  394. (define (string-unfold p f g seed . base+make-final)
  395. (check-arg procedure? p 'string-unfold)
  396. (check-arg procedure? f 'string-unfold)
  397. (check-arg procedure? g 'string-unfold)
  398. (let-optionals* base+make-final
  399. ((base "" (string? base))
  400. (make-final (lambda (x) "") (procedure? make-final)))
  401. (let lp ((chunks '()) ; Previously filled chunks
  402. (nchars 0) ; Number of chars in CHUNKS
  403. (chunk (make-string 40)) ; Current chunk into which we write
  404. (chunk-len 40)
  405. (i 0) ; Number of chars written into CHUNK
  406. (seed seed))
  407. (let lp2 ((i i) (seed seed))
  408. (if (not (p seed))
  409. (let ((c (f seed))
  410. (seed (g seed)))
  411. (if (< i chunk-len)
  412. (begin (string-set! chunk i c)
  413. (lp2 (+ i 1) seed))
  414. (let* ((nchars2 (+ chunk-len nchars))
  415. (chunk-len2 (min 4096 nchars2))
  416. (new-chunk (make-string chunk-len2)))
  417. (string-set! new-chunk 0 c)
  418. (lp (cons chunk chunks) (+ nchars chunk-len)
  419. new-chunk chunk-len2 1 seed))))
  420. ;; We're done. Make the answer string & install the bits.
  421. (let* ((final (make-final seed))
  422. (flen (string-length final))
  423. (base-len (string-length base))
  424. (j (+ base-len nchars i))
  425. (ans (make-string (+ j flen))))
  426. (%string-copy! ans j final 0 flen) ; Install FINAL.
  427. (let ((j (- j i)))
  428. (%string-copy! ans j chunk 0 i) ; Install CHUNK[0,I).
  429. (let lp ((j j) (chunks chunks)) ; Install CHUNKS.
  430. (if (pair? chunks)
  431. (let* ((chunk (car chunks))
  432. (chunks (cdr chunks))
  433. (chunk-len (string-length chunk))
  434. (j (- j chunk-len)))
  435. (%string-copy! ans j chunk 0 chunk-len)
  436. (lp j chunks)))))
  437. (%string-copy! ans 0 base 0 base-len) ; Install BASE.
  438. ans))))))
  439. (define (string-unfold-right p f g seed . base+make-final)
  440. (let-optionals* base+make-final
  441. ((base "" (string? base))
  442. (make-final (lambda (x) "") (procedure? make-final)))
  443. (let lp ((chunks '()) ; Previously filled chunks
  444. (nchars 0) ; Number of chars in CHUNKS
  445. (chunk (make-string 40)) ; Current chunk into which we write
  446. (chunk-len 40)
  447. (i 40) ; Number of chars available in CHUNK
  448. (seed seed))
  449. (let lp2 ((i i) (seed seed)) ; Fill up CHUNK from right
  450. (if (not (p seed)) ; to left.
  451. (let ((c (f seed))
  452. (seed (g seed)))
  453. (if (> i 0)
  454. (let ((i (- i 1)))
  455. (string-set! chunk i c)
  456. (lp2 i seed))
  457. (let* ((nchars2 (+ chunk-len nchars))
  458. (chunk-len2 (min 4096 nchars2))
  459. (new-chunk (make-string chunk-len2))
  460. (i (- chunk-len2 1)))
  461. (string-set! new-chunk i c)
  462. (lp (cons chunk chunks) (+ nchars chunk-len)
  463. new-chunk chunk-len2 i seed))))
  464. ;; We're done. Make the answer string & install the bits.
  465. (let* ((final (make-final seed))
  466. (flen (string-length final))
  467. (base-len (string-length base))
  468. (chunk-used (- chunk-len i))
  469. (j (+ base-len nchars chunk-used))
  470. (ans (make-string (+ j flen))))
  471. (%string-copy! ans 0 final 0 flen) ; Install FINAL.
  472. (%string-copy! ans flen chunk i chunk-len); Install CHUNK[I,).
  473. (let lp ((j (+ flen chunk-used)) ; Install CHUNKS.
  474. (chunks chunks))
  475. (if (pair? chunks)
  476. (let* ((chunk (car chunks))
  477. (chunks (cdr chunks))
  478. (chunk-len (string-length chunk)))
  479. (%string-copy! ans j chunk 0 chunk-len)
  480. (lp (+ j chunk-len) chunks))
  481. (%string-copy! ans j base 0 base-len))); Install BASE.
  482. ans))))))
  483. (define (string-for-each proc s . maybe-start+end)
  484. (check-arg procedure? proc 'string-for-each)
  485. (let-string-start+end (start end) string-for-each s maybe-start+end
  486. (let lp ((i start))
  487. (if (< i end)
  488. (begin (proc (string-ref s i))
  489. (lp (+ i 1)))))))
  490. (define (string-for-each-index proc s . maybe-start+end)
  491. (check-arg procedure? proc 'string-for-each-index)
  492. (let-string-start+end (start end) string-for-each-index s maybe-start+end
  493. (let lp ((i start))
  494. (if (< i end) (begin (proc i) (lp (+ i 1)))))))
  495. (define (string-every criterion s . maybe-start+end)
  496. (let-string-start+end (start end) string-every s maybe-start+end
  497. (cond ((char? criterion)
  498. (let lp ((i start))
  499. (or (>= i end)
  500. (and (char=? criterion (string-ref s i))
  501. (lp (+ i 1))))))
  502. ((char-set? criterion)
  503. (let lp ((i start))
  504. (or (>= i end)
  505. (and (char-set-contains? criterion (string-ref s i))
  506. (lp (+ i 1))))))
  507. ((procedure? criterion) ; Slightly funky loop so that
  508. (or (= start end) ; final (PRED S[END-1]) call
  509. (let lp ((i start)) ; is a tail call.
  510. (let ((c (string-ref s i))
  511. (i1 (+ i 1)))
  512. (if (= i1 end) (criterion c) ; Tail call.
  513. (and (criterion c) (lp i1)))))))
  514. (else (assertion-violation 'string-every
  515. "Second param is neither char-set, char, or predicate procedure."
  516. criterion)))))
  517. (define (string-any criterion s . maybe-start+end)
  518. (let-string-start+end (start end) string-any s maybe-start+end
  519. (cond ((char? criterion)
  520. (let lp ((i start))
  521. (and (< i end)
  522. (or (char=? criterion (string-ref s i))
  523. (lp (+ i 1))))))
  524. ((char-set? criterion)
  525. (let lp ((i start))
  526. (and (< i end)
  527. (or (char-set-contains? criterion (string-ref s i))
  528. (lp (+ i 1))))))
  529. ((procedure? criterion) ; Slightly funky loop so that
  530. (and (< start end) ; final (PRED S[END-1]) call
  531. (let lp ((i start)) ; is a tail call.
  532. (let ((c (string-ref s i))
  533. (i1 (+ i 1)))
  534. (if (= i1 end) (criterion c) ; Tail call
  535. (or (criterion c) (lp i1)))))))
  536. (else (assertion-violation 'string-any
  537. "Second param is neither char-set, char, or predicate procedure."
  538. criterion)))))
  539. (define (string-tabulate proc len)
  540. (check-arg procedure? proc 'string-tabulate)
  541. (check-arg (lambda (val) (and (integer? val) (exact? val) (<= 0 val)))
  542. len 'string-tabulate)
  543. (let ((s (make-string len)))
  544. (do ((i (- len 1) (- i 1)))
  545. ((< i 0))
  546. (string-set! s i (proc i)))
  547. s))
  548. ;;; string-prefix-length[-ci] s1 s2 [start1 end1 start2 end2]
  549. ;;; string-suffix-length[-ci] s1 s2 [start1 end1 start2 end2]
  550. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  551. ;;; Find the length of the common prefix/suffix.
  552. ;;; It is not required that the two substrings passed be of equal length.
  553. ;;; This was microcode in MIT Scheme -- a very tightly bummed primitive.
  554. ;;; %STRING-PREFIX-LENGTH is the core routine of all string-comparisons,
  555. ;;; so should be as tense as possible.
  556. (define (%string-prefix-length s1 start1 end1 s2 start2 end2)
  557. (let* ((delta (min (- end1 start1) (- end2 start2)))
  558. (end1 (+ start1 delta)))
  559. (if (and (eq? s1 s2) (= start1 start2)) ; EQ fast path
  560. delta
  561. (let lp ((i start1) (j start2)) ; Regular path
  562. (if (or (>= i end1)
  563. (not (char=? (string-ref s1 i)
  564. (string-ref s2 j))))
  565. (- i start1)
  566. (lp (+ i 1) (+ j 1)))))))
  567. (define (%string-suffix-length s1 start1 end1 s2 start2 end2)
  568. (let* ((delta (min (- end1 start1) (- end2 start2)))
  569. (start1 (- end1 delta)))
  570. (if (and (eq? s1 s2) (= end1 end2)) ; EQ fast path
  571. delta
  572. (let lp ((i (- end1 1)) (j (- end2 1))) ; Regular path
  573. (if (or (< i start1)
  574. (not (char=? (string-ref s1 i)
  575. (string-ref s2 j))))
  576. (- (- end1 i) 1)
  577. (lp (- i 1) (- j 1)))))))
  578. (define (%string-prefix-length-ci s1 start1 end1 s2 start2 end2)
  579. (let* ((delta (min (- end1 start1) (- end2 start2)))
  580. (end1 (+ start1 delta)))
  581. (if (and (eq? s1 s2) (= start1 start2)) ; EQ fast path
  582. delta
  583. (let lp ((i start1) (j start2)) ; Regular path
  584. (if (or (>= i end1)
  585. (not (char-ci=? (string-ref s1 i)
  586. (string-ref s2 j))))
  587. (- i start1)
  588. (lp (+ i 1) (+ j 1)))))))
  589. (define (%string-suffix-length-ci s1 start1 end1 s2 start2 end2)
  590. (let* ((delta (min (- end1 start1) (- end2 start2)))
  591. (start1 (- end1 delta)))
  592. (if (and (eq? s1 s2) (= end1 end2)) ; EQ fast path
  593. delta
  594. (let lp ((i (- end1 1)) (j (- end2 1))) ; Regular path
  595. (if (or (< i start1)
  596. (not (char-ci=? (string-ref s1 i)
  597. (string-ref s2 j))))
  598. (- (- end1 i) 1)
  599. (lp (- i 1) (- j 1)))))))
  600. (define (string-prefix-length s1 s2 . maybe-starts+ends)
  601. (let-string-start+end2 (start1 end1 start2 end2)
  602. string-prefix-length s1 s2 maybe-starts+ends
  603. (%string-prefix-length s1 start1 end1 s2 start2 end2)))
  604. (define (string-suffix-length s1 s2 . maybe-starts+ends)
  605. (let-string-start+end2 (start1 end1 start2 end2)
  606. string-suffix-length s1 s2 maybe-starts+ends
  607. (%string-suffix-length s1 start1 end1 s2 start2 end2)))
  608. (define (string-prefix-length-ci s1 s2 . maybe-starts+ends)
  609. (let-string-start+end2 (start1 end1 start2 end2)
  610. string-prefix-length-ci s1 s2 maybe-starts+ends
  611. (%string-prefix-length-ci s1 start1 end1 s2 start2 end2)))
  612. (define (string-suffix-length-ci s1 s2 . maybe-starts+ends)
  613. (let-string-start+end2 (start1 end1 start2 end2)
  614. string-suffix-length-ci s1 s2 maybe-starts+ends
  615. (%string-suffix-length-ci s1 start1 end1 s2 start2 end2)))
  616. ;;; string-prefix? s1 s2 [start1 end1 start2 end2]
  617. ;;; string-suffix? s1 s2 [start1 end1 start2 end2]
  618. ;;; string-prefix-ci? s1 s2 [start1 end1 start2 end2]
  619. ;;; string-suffix-ci? s1 s2 [start1 end1 start2 end2]
  620. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  621. ;;; These are all simple derivatives of the previous counting funs.
  622. (define (string-prefix? s1 s2 . maybe-starts+ends)
  623. (let-string-start+end2 (start1 end1 start2 end2)
  624. string-prefix? s1 s2 maybe-starts+ends
  625. (%string-prefix? s1 start1 end1 s2 start2 end2)))
  626. (define (string-suffix? s1 s2 . maybe-starts+ends)
  627. (let-string-start+end2 (start1 end1 start2 end2)
  628. string-suffix? s1 s2 maybe-starts+ends
  629. (%string-suffix? s1 start1 end1 s2 start2 end2)))
  630. (define (string-prefix-ci? s1 s2 . maybe-starts+ends)
  631. (let-string-start+end2 (start1 end1 start2 end2)
  632. string-prefix-ci? s1 s2 maybe-starts+ends
  633. (%string-prefix-ci? s1 start1 end1 s2 start2 end2)))
  634. (define (string-suffix-ci? s1 s2 . maybe-starts+ends)
  635. (let-string-start+end2 (start1 end1 start2 end2)
  636. string-suffix-ci? s1 s2 maybe-starts+ends
  637. (%string-suffix-ci? s1 start1 end1 s2 start2 end2)))
  638. ;;; Here are the internal routines that do the real work.
  639. (define (%string-prefix? s1 start1 end1 s2 start2 end2)
  640. (let ((len1 (- end1 start1)))
  641. (and (<= len1 (- end2 start2)) ; Quick check
  642. (= (%string-prefix-length s1 start1 end1
  643. s2 start2 end2)
  644. len1))))
  645. (define (%string-suffix? s1 start1 end1 s2 start2 end2)
  646. (let ((len1 (- end1 start1)))
  647. (and (<= len1 (- end2 start2)) ; Quick check
  648. (= len1 (%string-suffix-length s1 start1 end1
  649. s2 start2 end2)))))
  650. (define (%string-prefix-ci? s1 start1 end1 s2 start2 end2)
  651. (let ((len1 (- end1 start1)))
  652. (and (<= len1 (- end2 start2)) ; Quick check
  653. (= len1 (%string-prefix-length-ci s1 start1 end1
  654. s2 start2 end2)))))
  655. (define (%string-suffix-ci? s1 start1 end1 s2 start2 end2)
  656. (let ((len1 (- end1 start1)))
  657. (and (<= len1 (- end2 start2)) ; Quick check
  658. (= len1 (%string-suffix-length-ci s1 start1 end1
  659. s2 start2 end2)))))
  660. ;;; string-compare s1 s2 proc< proc= proc> [start1 end1 start2 end2]
  661. ;;; string-compare-ci s1 s2 proc< proc= proc> [start1 end1 start2 end2]
  662. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  663. ;;; Primitive string-comparison functions.
  664. ;;; Continuation order is different from MIT Scheme.
  665. ;;; Continuations are applied to s1's mismatch index;
  666. ;;; in the case of equality, this is END1.
  667. (define (%string-compare s1 start1 end1 s2 start2 end2
  668. proc< proc= proc>)
  669. (let ((size1 (- end1 start1))
  670. (size2 (- end2 start2)))
  671. (let ((match (%string-prefix-length s1 start1 end1 s2 start2 end2)))
  672. (if (= match size1)
  673. ((if (= match size2) proc= proc<) end1)
  674. ((if (= match size2)
  675. proc>
  676. (if (char<? (string-ref s1 (+ start1 match))
  677. (string-ref s2 (+ start2 match)))
  678. proc< proc>))
  679. (+ match start1))))))
  680. (define (%string-compare-ci s1 start1 end1 s2 start2 end2
  681. proc< proc= proc>)
  682. (let ((size1 (- end1 start1))
  683. (size2 (- end2 start2)))
  684. (let ((match (%string-prefix-length-ci s1 start1 end1 s2 start2 end2)))
  685. (if (= match size1)
  686. ((if (= match size2) proc= proc<) end1)
  687. ((if (= match size2) proc>
  688. (if (char-ci<? (string-ref s1 (+ start1 match))
  689. (string-ref s2 (+ start2 match)))
  690. proc< proc>))
  691. (+ start1 match))))))
  692. (define (string-compare s1 s2 proc< proc= proc> . maybe-starts+ends)
  693. (check-arg procedure? proc< 'string-compare)
  694. (check-arg procedure? proc= 'string-compare)
  695. (check-arg procedure? proc> 'string-compare)
  696. (let-string-start+end2 (start1 end1 start2 end2)
  697. string-compare s1 s2 maybe-starts+ends
  698. (%string-compare s1 start1 end1 s2 start2 end2 proc< proc= proc>)))
  699. (define (string-compare-ci s1 s2 proc< proc= proc> . maybe-starts+ends)
  700. (check-arg procedure? proc< 'string-compare-ci)
  701. (check-arg procedure? proc= 'string-compare-ci)
  702. (check-arg procedure? proc> 'string-compare-ci)
  703. (let-string-start+end2 (start1 end1 start2 end2)
  704. string-compare-ci s1 s2 maybe-starts+ends
  705. (%string-compare-ci s1 start1 end1 s2 start2 end2 proc< proc= proc>)))
  706. ;;; string= string<> string-ci= string-ci<>
  707. ;;; string< string> string-ci< string-ci>
  708. ;;; string<= string>= string-ci<= string-ci>=
  709. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  710. ;;; Simple definitions in terms of the previous comparison funs.
  711. ;;; I sure hope the %STRING-COMPARE calls get integrated.
  712. (define (string= s1 s2 . maybe-starts+ends)
  713. (let-string-start+end2 (start1 end1 start2 end2)
  714. string= s1 s2 maybe-starts+ends
  715. (and (= (- end1 start1) (- end2 start2)) ; Quick filter
  716. (or (and (eq? s1 s2) (= start1 start2)) ; Fast path
  717. (%string-compare s1 start1 end1 s2 start2 end2 ; Real test
  718. (lambda (i) #f)
  719. (lambda (i) #t)
  720. (lambda (i) #f))))))
  721. (define (string<> s1 s2 . maybe-starts+ends)
  722. (let-string-start+end2 (start1 end1 start2 end2)
  723. string<> s1 s2 maybe-starts+ends
  724. (or (not (= (- end1 start1) (- end2 start2))) ; Fast path
  725. (and (not (and (eq? s1 s2) (= start1 start2))) ; Quick filter
  726. (%string-compare s1 start1 end1 s2 start2 end2 ; Real test
  727. (lambda (i) #t)
  728. (lambda (i) #f)
  729. (lambda (i) #t))))))
  730. (define (string< s1 s2 . maybe-starts+ends)
  731. (let-string-start+end2 (start1 end1 start2 end2)
  732. string< s1 s2 maybe-starts+ends
  733. (if (and (eq? s1 s2) (= start1 start2)) ; Fast path
  734. (< end1 end2)
  735. (%string-compare s1 start1 end1 s2 start2 end2 ; Real test
  736. (lambda (i) #t)
  737. (lambda (i) #f)
  738. (lambda (i) #f)))))
  739. (define (string> s1 s2 . maybe-starts+ends)
  740. (let-string-start+end2 (start1 end1 start2 end2)
  741. string> s1 s2 maybe-starts+ends
  742. (if (and (eq? s1 s2) (= start1 start2)) ; Fast path
  743. (> end1 end2)
  744. (%string-compare s1 start1 end1 s2 start2 end2 ; Real test
  745. (lambda (i) #f)
  746. (lambda (i) #f)
  747. (lambda (i) #t)))))
  748. (define (string<= s1 s2 . maybe-starts+ends)
  749. (let-string-start+end2 (start1 end1 start2 end2)
  750. string<= s1 s2 maybe-starts+ends
  751. (if (and (eq? s1 s2) (= start1 start2)) ; Fast path
  752. (<= end1 end2)
  753. (%string-compare s1 start1 end1 s2 start2 end2 ; Real test
  754. (lambda (i) #t)
  755. (lambda (i) #t)
  756. (lambda (i) #f)))))
  757. (define (string>= s1 s2 . maybe-starts+ends)
  758. (let-string-start+end2 (start1 end1 start2 end2)
  759. string>= s1 s2 maybe-starts+ends
  760. (if (and (eq? s1 s2) (= start1 start2)) ; Fast path
  761. (>= end1 end2)
  762. (%string-compare s1 start1 end1 s2 start2 end2 ; Real test
  763. (lambda (i) #f)
  764. (lambda (i) #t)
  765. (lambda (i) #t)))))
  766. (define (string-ci= s1 s2 . maybe-starts+ends)
  767. (let-string-start+end2 (start1 end1 start2 end2)
  768. string-ci= s1 s2 maybe-starts+ends
  769. (and (= (- end1 start1) (- end2 start2)) ; Quick filter
  770. (or (and (eq? s1 s2) (= start1 start2)) ; Fast path
  771. (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test
  772. (lambda (i) #f)
  773. (lambda (i) #t)
  774. (lambda (i) #f))))))
  775. (define (string-ci<> s1 s2 . maybe-starts+ends)
  776. (let-string-start+end2 (start1 end1 start2 end2)
  777. string-ci<> s1 s2 maybe-starts+ends
  778. (or (not (= (- end1 start1) (- end2 start2))) ; Fast path
  779. (and (not (and (eq? s1 s2) (= start1 start2))) ; Quick filter
  780. (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test
  781. (lambda (i) #t)
  782. (lambda (i) #f)
  783. (lambda (i) #t))))))
  784. (define (string-ci< s1 s2 . maybe-starts+ends)
  785. (let-string-start+end2 (start1 end1 start2 end2)
  786. string-ci< s1 s2 maybe-starts+ends
  787. (if (and (eq? s1 s2) (= start1 start2)) ; Fast path
  788. (< end1 end2)
  789. (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test
  790. (lambda (i) #t)
  791. (lambda (i) #f)
  792. (lambda (i) #f)))))
  793. (define (string-ci> s1 s2 . maybe-starts+ends)
  794. (let-string-start+end2 (start1 end1 start2 end2)
  795. string-ci> s1 s2 maybe-starts+ends
  796. (if (and (eq? s1 s2) (= start1 start2)) ; Fast path
  797. (> end1 end2)
  798. (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test
  799. (lambda (i) #f)
  800. (lambda (i) #f)
  801. (lambda (i) #t)))))
  802. (define (string-ci<= s1 s2 . maybe-starts+ends)
  803. (let-string-start+end2 (start1 end1 start2 end2)
  804. string-ci<= s1 s2 maybe-starts+ends
  805. (if (and (eq? s1 s2) (= start1 start2)) ; Fast path
  806. (<= end1 end2)
  807. (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test
  808. (lambda (i) #t)
  809. (lambda (i) #t)
  810. (lambda (i) #f)))))
  811. (define (string-ci>= s1 s2 . maybe-starts+ends)
  812. (let-string-start+end2 (start1 end1 start2 end2)
  813. string-ci>= s1 s2 maybe-starts+ends
  814. (if (and (eq? s1 s2) (= start1 start2)) ; Fast path
  815. (>= end1 end2)
  816. (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test
  817. (lambda (i) #f)
  818. (lambda (i) #t)
  819. (lambda (i) #t)))))
  820. ;;; Hash
  821. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  822. ;;; Compute (c + 37 c + 37^2 c + ...) modulo BOUND, with sleaze thrown in
  823. ;;; to keep the intermediate values small. (We do the calculation with just
  824. ;;; enough bits to represent BOUND, masking off high bits at each step in
  825. ;;; calculation. If this screws up any important properties of the hash
  826. ;;; function I'd like to hear about it. -Olin)
  827. ;;;
  828. ;;; If you keep BOUND small enough, the intermediate calculations will
  829. ;;; always be fixnums. How small is dependent on the underlying Scheme system;
  830. ;;; we use a default BOUND of 2^22 = 4194304, which should hack it in
  831. ;;; Schemes that give you at least 29 signed bits for fixnums. The core
  832. ;;; calculation that you don't want to overflow is, worst case,
  833. ;;; (+ 65535 (* 37 (- bound 1)))
  834. ;;; where 65535 is the max character code. Choose the default BOUND to be the
  835. ;;; biggest power of two that won't cause this expression to fixnum overflow,
  836. ;;; and everything will be copacetic.
  837. (define (%string-hash s char->int bound start end)
  838. (let ((iref (lambda (s i) (char->int (string-ref s i))))
  839. ;; Compute a 111...1 mask that will cover BOUND-1:
  840. (mask (let lp ((i #x10000)) ; Let's skip first 16 iterations, eh?
  841. (if (>= i bound) (- i 1) (lp (+ i i))))))
  842. (let lp ((i start) (ans 0))
  843. (if (>= i end) (modulo ans bound)
  844. (lp (+ i 1) (bitwise-and mask (+ (* 37 ans) (iref s i))))))))
  845. (define (string-hash s . maybe-bound+start+end)
  846. (let-optionals* maybe-bound+start+end ((bound 4194304 (and (integer? bound)
  847. (exact? bound)
  848. (<= 0 bound)))
  849. rest)
  850. (let ((bound (if (zero? bound) 4194304 bound))) ; 0 means default.
  851. (let-string-start+end (start end) string-hash s rest
  852. (%string-hash s char->integer bound start end)))))
  853. (define (string-hash-ci s . maybe-bound+start+end)
  854. (let-optionals* maybe-bound+start+end ((bound 4194304 (and (integer? bound)
  855. (exact? bound)
  856. (<= 0 bound)))
  857. rest)
  858. (let ((bound (if (zero? bound) 4194304 bound))) ; 0 means default.
  859. (let-string-start+end (start end) string-hash-ci s rest
  860. (%string-hash s (lambda (c) (char->integer (char-downcase c)))
  861. bound start end)))))
  862. ;;; Case hacking
  863. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  864. ;;; string-upcase s [start end]
  865. ;;; string-upcase! s [start end]
  866. ;;; string-downcase s [start end]
  867. ;;; string-downcase! s [start end]
  868. ;;;
  869. ;;; string-titlecase s [start end]
  870. ;;; string-titlecase! s [start end]
  871. ;;; Capitalize every contiguous alpha sequence: capitalise
  872. ;;; first char, lowercase rest.
  873. (define (string-upcase s . maybe-start+end)
  874. (let-string-start+end (start end) string-upcase s maybe-start+end
  875. (%string-map char-upcase s start end)))
  876. (define (string-upcase! s . maybe-start+end)
  877. (let-string-start+end (start end) string-upcase! s maybe-start+end
  878. (%string-map! char-upcase s start end)))
  879. (define (string-downcase s . maybe-start+end)
  880. (let-string-start+end (start end) string-downcase s maybe-start+end
  881. (%string-map char-downcase s start end)))
  882. (define (string-downcase! s . maybe-start+end)
  883. (let-string-start+end (start end) string-downcase! s maybe-start+end
  884. (%string-map! char-downcase s start end)))
  885. (define (%string-titlecase! s start end)
  886. (let lp ((i start))
  887. (cond ((string-index s char-cased? i end) =>
  888. (lambda (i)
  889. (string-set! s i (char-titlecase (string-ref s i)))
  890. (let ((i1 (+ i 1)))
  891. (cond ((string-skip s char-cased? i1 end) =>
  892. (lambda (j)
  893. (string-downcase! s i1 j)
  894. (lp (+ j 1))))
  895. (else (string-downcase! s i1 end)))))))))
  896. (define (string-titlecase! s . maybe-start+end)
  897. (let-string-start+end (start end) string-titlecase! s maybe-start+end
  898. (%string-titlecase! s start end)))
  899. (define (string-titlecase s . maybe-start+end)
  900. (let-string-start+end (start end) string-titlecase! s maybe-start+end
  901. (let ((ans (substring s start end)))
  902. (%string-titlecase! ans 0 (- end start))
  903. ans)))
  904. ;;; Cutting & pasting strings
  905. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  906. ;;; string-take string nchars
  907. ;;; string-drop string nchars
  908. ;;;
  909. ;;; string-take-right string nchars
  910. ;;; string-drop-right string nchars
  911. ;;;
  912. ;;; string-pad string k [char start end]
  913. ;;; string-pad-right string k [char start end]
  914. ;;;
  915. ;;; string-trim string [char/char-set/pred start end]
  916. ;;; string-trim-right string [char/char-set/pred start end]
  917. ;;; string-trim-both string [char/char-set/pred start end]
  918. ;;;
  919. ;;; These trimmers invert the char-set meaning from MIT Scheme -- you
  920. ;;; say what you want to trim.
  921. (define (string-take s n)
  922. (check-arg string? s 'string-take)
  923. (check-arg (lambda (val) (and (integer? n) (exact? n)
  924. (<= 0 n (string-length s))))
  925. n 'string-take)
  926. (%substring/shared s 0 n))
  927. (define (string-take-right s n)
  928. (check-arg string? s 'string-take-right)
  929. (let ((len (string-length s)))
  930. (check-arg (lambda (val) (and (integer? n) (exact? n) (<= 0 n len)))
  931. n 'string-take-right)
  932. (%substring/shared s (- len n) len)))
  933. (define (string-drop s n)
  934. (check-arg string? s 'string-drop)
  935. (let ((len (string-length s)))
  936. (check-arg (lambda (val) (and (integer? n) (exact? n) (<= 0 n len)))
  937. n 'string-drop)
  938. (%substring/shared s n len)))
  939. (define (string-drop-right s n)
  940. (check-arg string? s 'string-drop-right)
  941. (let ((len (string-length s)))
  942. (check-arg (lambda (val) (and (integer? n) (exact? n) (<= 0 n len)))
  943. n 'string-drop-right)
  944. (%substring/shared s 0 (- len n))))
  945. (define (string-trim s . criterion+start+end)
  946. (let-optionals* criterion+start+end ((criterion char-set:whitespace) rest)
  947. (let-string-start+end (start end) string-trim s rest
  948. (cond ((string-skip s criterion start end) =>
  949. (lambda (i) (%substring/shared s i end)))
  950. (else "")))))
  951. (define (string-trim-right s . criterion+start+end)
  952. (let-optionals* criterion+start+end ((criterion char-set:whitespace) rest)
  953. (let-string-start+end (start end) string-trim-right s rest
  954. (cond ((string-skip-right s criterion start end) =>
  955. (lambda (i) (%substring/shared s start (+ 1 i))))
  956. (else "")))))
  957. (define (string-trim-both s . criterion+start+end)
  958. (let-optionals* criterion+start+end ((criterion char-set:whitespace) rest)
  959. (let-string-start+end (start end) string-trim-both s rest
  960. (cond ((string-skip s criterion start end) =>
  961. (lambda (i)
  962. (%substring/shared s i (+ 1 (string-skip-right s criterion i end)))))
  963. (else "")))))
  964. (define (string-pad-right s n . char+start+end)
  965. (let-optionals* char+start+end ((char #\space (char? char)) rest)
  966. (let-string-start+end (start end) string-pad-right s rest
  967. (check-arg (lambda (n) (and (integer? n) (exact? n) (<= 0 n)))
  968. n 'string-pad-right)
  969. (let ((len (- end start)))
  970. (if (<= n len)
  971. (%substring/shared s start (+ start n))
  972. (let ((ans (make-string n char)))
  973. (%string-copy! ans 0 s start end)
  974. ans))))))
  975. (define (string-pad s n . char+start+end)
  976. (let-optionals* char+start+end ((char #\space (char? char)) rest)
  977. (let-string-start+end (start end) string-pad s rest
  978. (check-arg (lambda (n) (and (integer? n) (exact? n) (<= 0 n)))
  979. n 'string-pad)
  980. (let ((len (- end start)))
  981. (if (<= n len)
  982. (%substring/shared s (- end n) end)
  983. (let ((ans (make-string n char)))
  984. (%string-copy! ans (- n len) s start end)
  985. ans))))))
  986. ;;; Filtering strings
  987. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  988. ;;; string-delete char/char-set/pred string [start end]
  989. ;;; string-filter char/char-set/pred string [start end]
  990. ;;;
  991. ;;; If the criterion is a char or char-set, we scan the string twice with
  992. ;;; string-fold -- once to determine the length of the result string,
  993. ;;; and once to do the filtered copy.
  994. ;;; If the criterion is a predicate, we don't do this double-scan strategy,
  995. ;;; because the predicate might have side-effects or be very expensive to
  996. ;;; compute. So we preallocate a temp buffer pessimistically, and only do
  997. ;;; one scan over S. This is likely to be faster and more space-efficient
  998. ;;; than consing a list.
  999. (define (string-delete criterion s . maybe-start+end)
  1000. (let-string-start+end (start end) string-delete s maybe-start+end
  1001. (if (procedure? criterion)
  1002. (let* ((slen (- end start))
  1003. (temp (make-string slen))
  1004. (ans-len (string-fold (lambda (c i)
  1005. (if (criterion c) i
  1006. (begin (string-set! temp i c)
  1007. (+ i 1))))
  1008. 0 s start end)))
  1009. (if (= ans-len slen) temp (substring temp 0 ans-len)))
  1010. (let* ((cset (cond ((char-set? criterion) criterion)
  1011. ((char? criterion) (char-set criterion))
  1012. (else
  1013. (assertion-violation 'string-delete "string-delete criterion not predicate, char or char-set" criterion))))
  1014. (len (string-fold (lambda (c i) (if (char-set-contains? cset c)
  1015. i
  1016. (+ i 1)))
  1017. 0 s start end))
  1018. (ans (make-string len)))
  1019. (string-fold (lambda (c i) (if (char-set-contains? cset c)
  1020. i
  1021. (begin (string-set! ans i c)
  1022. (+ i 1))))
  1023. 0 s start end)
  1024. ans))))
  1025. (define (string-filter criterion s . maybe-start+end)
  1026. (let-string-start+end (start end) string-filter s maybe-start+end
  1027. (if (procedure? criterion)
  1028. (let* ((slen (- end start))
  1029. (temp (make-string slen))
  1030. (ans-len (string-fold (lambda (c i)
  1031. (if (criterion c)
  1032. (begin (string-set! temp i c)
  1033. (+ i 1))
  1034. i))
  1035. 0 s start end)))
  1036. (if (= ans-len slen) temp (substring temp 0 ans-len)))
  1037. (let* ((cset (cond ((char-set? criterion) criterion)
  1038. ((char? criterion) (char-set criterion))
  1039. (else (assertion-violation 'string-filter "string-filter criterion not predicate, char or char-set" criterion))))
  1040. (len (string-fold (lambda (c i) (if (char-set-contains? cset c)
  1041. (+ i 1)
  1042. i))
  1043. 0 s start end))
  1044. (ans (make-string len)))
  1045. (string-fold (lambda (c i) (if (char-set-contains? cset c)
  1046. (begin (string-set! ans i c)
  1047. (+ i 1))
  1048. i))
  1049. 0 s start end)
  1050. ans))))
  1051. ;;; String search
  1052. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  1053. ;;; string-index string char/char-set/pred [start end]
  1054. ;;; string-index-right string char/char-set/pred [start end]
  1055. ;;; string-skip string char/char-set/pred [start end]
  1056. ;;; string-skip-right string char/char-set/pred [start end]
  1057. ;;; string-count string char/char-set/pred [start end]
  1058. ;;; There's a lot of replicated code here for efficiency.
  1059. ;;; For example, the char/char-set/pred discrimination has
  1060. ;;; been lifted above the inner loop of each proc.
  1061. (define (string-index str criterion . maybe-start+end)
  1062. (let-string-start+end (start end) string-index str maybe-start+end
  1063. (cond ((char? criterion)
  1064. (let lp ((i start))
  1065. (and (< i end)
  1066. (if (char=? criterion (string-ref str i)) i
  1067. (lp (+ i 1))))))
  1068. ((char-set? criterion)
  1069. (let lp ((i start))
  1070. (and (< i end)
  1071. (if (char-set-contains? criterion (string-ref str i)) i
  1072. (lp (+ i 1))))))
  1073. ((procedure? criterion)
  1074. (let lp ((i start))
  1075. (and (< i end)
  1076. (if (criterion (string-ref str i)) i
  1077. (lp (+ i 1))))))
  1078. (else (assertion-violation 'string-index
  1079. "Second param is neither char-set, char, or predicate procedure."
  1080. criterion)))))
  1081. (define (string-index-right str criterion . maybe-start+end)
  1082. (let-string-start+end (start end) string-index-right str maybe-start+end
  1083. (cond ((char? criterion)
  1084. (let lp ((i (- end 1)))
  1085. (and (>= i start)
  1086. (if (char=? criterion (string-ref str i)) i
  1087. (lp (- i 1))))))
  1088. ((char-set? criterion)
  1089. (let lp ((i (- end 1)))
  1090. (and (>= i start)
  1091. (if (char-set-contains? criterion (string-ref str i)) i
  1092. (lp (- i 1))))))
  1093. ((procedure? criterion)
  1094. (let lp ((i (- end 1)))
  1095. (and (>= i start)
  1096. (if (criterion (string-ref str i)) i
  1097. (lp (- i 1))))))
  1098. (else (assertion-violation
  1099. 'string-index-right
  1100. "Second param is neither char-set, char, or predicate procedure."
  1101. criterion)))))
  1102. (define (string-skip str criterion . maybe-start+end)
  1103. (let-string-start+end (start end) string-skip str maybe-start+end
  1104. (cond ((char? criterion)
  1105. (let lp ((i start))
  1106. (and (< i end)
  1107. (if (char=? criterion (string-ref str i))
  1108. (lp (+ i 1))
  1109. i))))
  1110. ((char-set? criterion)
  1111. (let lp ((i start))
  1112. (and (< i end)
  1113. (if (char-set-contains? criterion (string-ref str i))
  1114. (lp (+ i 1))
  1115. i))))
  1116. ((procedure? criterion)
  1117. (let lp ((i start))
  1118. (and (< i end)
  1119. (if (criterion (string-ref str i)) (lp (+ i 1))
  1120. i))))
  1121. (else (assertion-violation
  1122. 'string-skip
  1123. "Second param is neither char-set, char, or predicate procedure."
  1124. criterion)))))
  1125. (define (string-skip-right str criterion . maybe-start+end)
  1126. (let-string-start+end (start end) string-skip-right str maybe-start+end
  1127. (cond ((char? criterion)
  1128. (let lp ((i (- end 1)))
  1129. (and (>= i start)
  1130. (if (char=? criterion (string-ref str i))
  1131. (lp (- i 1))
  1132. i))))
  1133. ((char-set? criterion)
  1134. (let lp ((i (- end 1)))
  1135. (and (>= i start)
  1136. (if (char-set-contains? criterion (string-ref str i))
  1137. (lp (- i 1))
  1138. i))))
  1139. ((procedure? criterion)
  1140. (let lp ((i (- end 1)))
  1141. (and (>= i start)
  1142. (if (criterion (string-ref str i)) (lp (- i 1))
  1143. i))))
  1144. (else (assertion-violation 'string-skip-right
  1145. "CRITERION param is neither char-set or char."
  1146. criterion)))))
  1147. (define (string-count s criterion . maybe-start+end)
  1148. (let-string-start+end (start end) string-count s maybe-start+end
  1149. (cond ((char? criterion)
  1150. (do ((i start (+ i 1))
  1151. (count 0 (if (char=? criterion (string-ref s i))
  1152. (+ count 1)
  1153. count)))
  1154. ((>= i end) count)))
  1155. ((char-set? criterion)
  1156. (do ((i start (+ i 1))
  1157. (count 0 (if (char-set-contains? criterion (string-ref s i))
  1158. (+ count 1)
  1159. count)))
  1160. ((>= i end) count)))
  1161. ((procedure? criterion)
  1162. (do ((i start (+ i 1))
  1163. (count 0 (if (criterion (string-ref s i)) (+ count 1) count)))
  1164. ((>= i end) count)))
  1165. (else (assertion-violation
  1166. 'string-count
  1167. "CRITERION param is neither char-set or char."
  1168. criterion)))))
  1169. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  1170. ;;; string-fill! string char [start end]
  1171. ;;;
  1172. ;;; string-copy! to tstart from [fstart fend]
  1173. ;;; Guaranteed to work, even if s1 eq s2.
  1174. (define (string-fill! s char . maybe-start+end)
  1175. (check-arg char? char 'string-fill!)
  1176. (let-string-start+end (start end) string-fill! s maybe-start+end
  1177. (do ((i (- end 1) (- i 1)))
  1178. ((< i start))
  1179. (string-set! s i char))))
  1180. (define (string-copy! to tstart from . maybe-fstart+fend)
  1181. (let-string-start+end (fstart fend) string-copy! from maybe-fstart+fend
  1182. (check-arg integer? tstart 'string-copy!)
  1183. (check-substring-spec 'string-copy! to tstart (+ tstart (- fend fstart)))
  1184. (%string-copy! to tstart from fstart fend)))
  1185. ;;; Library-internal routine
  1186. (define (%string-copy! to tstart from fstart fend)
  1187. (if (> fstart tstart)
  1188. (do ((i fstart (+ i 1))
  1189. (j tstart (+ j 1)))
  1190. ((>= i fend))
  1191. (string-set! to j (string-ref from i)))
  1192. (do ((i (- fend 1) (- i 1))
  1193. (j (+ -1 tstart (- fend fstart)) (- j 1)))
  1194. ((< i fstart))
  1195. (string-set! to j (string-ref from i)))))
  1196. ;;; Returns starting-position in STRING or #f if not true.
  1197. ;;; This implementation is slow & simple. It is useful as a "spec" or for
  1198. ;;; comparison testing with fancier implementations.
  1199. ;;; See below for fast KMP version.
  1200. ;(define (string-contains string substring . maybe-starts+ends)
  1201. ; (let-string-start+end2 (start1 end1 start2 end2)
  1202. ; string-contains string substring maybe-starts+ends
  1203. ; (let* ((len (- end2 start2))
  1204. ; (i-bound (- end1 len)))
  1205. ; (let lp ((i start1))
  1206. ; (and (< i i-bound)
  1207. ; (if (string= string substring i (+ i len) start2 end2)
  1208. ; i
  1209. ; (lp (+ i 1))))))))
  1210. ;;; Searching for an occurrence of a substring
  1211. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  1212. (define (string-contains text pattern . maybe-starts+ends)
  1213. (let-string-start+end2 (t-start t-end p-start p-end)
  1214. string-contains text pattern maybe-starts+ends
  1215. (%kmp-search pattern text char=? p-start p-end t-start t-end)))
  1216. (define (string-contains-ci text pattern . maybe-starts+ends)
  1217. (let-string-start+end2 (t-start t-end p-start p-end)
  1218. string-contains-ci text pattern maybe-starts+ends
  1219. (%kmp-search pattern text char-ci=? p-start p-end t-start t-end)))
  1220. ;;; Knuth-Morris-Pratt string searching
  1221. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  1222. ;;; See
  1223. ;;; "Fast pattern matching in strings"
  1224. ;;; SIAM J. Computing 6(2):323-350 1977
  1225. ;;; D. E. Knuth, J. H. Morris and V. R. Pratt
  1226. ;;; also described in
  1227. ;;; "Pattern matching in strings"
  1228. ;;; Alfred V. Aho
  1229. ;;; Formal Language Theory - Perspectives and Open Problems
  1230. ;;; Ronald V. Brook (editor)
  1231. ;;; This algorithm is O(m + n) where m and n are the
  1232. ;;; lengths of the pattern and string respectively
  1233. ;;; KMP search source[start,end) for PATTERN. Return starting index of
  1234. ;;; leftmost match or #f.
  1235. (define (%kmp-search pattern text c= p-start p-end t-start t-end)
  1236. (let ((plen (- p-end p-start))
  1237. (rv (make-kmp-restart-vector pattern c= p-start p-end)))
  1238. ;; The search loop. TJ & PJ are redundant state.
  1239. (let lp ((ti t-start) (pi 0)
  1240. (tj (- t-end t-start)) ; (- tlen ti) -- how many chars left.
  1241. (pj plen)) ; (- plen pi) -- how many chars left.
  1242. (if (= pi plen)
  1243. (- ti plen) ; Win.
  1244. (and (<= pj tj) ; Lose.
  1245. (if (c= (string-ref text ti) ; Search.
  1246. (string-ref pattern (+ p-start pi)))
  1247. (lp (+ 1 ti) (+ 1 pi) (- tj 1) (- pj 1)) ; Advance.
  1248. (let ((pi (vector-ref rv pi))) ; Retreat.
  1249. (if (= pi -1)
  1250. (lp (+ ti 1) 0 (- tj 1) plen) ; Punt.
  1251. (lp ti pi tj (- plen pi))))))))))
  1252. ;;; (make-kmp-restart-vector pattern [c= start end]) -> integer-vector
  1253. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  1254. ;;; Compute the KMP restart vector RV for string PATTERN. If
  1255. ;;; we have matched chars 0..i-1 of PATTERN against a search string S, and
  1256. ;;; PATTERN[i] doesn't match S[k], then reset i := RV[i], and try again to
  1257. ;;; match S[k]. If RV[i] = -1, then punt S[k] completely, and move on to
  1258. ;;; S[k+1] and PATTERN[0] -- no possible match of PAT[0..i] contains S[k].
  1259. ;;;
  1260. ;;; In other words, if you have matched the first i chars of PATTERN, but
  1261. ;;; the i+1'th char doesn't match, RV[i] tells you what the next-longest
  1262. ;;; prefix of PATTERN is that you have matched.
  1263. ;;;
  1264. ;;; - C= (default CHAR=?) is used to compare characters for equality.
  1265. ;;; Pass in CHAR-CI=? for case-folded string search.
  1266. ;;;
  1267. ;;; - START & END restrict the pattern to the indicated substring; the
  1268. ;;; returned vector will be of length END - START. The numbers stored
  1269. ;;; in the vector will be values in the range [0,END-START) -- that is,
  1270. ;;; they are valid indices into the restart vector; you have to add START
  1271. ;;; to them to use them as indices into PATTERN.
  1272. ;;;
  1273. ;;; I've split this out as a separate function in case other constant-string
  1274. ;;; searchers might want to use it.
  1275. ;;;
  1276. ;;; E.g.:
  1277. ;;; a b d a b x
  1278. ;;; #(-1 0 0 -1 1 2)
  1279. (define (make-kmp-restart-vector pattern . maybe-c=+start+end)
  1280. (let-optionals* maybe-c=+start+end
  1281. ((c= char=? (procedure? c=))
  1282. ((start end) (lambda (args)
  1283. (string-parse-start+end 'make-kmp-restart-vector
  1284. pattern args))))
  1285. (let* ((rvlen (- end start))
  1286. (rv (make-vector rvlen -1)))
  1287. (if (> rvlen 0)
  1288. (let ((rvlen-1 (- rvlen 1))
  1289. (c0 (string-ref pattern start)))
  1290. ;; Here's the main loop. We have set rv[0] ... rv[i].
  1291. ;; K = I + START -- it is the corresponding index into PATTERN.
  1292. (let lp1 ((i 0) (j -1) (k start))
  1293. (if (< i rvlen-1)
  1294. ;; lp2 invariant:
  1295. ;; pat[(k-j) .. k-1] matches pat[start .. start+j-1]
  1296. ;; or j = -1.
  1297. (let lp2 ((j j))
  1298. (cond ((= j -1)
  1299. (let ((i1 (+ 1 i)))
  1300. (if (not (c= (string-ref pattern (+ k 1)) c0))
  1301. (vector-set! rv i1 0))
  1302. (lp1 i1 0 (+ k 1))))
  1303. ;; pat[(k-j) .. k] matches pat[start..start+j].
  1304. ((c= (string-ref pattern k) (string-ref pattern (+ j start)))
  1305. (let* ((i1 (+ 1 i))
  1306. (j1 (+ 1 j)))
  1307. (vector-set! rv i1 j1)
  1308. (lp1 i1 j1 (+ k 1))))
  1309. (else (lp2 (vector-ref rv j)))))))))
  1310. rv)))
  1311. ;;; We've matched I chars from PAT. C is the next char from the search string.
  1312. ;;; Return the new I after handling C.
  1313. ;;;
  1314. ;;; The pattern is (VECTOR-LENGTH RV) chars long, beginning at index PAT-START
  1315. ;;; in PAT (PAT-START is usually 0). The I chars of the pattern we've matched
  1316. ;;; are
  1317. ;;; PAT[PAT-START .. PAT-START + I].
  1318. ;;;
  1319. ;;; It's *not* an oversight that there is no friendly error checking or
  1320. ;;; defaulting of arguments. This is a low-level, inner-loop procedure
  1321. ;;; that we want integrated/inlined into the point of call.
  1322. (define (kmp-step pat rv c i c= p-start)
  1323. (let lp ((i i))
  1324. (if (c= c (string-ref pat (+ i p-start))) ; Match =>
  1325. (+ i 1) ; Done.
  1326. (let ((i (vector-ref rv i))) ; Back up in PAT.
  1327. (if (= i -1) 0 ; Can't back up further.
  1328. (lp i)))))) ; Keep trying for match.
  1329. ;;; Zip through S[start,end), looking for a match of PAT. Assume we've
  1330. ;;; already matched the first I chars of PAT when we commence at S[start].
  1331. ;;; - <0: If we find a match *ending* at index J, return -J.
  1332. ;;; - >=0: If we get to the end of the S[start,end) span without finding
  1333. ;;; a complete match, return the number of chars from PAT we'd matched
  1334. ;;; when we ran off the end.
  1335. ;;;
  1336. ;;; This is useful for searching *across* buffers -- that is, when your
  1337. ;;; input comes in chunks of text. We hand-integrate the KMP-STEP loop
  1338. ;;; for speed.
  1339. (define (string-kmp-partial-search pat rv s i . c=+p-start+s-start+s-end)
  1340. (check-arg vector? rv 'string-kmp-partial-search)
  1341. (let-optionals* c=+p-start+s-start+s-end
  1342. ((c= char=? (procedure? c=))
  1343. (p-start 0 (and (integer? p-start) (exact? p-start) (<= 0 p-start)))
  1344. ((s-start s-end) (lambda (args)
  1345. (string-parse-start+end 'string-kmp-partial-search
  1346. s args))))
  1347. (let ((patlen (vector-length rv)))
  1348. (check-arg (lambda (i) (and (integer? i) (exact? i) (<= 0 i) (< i patlen)))
  1349. i 'string-kmp-partial-search)
  1350. ;; Enough prelude. Here's the actual code.
  1351. (let lp ((si s-start) ; An index into S.
  1352. (vi i)) ; An index into RV.
  1353. (cond ((= vi patlen) (- si)) ; Win.
  1354. ((= si s-end) vi) ; Ran off the end.
  1355. (else ; Match s[si] & loop.
  1356. (let ((c (string-ref s si)))
  1357. (lp (+ si 1)
  1358. (let lp2 ((vi vi)) ; This is just KMP-STEP.
  1359. (if (c= c (string-ref pat (+ vi p-start)))
  1360. (+ vi 1)
  1361. (let ((vi (vector-ref rv vi)))
  1362. (if (= vi -1) 0
  1363. (lp2 vi)))))))))))))
  1364. ;;; Misc
  1365. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  1366. ;;; (string-null? s)
  1367. ;;; (string-reverse s [start end])
  1368. ;;; (string-reverse! s [start end])
  1369. ;;; (reverse-list->string clist)
  1370. ;;; (string->list s [start end])
  1371. (define (string-null? s) (zero? (string-length s)))
  1372. (define (string-reverse s . maybe-start+end)
  1373. (let-string-start+end (start end) string-reverse s maybe-start+end
  1374. (let* ((len (- end start))
  1375. (ans (make-string len)))
  1376. (do ((i start (+ i 1))
  1377. (j (- len 1) (- j 1)))
  1378. ((< j 0))
  1379. (string-set! ans j (string-ref s i)))
  1380. ans)))
  1381. (define (string-reverse! s . maybe-start+end)
  1382. (let-string-start+end (start end) string-reverse! s maybe-start+end
  1383. (do ((i (- end 1) (- i 1))
  1384. (j start (+ j 1)))
  1385. ((<= i j))
  1386. (let ((ci (string-ref s i)))
  1387. (string-set! s i (string-ref s j))
  1388. (string-set! s j ci)))))
  1389. (define (reverse-list->string clist)
  1390. (let* ((len (length clist))
  1391. (s (make-string len)))
  1392. (do ((i (- len 1) (- i 1)) (clist clist (cdr clist)))
  1393. ((not (pair? clist)))
  1394. (string-set! s i (car clist)))
  1395. s))
  1396. ;(define (string->list s . maybe-start+end)
  1397. ; (apply string-fold-right cons '() s maybe-start+end))
  1398. (define (string->list s . maybe-start+end)
  1399. (let-string-start+end (start end) string->list s maybe-start+end
  1400. (do ((i (- end 1) (- i 1))
  1401. (ans '() (cons (string-ref s i) ans)))
  1402. ((< i start) ans))))
  1403. ;;; Defined by R5RS, so commented out here.
  1404. ;(define (list->string lis) (string-unfold null? car cdr lis))
  1405. ;;; string-concatenate string-list -> string
  1406. ;;; string-concatenate/shared string-list -> string
  1407. ;;; string-append/shared s ... -> string
  1408. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  1409. ;;; STRING-APPEND/SHARED has license to return a string that shares storage
  1410. ;;; with any of its arguments. In particular, if there is only one non-empty
  1411. ;;; string amongst its parameters, it is permitted to return that string as
  1412. ;;; its result. STRING-APPEND, by contrast, always allocates new storage.
  1413. ;;;
  1414. ;;; STRING-CONCATENATE & STRING-CONCATENATE/SHARED are passed a list of
  1415. ;;; strings, which they concatenate into a result string. STRING-CONCATENATE
  1416. ;;; always allocates a fresh string; STRING-CONCATENATE/SHARED may (or may
  1417. ;;; not) return a result that shares storage with any of its arguments. In
  1418. ;;; particular, if it is applied to a singleton list, it is permitted to
  1419. ;;; return the car of that list as its value.
  1420. (define (string-append/shared . strings) (string-concatenate/shared strings))
  1421. (define (string-concatenate/shared strings)
  1422. (let lp ((strings strings) (nchars 0) (first #f))
  1423. (cond ((pair? strings) ; Scan the args, add up total
  1424. (let* ((string (car strings)) ; length, remember 1st
  1425. (tail (cdr strings)) ; non-empty string.
  1426. (slen (string-length string)))
  1427. (if (zero? slen)
  1428. (lp tail nchars first)
  1429. (lp tail (+ nchars slen) (or first strings)))))
  1430. ((zero? nchars) "")
  1431. ;; Just one non-empty string! Return it.
  1432. ((= nchars (string-length (car first))) (car first))
  1433. (else (let ((ans (make-string nchars)))
  1434. (let lp ((strings first) (i 0))
  1435. (if (pair? strings)
  1436. (let* ((s (car strings))
  1437. (slen (string-length s)))
  1438. (%string-copy! ans i s 0 slen)
  1439. (lp (cdr strings) (+ i slen)))))
  1440. ans)))))
  1441. ; Alas, Scheme 48's APPLY blows up if you have many, many arguments.
  1442. ;(define (string-concatenate strings) (apply string-append strings))
  1443. ;;; Here it is written out. I avoid using REDUCE to add up string lengths
  1444. ;;; to avoid non-R5RS dependencies.
  1445. (define (string-concatenate strings)
  1446. (let* ((total (do ((strings strings (cdr strings))
  1447. (i 0 (+ i (string-length (car strings)))))
  1448. ((not (pair? strings)) i)))
  1449. (ans (make-string total)))
  1450. (let lp ((i 0) (strings strings))
  1451. (if (pair? strings)
  1452. (let* ((s (car strings))
  1453. (slen (string-length s)))
  1454. (%string-copy! ans i s 0 slen)
  1455. (lp (+ i slen) (cdr strings)))))
  1456. ans))
  1457. ;;; Defined by R5RS, so commented out here.
  1458. ;(define (string-append . strings) (string-concatenate strings))
  1459. ;;; string-concatenate-reverse string-list [final-string end] -> string
  1460. ;;; string-concatenate-reverse/shared string-list [final-string end] -> string
  1461. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  1462. ;;; Return
  1463. ;;; (string-concatenate
  1464. ;;; (reverse
  1465. ;;; (cons (substring final-string 0 end) string-list)))
  1466. (define (string-concatenate-reverse string-list . maybe-final+end)
  1467. (let-optionals* maybe-final+end ((final "" (string? final))
  1468. (end (string-length final)
  1469. (and (integer? end)
  1470. (exact? end)
  1471. (<= 0 end (string-length final)))))
  1472. (let ((len (let lp ((sum 0) (lis string-list))
  1473. (if (pair? lis)
  1474. (lp (+ sum (string-length (car lis))) (cdr lis))
  1475. sum))))
  1476. (%finish-string-concatenate-reverse len string-list final end))))
  1477. (define (string-concatenate-reverse/shared string-list . maybe-final+end)
  1478. (let-optionals* maybe-final+end ((final "" (string? final))
  1479. (end (string-length final)
  1480. (and (integer? end)
  1481. (exact? end)
  1482. (<= 0 end (string-length final)))))
  1483. ;; Add up the lengths of all the strings in STRING-LIST; also get a
  1484. ;; pointer NZLIST into STRING-LIST showing where the first non-zero-length
  1485. ;; string starts.
  1486. (let lp ((len 0) (nzlist #f) (lis string-list))
  1487. (if (pair? lis)
  1488. (let ((slen (string-length (car lis))))
  1489. (lp (+ len slen)
  1490. (if (or nzlist (zero? slen)) nzlist lis)
  1491. (cdr lis)))
  1492. (cond ((zero? len) (substring/shared final 0 end))
  1493. ;; LEN > 0, so NZLIST is non-empty.
  1494. ((and (zero? end) (= len (string-length (car nzlist))))
  1495. (car nzlist))
  1496. (else (%finish-string-concatenate-reverse len nzlist final end)))))))
  1497. (define (%finish-string-concatenate-reverse len string-list final end)
  1498. (let ((ans (make-string (+ end len))))
  1499. (%string-copy! ans len final 0 end)
  1500. (let lp ((i len) (lis string-list))
  1501. (if (pair? lis)
  1502. (let* ((s (car lis))
  1503. (lis (cdr lis))
  1504. (slen (string-length s))
  1505. (i (- i slen)))
  1506. (%string-copy! ans i s 0 slen)
  1507. (lp i lis))))
  1508. ans))
  1509. ;;; string-replace s1 s2 start1 end1 [start2 end2] -> string
  1510. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  1511. ;;; Replace S1[START1,END1) with S2[START2,END2).
  1512. (define (string-replace s1 s2 start1 end1 . maybe-start+end)
  1513. (check-substring-spec 'string-replace s1 start1 end1)
  1514. (let-string-start+end (start2 end2) string-replace s2 maybe-start+end
  1515. (let* ((slen1 (string-length s1))
  1516. (sublen2 (- end2 start2))
  1517. (alen (+ (- slen1 (- end1 start1)) sublen2))
  1518. (ans (make-string alen)))
  1519. (%string-copy! ans 0 s1 0 start1)
  1520. (%string-copy! ans start1 s2 start2 end2)
  1521. (%string-copy! ans (+ start1 sublen2) s1 end1 slen1)
  1522. ans)))
  1523. ;;; string-tokenize s [token-set start end] -> list
  1524. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  1525. ;;; Break S up into a list of token strings, where a token is a maximal
  1526. ;;; non-empty contiguous sequence of chars belonging to TOKEN-SET.
  1527. ;;; (string-tokenize "hello, world") => ("hello," "world")
  1528. (define (string-tokenize s . token-chars+start+end)
  1529. (let-optionals* token-chars+start+end
  1530. ((token-chars char-set:graphic (char-set? token-chars)) rest)
  1531. (let-string-start+end (start end) string-tokenize s rest
  1532. (let lp ((i end) (ans '()))
  1533. (cond ((and (< start i) (string-index-right s token-chars start i)) =>
  1534. (lambda (tend-1)
  1535. (let ((tend (+ 1 tend-1)))
  1536. (cond ((string-skip-right s token-chars start tend-1) =>
  1537. (lambda (tstart-1)
  1538. (lp tstart-1
  1539. (cons (substring s (+ 1 tstart-1) tend)
  1540. ans))))
  1541. (else (cons (substring s start tend) ans))))))
  1542. (else ans))))))
  1543. ;;; xsubstring s from [to start end] -> string
  1544. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  1545. ;;; S is a string; START and END are optional arguments that demarcate
  1546. ;;; a substring of S, defaulting to 0 and the length of S (e.g., the whole
  1547. ;;; string). Replicate this substring up and down index space, in both the
  1548. ;; positive and negative directions. For example, if S = "abcdefg", START=3,
  1549. ;;; and END=6, then we have the conceptual bidirectionally-infinite string
  1550. ;;; ... d e f d e f d e f d e f d e f d e f d e f ...
  1551. ;;; ... -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 ...
  1552. ;;; XSUBSTRING returns the substring of this string beginning at index FROM,
  1553. ;;; and ending at TO (which defaults to FROM+(END-START)).
  1554. ;;;
  1555. ;;; You can use XSUBSTRING in many ways:
  1556. ;;; - To rotate a string left: (xsubstring "abcdef" 2) => "cdefab"
  1557. ;;; - To rotate a string right: (xsubstring "abcdef" -2) => "efabcd"
  1558. ;;; - To replicate a string: (xsubstring "abc" 0 7) => "abcabca"
  1559. ;;;
  1560. ;;; Note that
  1561. ;;; - The FROM/TO indices give a half-open range -- the characters from
  1562. ;;; index FROM up to, but not including index TO.
  1563. ;;; - The FROM/TO indices are not in terms of the index space for string S.
  1564. ;;; They are in terms of the replicated index space of the substring
  1565. ;;; defined by S, START, and END.
  1566. ;;;
  1567. ;;; It is an error if START=END -- although this is allowed by special
  1568. ;;; dispensation when FROM=TO.
  1569. (define (xsubstring s from . maybe-to+start+end)
  1570. (check-arg (lambda (val) (and (integer? val) (exact? val)))
  1571. from 'xsubstring)
  1572. (receive (to start end)
  1573. (if (pair? maybe-to+start+end)
  1574. (let-string-start+end (start end) xsubstring s (cdr maybe-to+start+end)
  1575. (let ((to (car maybe-to+start+end)))
  1576. (check-arg (lambda (val) (and (integer? val)
  1577. (exact? val)
  1578. (<= from val)))
  1579. to 'xsubstring)
  1580. (values to start end)))
  1581. (let ((slen (string-length (check-arg string? s 'xsubstring))))
  1582. (values (+ from slen) 0 slen)))
  1583. (let ((slen (- end start))
  1584. (anslen (- to from)))
  1585. (cond ((zero? anslen) "")
  1586. ((zero? slen) (assertion-violation 'xsubstring
  1587. "Cannot replicate empty (sub)string"
  1588. s from to start end))
  1589. ((= 1 slen) ; Fast path for 1-char replication.
  1590. (make-string anslen (string-ref s start)))
  1591. ;; Selected text falls entirely within one span.
  1592. ((= (floor (/ from slen)) (floor (/ to slen)))
  1593. (substring s (+ start (modulo from slen))
  1594. (+ start (modulo to slen))))
  1595. ;; Selected text requires multiple spans.
  1596. (else (let ((ans (make-string anslen)))
  1597. (%multispan-repcopy! ans 0 s from to start end)
  1598. ans))))))
  1599. ;;; string-xcopy! target tstart s sfrom [sto start end] -> unspecific
  1600. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  1601. ;;; Exactly the same as xsubstring, but the extracted text is written
  1602. ;;; into the string TARGET starting at index TSTART.
  1603. ;;; This operation is not defined if (EQ? TARGET S) -- you cannot copy
  1604. ;;; a string on top of itself.
  1605. (define (string-xcopy! target tstart s sfrom . maybe-sto+start+end)
  1606. (check-arg (lambda (val) (and (integer? val) (exact? val)))
  1607. sfrom 'string-xcopy!)
  1608. (receive (sto start end)
  1609. (if (pair? maybe-sto+start+end)
  1610. (let-string-start+end (start end) string-xcopy! s (cdr maybe-sto+start+end)
  1611. (let ((sto (car maybe-sto+start+end)))
  1612. (check-arg (lambda (val) (and (integer? val) (exact? val)))
  1613. sto 'string-xcopy!)
  1614. (values sto start end)))
  1615. (let ((slen (string-length s)))
  1616. (values (+ sfrom slen) 0 slen)))
  1617. (let* ((tocopy (- sto sfrom))
  1618. (tend (+ tstart tocopy))
  1619. (slen (- end start)))
  1620. (check-substring-spec 'string-xcopy! target tstart tend)
  1621. (cond ((zero? tocopy))
  1622. ((zero? slen)
  1623. (assertion-violation 'string-xcopy!
  1624. "Cannot replicate empty (sub)string"
  1625. target tstart s sfrom sto start end))
  1626. ((= 1 slen) ; Fast path for 1-char replication.
  1627. (string-fill! target (string-ref s start) tstart tend))
  1628. ;; Selected text falls entirely within one span.
  1629. ((= (floor (/ sfrom slen)) (floor (/ sto slen)))
  1630. (%string-copy! target tstart s
  1631. (+ start (modulo sfrom slen))
  1632. (+ start (modulo sto slen))))
  1633. ;; Multi-span copy.
  1634. (else (%multispan-repcopy! target tstart s sfrom sto start end))))))
  1635. ;;; This is the core copying loop for XSUBSTRING and STRING-XCOPY!
  1636. ;;; Internal -- not exported, no careful arg checking.
  1637. (define (%multispan-repcopy! target tstart s sfrom sto start end)
  1638. (let* ((slen (- end start))
  1639. (i0 (+ start (modulo sfrom slen)))
  1640. (total-chars (- sto sfrom)))
  1641. ;; Copy the partial span @ the beginning
  1642. (%string-copy! target tstart s i0 end)
  1643. (let* ((ncopied (- end i0)) ; We've copied this many.
  1644. (nleft (- total-chars ncopied)) ; # chars left to copy.
  1645. (nspans (quotient nleft slen))) ; # whole spans to copy
  1646. ;; Copy the whole spans in the middle.
  1647. (do ((i (+ tstart ncopied) (+ i slen)) ; Current target index.
  1648. (nspans nspans (- nspans 1))) ; # spans to copy
  1649. ((zero? nspans)
  1650. ;; Copy the partial-span @ the end & we're done.
  1651. (%string-copy! target i s start (+ start (- total-chars (- i tstart)))))
  1652. (%string-copy! target i s start end))))); Copy a whole span.
  1653. ;;; (string-join string-list [delimiter grammar]) => string
  1654. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  1655. ;;; Paste strings together using the delimiter string.
  1656. ;;;
  1657. ;;; (join-strings '("foo" "bar" "baz") ":") => "foo:bar:baz"
  1658. ;;;
  1659. ;;; DELIMITER defaults to a single space " "
  1660. ;;; GRAMMAR is one of the symbols {prefix, infix, strict-infix, suffix}
  1661. ;;; and defaults to 'infix.
  1662. ;;;
  1663. ;;; I could rewrite this more efficiently -- precompute the length of the
  1664. ;;; answer string, then allocate & fill it in iteratively. Using
  1665. ;;; STRING-CONCATENATE is less efficient.
  1666. (define (string-join strings . delim+grammar)
  1667. (let-optionals* delim+grammar ((delim " " (string? delim))
  1668. (grammar 'infix))
  1669. (let ((buildit (lambda (lis final)
  1670. (let recur ((lis lis))
  1671. (if (pair? lis)
  1672. (cons delim (cons (car lis) (recur (cdr lis))))
  1673. final)))))
  1674. (cond ((pair? strings)
  1675. (string-concatenate
  1676. (case grammar
  1677. ((infix strict-infix)
  1678. (cons (car strings) (buildit (cdr strings) '())))
  1679. ((prefix) (buildit strings '()))
  1680. ((suffix)
  1681. (cons (car strings) (buildit (cdr strings) (list delim))))
  1682. (else (assertion-violation 'string-join
  1683. "Illegal join grammar"
  1684. grammar)))))
  1685. ((not (null? strings))
  1686. (assertion-violation 'string-join
  1687. "STRINGS parameter not list." strings))
  1688. ;; STRINGS is ()
  1689. ((eq? grammar 'strict-infix)
  1690. (assertion-violation 'string-join
  1691. "Empty list cannot be joined with STRICT-INFIX grammar."))
  1692. (else ""))))) ; Special-cased for infix grammar.
  1693. ;;; Porting & performance-tuning notes
  1694. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  1695. ;;; See the section at the beginning of this file on external dependencies.
  1696. ;;;
  1697. ;;; The biggest issue with respect to porting is the LET-OPTIONALS* macro.
  1698. ;;; There are many, many optional arguments in this library; the complexity
  1699. ;;; of parsing, defaulting & type-testing these parameters is handled with the
  1700. ;;; aid of this macro. There are about 15 uses of LET-OPTIONALS*. You can
  1701. ;;; rewrite the uses, port the hairy macro definition (which is implemented
  1702. ;;; using a Clinger-Rees low-level explicit-renaming macro system), or port
  1703. ;;; the simple, high-level definition, which is less efficient.
  1704. ;;;
  1705. ;;; There is a fair amount of argument checking. This is, strictly speaking,
  1706. ;;; unnecessary -- the actual body of the procedures will blow up if, say, a
  1707. ;;; START/END index is improper. However, the error message will not be as
  1708. ;;; good as if the error were caught at the "higher level." Also, a very, very
  1709. ;;; smart Scheme compiler may be able to exploit having the type checks done
  1710. ;;; early, so that the actual body of the procedures can assume proper values.
  1711. ;;; This isn't likely; this kind of compiler technology isn't common any
  1712. ;;; longer.
  1713. ;;;
  1714. ;;; The overhead of optional-argument parsing is irritating. The optional
  1715. ;;; arguments must be consed into a rest list on entry, and then parsed out.
  1716. ;;; Function call should be a matter of a few register moves and a jump; it
  1717. ;;; should not involve heap allocation! Your Scheme system may have a superior
  1718. ;;; non-R5RS optional-argument system that can eliminate this overhead. If so,
  1719. ;;; then this is a prime candidate for optimising these procedures,
  1720. ;;; *especially* the many optional START/END index parameters.
  1721. ;;;
  1722. ;;; Note that optional arguments are also a barrier to procedure integration.
  1723. ;;; If your Scheme system permits you to specify alternate entry points
  1724. ;;; for a call when the number of optional arguments is known in a manner
  1725. ;;; that enables inlining/integration, this can provide performance
  1726. ;;; improvements.
  1727. ;;;
  1728. ;;; There is enough *explicit* error checking that *all* string-index
  1729. ;;; operations should *never* produce a bounds error. Period. Feel like
  1730. ;;; living dangerously? *Big* performance win to be had by replacing
  1731. ;;; STRING-REF's and STRING-SET!'s with unsafe equivalents in the loops.
  1732. ;;; Similarly, fixnum-specific operators can speed up the arithmetic done on
  1733. ;;; the index values in the inner loops. The only arguments that are not
  1734. ;;; completely error checked are
  1735. ;;; - string lists (complete checking requires time proportional to the
  1736. ;;; length of the list)
  1737. ;;; - procedure arguments, such as char->char maps & predicates.
  1738. ;;; There is no way to check the range & domain of procedures in Scheme.
  1739. ;;; Procedures that take these parameters cannot fully check their
  1740. ;;; arguments. But all other types to all other procedures are fully
  1741. ;;; checked.
  1742. ;;;
  1743. ;;; This does open up the alternate possibility of simply *removing* these
  1744. ;;; checks, and letting the safe primitives raise the errors. On a dumb
  1745. ;;; Scheme system, this would provide speed (by eliminating the redundant
  1746. ;;; error checks) at the cost of error-message clarity.
  1747. ;;;
  1748. ;;; See the comments preceding the hash function code for notes on tuning
  1749. ;;; the default bound so that the code never overflows your implementation's
  1750. ;;; fixnum size into bignum calculation.
  1751. ;;;
  1752. ;;; In an interpreted Scheme, some of these procedures, or the internal
  1753. ;;; routines with % prefixes, are excellent candidates for being rewritten
  1754. ;;; in C. Consider STRING-HASH, %STRING-COMPARE, the
  1755. ;;; %STRING-{SUF,PRE}FIX-LENGTH routines, STRING-COPY!, STRING-INDEX &
  1756. ;;; STRING-SKIP (char-set & char cases), SUBSTRING and SUBSTRING/SHARED,
  1757. ;;; %KMP-SEARCH, and %MULTISPAN-REPCOPY!.
  1758. ;;;
  1759. ;;; It would also be nice to have the ability to mark some of these
  1760. ;;; routines as candidates for inlining/integration.
  1761. ;;;
  1762. ;;; All the %-prefixed routines in this source code are written
  1763. ;;; to be called internally to this library. They do *not* perform
  1764. ;;; friendly error checks on the inputs; they assume everything is
  1765. ;;; proper. They also do not take optional arguments. These two properties
  1766. ;;; save calling overhead and enable procedure integration -- but they
  1767. ;;; are not appropriate for exported routines.
  1768. ;;; Copyright details
  1769. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  1770. ;;; The prefix/suffix and comparison routines in this code had (extremely
  1771. ;;; distant) origins in MIT Scheme's string lib, and was substantially
  1772. ;;; reworked by Olin Shivers (shivers@ai.mit.edu) 9/98. As such, it is
  1773. ;;; covered by MIT Scheme's open source copyright. See below for details.
  1774. ;;;
  1775. ;;; The KMP string-search code was influenced by implementations written
  1776. ;;; by Stephen Bevan, Brian Dehneyer and Will Fitzgerald. However, this
  1777. ;;; version was written from scratch by myself.
  1778. ;;;
  1779. ;;; The remainder of this code was written from scratch by myself for scsh.
  1780. ;;; The scsh copyright is a BSD-style open source copyright. See below for
  1781. ;;; details.
  1782. ;;; -Olin Shivers
  1783. ;;; MIT Scheme copyright terms
  1784. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  1785. ;;; This material was developed by the Scheme project at the Massachusetts
  1786. ;;; Institute of Technology, Department of Electrical Engineering and
  1787. ;;; Computer Science. Permission to copy and modify this software, to
  1788. ;;; redistribute either the original software or a modified version, and
  1789. ;;; to use this software for any purpose is granted, subject to the
  1790. ;;; following restrictions and understandings.
  1791. ;;;
  1792. ;;; 1. Any copy made of this software must include this copyright notice
  1793. ;;; in full.
  1794. ;;;
  1795. ;;; 2. Users of this software agree to make their best efforts (a) to
  1796. ;;; return to the MIT Scheme project any improvements or extensions that
  1797. ;;; they make, so that these may be included in future releases; and (b)
  1798. ;;; to inform MIT of noteworthy uses of this software.
  1799. ;;;
  1800. ;;; 3. All materials developed as a consequence of the use of this
  1801. ;;; software shall duly acknowledge such use, in accordance with the usual
  1802. ;;; standards of acknowledging credit in academic research.
  1803. ;;;
  1804. ;;; 4. MIT has made no warrantee or representation that the operation of
  1805. ;;; this software will be error-free, and MIT is under no obligation to
  1806. ;;; provide any services, by way of maintenance, update, or otherwise.
  1807. ;;;
  1808. ;;; 5. In conjunction with products arising from the use of this material,
  1809. ;;; there shall be no use of the name of the Massachusetts Institute of
  1810. ;;; Technology nor of any adaptation thereof in any advertising,
  1811. ;;; promotional, or sales literature without prior written consent from
  1812. ;;; MIT in each case.
  1813. ;;; Scsh copyright terms
  1814. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  1815. ;;; All rights reserved.
  1816. ;;;
  1817. ;;; Redistribution and use in source and binary forms, with or without
  1818. ;;; modification, are permitted provided that the following conditions
  1819. ;;; are met:
  1820. ;;; 1. Redistributions of source code must retain the above copyright
  1821. ;;; notice, this list of conditions and the following disclaimer.
  1822. ;;; 2. Redistributions in binary form must reproduce the above copyright
  1823. ;;; notice, this list of conditions and the following disclaimer in the
  1824. ;;; documentation and/or other materials provided with the distribution.
  1825. ;;; 3. The name of the authors may not be used to endorse or promote products
  1826. ;;; derived from this software without specific prior written permission.
  1827. ;;;
  1828. ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
  1829. ;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  1830. ;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  1831. ;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  1832. ;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  1833. ;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  1834. ;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  1835. ;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  1836. ;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  1837. ;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.