123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121 |
- ;;; SRFI 13 string library reference implementation -*- Scheme -*-
- ;;; Olin Shivers 7/2000
- ;;;
- ;;; Copyright (c) 1988-1994 Massachusetts Institute of Technology.
- ;;; Copyright (c) 1998, 1999, 2000 Olin Shivers. All rights reserved.
- ;;; The details of the copyrights appear at the end of the file. Short
- ;;; summary: BSD-style open source.
- ;;; Exports:
- ;;; string-map string-map!
- ;;; string-fold string-unfold
- ;;; string-fold-right string-unfold-right
- ;;; string-tabulate string-for-each string-for-each-index
- ;;; string-every string-any
- ;;; string-hash string-hash-ci
- ;;; string-compare string-compare-ci
- ;;; string= string< string> string<= string>= string<>
- ;;; string-ci= string-ci< string-ci> string-ci<= string-ci>= string-ci<>
- ;;; string-downcase string-upcase string-titlecase
- ;;; string-downcase! string-upcase! string-titlecase!
- ;;; string-take string-take-right
- ;;; string-drop string-drop-right
- ;;; string-pad string-pad-right
- ;;; string-trim string-trim-right string-trim-both
- ;;; string-filter string-delete
- ;;; string-index string-index-right
- ;;; string-skip string-skip-right
- ;;; string-count
- ;;; string-prefix-length string-prefix-length-ci
- ;;; string-suffix-length string-suffix-length-ci
- ;;; string-prefix? string-prefix-ci?
- ;;; string-suffix? string-suffix-ci?
- ;;; string-contains string-contains-ci
- ;;; string-copy! substring/shared
- ;;; string-reverse string-reverse! reverse-list->string
- ;;; string-concatenate string-concatenate/shared string-concatenate-reverse
- ;;; string-append/shared
- ;;; xsubstring string-xcopy!
- ;;; string-null?
- ;;; string-join
- ;;; string-tokenize
- ;;; string-replace
- ;;;
- ;;; R5RS extended:
- ;;; string->list string-copy string-fill!
- ;;;
- ;;; R5RS re-exports:
- ;;; string? make-string string-length string-ref string-set!
- ;;;
- ;;; R5RS re-exports (also defined here but commented-out):
- ;;; string string-append list->string
- ;;;
- ;;; Low-level routines:
- ;;; make-kmp-restart-vector string-kmp-partial-search kmp-step
- ;;; string-parse-start+end
- ;;; string-parse-final-start+end
- ;;; let-string-start+end
- ;;; check-substring-spec
- ;;; substring-spec-ok?
- ;;; Imports
- ;;; This is a fairly large library. While it was written for portability, you
- ;;; must be aware of its dependencies in order to run it in a given scheme
- ;;; implementation. Here is a complete list of the dependencies it has and the
- ;;; assumptions it makes beyond stock R5RS Scheme:
- ;;;
- ;;; This code has the following non-R5RS dependencies:
- ;;; - (RECEIVE (var ...) mv-exp body ...) multiple-value binding macro;
- ;;;
- ;;; - Various imports from the char-set library for the routines that can
- ;;; take char-set arguments;
- ;;;
- ;;; - An ASSERTION-VIOLATION procedure;
- ;;;
- ;;; - BITWISE-AND for the hash functions;
- ;;;
- ;;; - A simple CHECK-ARG procedure for checking parameter values; it is
- ;;; (lambda (pred val proc)
- ;;; (if (pred val) val (assertion-violation 'check-arg "Bad arg" val pred proc)))
- ;;;
- ;;; - :OPTIONAL and LET-OPTIONALS* macros for parsing, defaulting &
- ;;; type-checking optional parameters from a rest argument;
- ;;;
- ;;; - CHAR-CASED? and CHAR-TITLECASE for the STRING-TITLECASE &
- ;;; STRING-TITLECASE! procedures. The former returns true iff a character is
- ;;; one that has case distinctions; in ASCII it returns true on a-z and A-Z.
- ;;; CHAR-TITLECASE is analagous to CHAR-UPCASE and CHAR-DOWNCASE. In ASCII &
- ;;; Latin-1, it is the same as CHAR-UPCASE.
- ;;;
- ;;; The code depends upon a small set of core string primitives from R5RS:
- ;;; MAKE-STRING STRING-REF STRING-SET! STRING? STRING-LENGTH SUBSTRING
- ;;; (Actually, SUBSTRING is not a primitive, but we assume that an
- ;;; implementation's native version is probably faster than one we could
- ;;; define, so we import it from R5RS.)
- ;;;
- ;;; The code depends upon a small set of R5RS character primitives:
- ;;; char? char=? char-ci=? char<? char-ci<?
- ;;; char-upcase char-downcase
- ;;; char->integer (for the hash functions)
- ;;;
- ;;; We assume the following:
- ;;; - CHAR-DOWNCASE o CHAR-UPCASE = CHAR-DOWNCASE
- ;;; - CHAR-CI=? is equivalent to
- ;;; (lambda (c1 c2) (char=? (char-downcase (char-upcase c1))
- ;;; (char-downcase (char-upcase c2))))
- ;;; - CHAR-UPCASE, CHAR-DOWNCASE and CHAR-TITLECASE are locale-insensitive
- ;;; and consistent with Unicode's 1-1 char-mapping spec.
- ;;; These things are typically true, but if not, you would need to modify
- ;;; the case-mapping and case-insensitive routines.
- ;;; Enough introductory blather. On to the source code. (But see the end of
- ;;; the file for further notes on porting & performance tuning.)
- ; Start S48 additions
- (define (check-arg pred val caller)
- (if (not (pred val))
- (assertion-violation caller "invalid argument" val))
- val)
- (define-syntax :optional
- (syntax-rules ()
- ((:optional rest default-exp)
- (let ((maybe-arg rest))
- (if (pair? maybe-arg)
- (if (null? (cdr maybe-arg)) (car maybe-arg)
- (apply assertion-violation ':optional
- "too many optional arguments" maybe-arg))
- default-exp)))
- ((:optional rest default-exp arg-test)
- (let ((maybe-arg rest))
- (if (pair? maybe-arg)
- (if (null? (cdr maybe-arg))
- (let ((val (car maybe-arg)))
- (if (arg-test val)
- val
- (assertion-violation ':optional
- "Optional argument failed test"
- arg-test val)))
- (apply assertion-violation ':optional
- "too many optional arguments"
- maybe-arg))
- default-exp)))))
- (define-syntax let-optionals*
- (syntax-rules ()
- ((let-optionals* arg (opt-clause ...) body ...)
- (let ((rest arg))
- (%let-optionals* rest (opt-clause ...) body ...)))))
- (define-syntax %let-optionals*
- (syntax-rules ()
- ((%let-optionals* arg (((var ...) xparser) opt-clause ...) body ...)
- (call-with-values (lambda () (xparser arg))
- (lambda (rest var ...)
- (%let-optionals* rest (opt-clause ...) body ...))))
-
- ((%let-optionals* arg ((var default) opt-clause ...) body ...)
- (call-with-values (lambda () (if (null? arg) (values default '())
- (values (car arg) (cdr arg))))
- (lambda (var rest)
- (%let-optionals* rest (opt-clause ...) body ...))))
- ((%let-optionals* arg ((var default test) opt-clause ...) body ...)
- (call-with-values (lambda ()
- (if (null? arg) (values default '())
- (let ((var (car arg)))
- (if test (values var (cdr arg))
- (assertion-violation 'let-opt
- "arg failed LET-OPT test" var)))))
- (lambda (var rest)
- (%let-optionals* rest (opt-clause ...) body ...))))
- ((%let-optionals* arg ((var default test supplied?) opt-clause ...) body ...)
- (call-with-values (lambda ()
- (if (null? arg) (values default #f '())
- (let ((var (car arg)))
- (if test (values var #t (cdr arg))
- (assertion-violation 'let-opt
- "arg failed LET-OPT test" var)))))
- (lambda (var supplied? rest)
- (%let-optionals* rest (opt-clause ...) body ...))))
- ((%let-optionals* arg (rest) body ...)
- (let ((rest arg)) body ...))
- ((%let-optionals* arg () body ...)
- (if (null? arg) (begin body ...)
- (assertion-violation 'let-opt "Too many arguments in let-opt" arg)))))
- (define (char-cased? c)
- (or (char-lower-case? c)
- (char-upper-case? c)
- (char-title-case? c)))
-
- ; End S48 additions
- ;;; Support for START/END substring specs
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; This macro parses optional start/end arguments from arg lists, defaulting
- ;;; them to 0/(string-length s), and checks them for correctness.
- (define-syntax let-string-start+end
- (syntax-rules ()
- ((let-string-start+end (start end) proc s-exp args-exp body ...)
- (receive (start end) (string-parse-final-start+end 'proc s-exp args-exp)
- body ...))
- ((let-string-start+end (start end rest) proc s-exp args-exp body ...)
- (receive (rest start end) (string-parse-start+end 'proc s-exp args-exp)
- body ...))))
- ;;; This one parses out a *pair* of final start/end indices.
- ;;; Not exported; for internal use.
- (define-syntax let-string-start+end2
- (syntax-rules ()
- ((l-s-s+e2 (start1 end1 start2 end2) proc s1 s2 args body ...)
- (let ((procv proc)) ; Make sure PROC is only evaluated once.
- (let-string-start+end (start1 end1 rest) procv s1 args
- (let-string-start+end (start2 end2) procv s2 rest
- body ...))))))
- ;;; Returns three values: rest start end
- (define (string-parse-start+end proc s args)
- (if (not (string? s)) (assertion-violation proc "Non-string value" s))
- (let ((slen (string-length s)))
- (if (pair? args)
- (let ((start (car args))
- (args (cdr args)))
- (if (and (integer? start) (exact? start) (>= start 0))
- (receive (end args)
- (if (pair? args)
- (let ((end (car args))
- (args (cdr args)))
- (if (and (integer? end) (exact? end) (<= end slen))
- (values end args)
- (assertion-violation proc "Illegal substring END spec"
- end s)))
- (values slen args))
- (if (<= start end) (values args start end)
- (assertion-violation proc "Illegal substring START/END spec"
- start end s)))
- (assertion-violation proc "Illegal substring START spec" start s)))
- (values '() 0 slen))))
- (define (string-parse-final-start+end proc s args)
- (receive (rest start end) (string-parse-start+end proc s args)
- (if (pair? rest)
- (assertion-violation proc "Extra arguments to procedure" rest)
- (values start end))))
- (define (substring-spec-ok? s start end)
- (and (string? s)
- (integer? start)
- (exact? start)
- (integer? end)
- (exact? end)
- (<= 0 start)
- (<= start end)
- (<= end (string-length s))))
- (define (check-substring-spec proc s start end)
- (if (not (substring-spec-ok? s start end))
- (assertion-violation proc "Illegal substring spec." s start end)))
- ;;; Defined by R5RS, so commented out here.
- ;(define (string . chars)
- ; (let* ((len (length chars))
- ; (ans (make-string len)))
- ; (do ((i 0 (+ i 1))
- ; (chars chars (cdr chars)))
- ; ((>= i len))
- ; (string-set! ans i (car chars)))
- ; ans))
- ;
- ;(define (string . chars) (string-unfold null? car cdr chars))
- ;;; substring/shared S START [END]
- ;;; string-copy S [START END]
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; All this goop is just arg parsing & checking surrounding a call to the
- ;;; actual primitive, %SUBSTRING/SHARED.
- (define (substring/shared s start . maybe-end)
- (check-arg string? s 'substring/shared)
- (let ((slen (string-length s)))
- (check-arg (lambda (start) (and (integer? start) (exact? start) (<= 0 start)))
- start 'substring/shared)
- (%substring/shared s start
- (:optional maybe-end slen
- (lambda (end) (and (integer? end)
- (exact? end)
- (<= start end)
- (<= end slen)))))))
- ;;; Split out so that other routines in this library can avoid arg-parsing
- ;;; overhead for END parameter.
- (define (%substring/shared s start end)
- (if (and (zero? start) (= end (string-length s))) s
- (substring s start end)))
- (define (string-copy s . maybe-start+end)
- (let-string-start+end (start end) string-copy s maybe-start+end
- (substring s start end)))
- ;This library uses the R5RS SUBSTRING, but doesn't export it.
- ;Here is a definition, just for completeness.
- ;(define (substring s start end)
- ; (check-substring-spec 'substring s start end)
- ; (let* ((slen (- end start))
- ; (ans (make-string slen)))
- ; (do ((i 0 (+ i 1))
- ; (j start (+ j 1)))
- ; ((>= i slen) ans)
- ; (string-set! ans i (string-ref s j)))))
- ;;; Basic iterators and other higher-order abstractions
- ;;; (string-map proc s [start end])
- ;;; (string-map! proc s [start end])
- ;;; (string-fold kons knil s [start end])
- ;;; (string-fold-right kons knil s [start end])
- ;;; (string-unfold p f g seed [base make-final])
- ;;; (string-unfold-right p f g seed [base make-final])
- ;;; (string-for-each proc s [start end])
- ;;; (string-for-each-index proc s [start end])
- ;;; (string-every char-set/char/pred s [start end])
- ;;; (string-any char-set/char/pred s [start end])
- ;;; (string-tabulate proc len)
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; You want compiler support for high-level transforms on fold and unfold ops.
- ;;; You'd at least like a lot of inlining for clients of these procedures.
- ;;; Don't hold your breath.
- (define (string-map proc s . maybe-start+end)
- (check-arg procedure? proc 'string-map)
- (let-string-start+end (start end) string-map s maybe-start+end
- (%string-map proc s start end)))
- (define (%string-map proc s start end) ; Internal utility
- (let* ((len (- end start))
- (ans (make-string len)))
- (do ((i (- end 1) (- i 1))
- (j (- len 1) (- j 1)))
- ((< j 0))
- (string-set! ans j (proc (string-ref s i))))
- ans))
- (define (string-map! proc s . maybe-start+end)
- (check-arg procedure? proc 'string-map!)
- (let-string-start+end (start end) string-map! s maybe-start+end
- (%string-map! proc s start end)))
- (define (%string-map! proc s start end)
- (do ((i (- end 1) (- i 1)))
- ((< i start))
- (string-set! s i (proc (string-ref s i)))))
- (define (string-fold kons knil s . maybe-start+end)
- (check-arg procedure? kons 'string-fold)
- (let-string-start+end (start end) string-fold s maybe-start+end
- (let lp ((v knil) (i start))
- (if (< i end) (lp (kons (string-ref s i) v) (+ i 1))
- v))))
- (define (string-fold-right kons knil s . maybe-start+end)
- (check-arg procedure? kons 'string-fold-right)
- (let-string-start+end (start end) string-fold-right s maybe-start+end
- (let lp ((v knil) (i (- end 1)))
- (if (>= i start) (lp (kons (string-ref s i) v) (- i 1))
- v))))
- ;;; (string-unfold p f g seed [base make-final])
- ;;; This is the fundamental constructor for strings.
- ;;; - G is used to generate a series of "seed" values from the initial seed:
- ;;; SEED, (G SEED), (G^2 SEED), (G^3 SEED), ...
- ;;; - P tells us when to stop -- when it returns true when applied to one
- ;;; of these seed values.
- ;;; - F maps each seed value to the corresponding character
- ;;; in the result string. These chars are assembled into the
- ;;; string in a left-to-right order.
- ;;; - BASE is the optional initial/leftmost portion of the constructed string;
- ;;; it defaults to the empty string "".
- ;;; - MAKE-FINAL is applied to the terminal seed value (on which P returns
- ;;; true) to produce the final/rightmost portion of the constructed string.
- ;;; It defaults to (LAMBDA (X) "").
- ;;;
- ;;; In other words, the following (simple, inefficient) definition holds:
- ;;; (define (string-unfold p f g seed base make-final)
- ;;; (string-append base
- ;;; (let recur ((seed seed))
- ;;; (if (p seed) (make-final seed)
- ;;; (string-append (string (f seed))
- ;;; (recur (g seed)))))))
- ;;;
- ;;; STRING-UNFOLD is a fairly powerful constructor -- you can use it to
- ;;; reverse a string, copy a string, convert a list to a string, read
- ;;; a port into a string, and so forth. Examples:
- ;;; (port->string port) =
- ;;; (string-unfold (compose eof-object? peek-char)
- ;;; read-char values port)
- ;;;
- ;;; (list->string lis) = (string-unfold null? car cdr lis)
- ;;;
- ;;; (tabulate-string f size) = (string-unfold (lambda (i) (= i size)) f add1 0)
- ;;; A problem with the following simple formulation is that it pushes one
- ;;; stack frame for every char in the result string -- an issue if you are
- ;;; using it to read a 100kchar string. So we don't use it -- but I include
- ;;; it to give a clear, straightforward description of what the function
- ;;; does.
- ;(define (string-unfold p f g seed base make-final)
- ; (let ((ans (let recur ((seed seed) (i (string-length base)))
- ; (if (p seed)
- ; (let* ((final (make-final seed))
- ; (ans (make-string (+ i (string-length final)))))
- ; (string-copy! ans i final)
- ; ans)
- ;
- ; (let* ((c (f seed))
- ; (s (recur (g seed) (+ i 1))))
- ; (string-set! s i c)
- ; s)))))
- ; (string-copy! ans 0 base)
- ; ans))
- ;;; The strategy is to allocate a series of chunks into which we stash the
- ;;; chars as we generate them. Chunk size goes up in powers of two starting
- ;;; with 40 and levelling out at 4k, i.e.
- ;;; 40 40 80 160 320 640 1280 2560 4096 4096 4096 4096 4096...
- ;;; This should work pretty well for short strings, 1-line (80 char) strings,
- ;;; and longer ones. When done, we allocate an answer string and copy the
- ;;; chars over from the chunk buffers.
- (define (string-unfold p f g seed . base+make-final)
- (check-arg procedure? p 'string-unfold)
- (check-arg procedure? f 'string-unfold)
- (check-arg procedure? g 'string-unfold)
- (let-optionals* base+make-final
- ((base "" (string? base))
- (make-final (lambda (x) "") (procedure? make-final)))
- (let lp ((chunks '()) ; Previously filled chunks
- (nchars 0) ; Number of chars in CHUNKS
- (chunk (make-string 40)) ; Current chunk into which we write
- (chunk-len 40)
- (i 0) ; Number of chars written into CHUNK
- (seed seed))
- (let lp2 ((i i) (seed seed))
- (if (not (p seed))
- (let ((c (f seed))
- (seed (g seed)))
- (if (< i chunk-len)
- (begin (string-set! chunk i c)
- (lp2 (+ i 1) seed))
- (let* ((nchars2 (+ chunk-len nchars))
- (chunk-len2 (min 4096 nchars2))
- (new-chunk (make-string chunk-len2)))
- (string-set! new-chunk 0 c)
- (lp (cons chunk chunks) (+ nchars chunk-len)
- new-chunk chunk-len2 1 seed))))
- ;; We're done. Make the answer string & install the bits.
- (let* ((final (make-final seed))
- (flen (string-length final))
- (base-len (string-length base))
- (j (+ base-len nchars i))
- (ans (make-string (+ j flen))))
- (%string-copy! ans j final 0 flen) ; Install FINAL.
- (let ((j (- j i)))
- (%string-copy! ans j chunk 0 i) ; Install CHUNK[0,I).
- (let lp ((j j) (chunks chunks)) ; Install CHUNKS.
- (if (pair? chunks)
- (let* ((chunk (car chunks))
- (chunks (cdr chunks))
- (chunk-len (string-length chunk))
- (j (- j chunk-len)))
- (%string-copy! ans j chunk 0 chunk-len)
- (lp j chunks)))))
- (%string-copy! ans 0 base 0 base-len) ; Install BASE.
- ans))))))
- (define (string-unfold-right p f g seed . base+make-final)
- (let-optionals* base+make-final
- ((base "" (string? base))
- (make-final (lambda (x) "") (procedure? make-final)))
- (let lp ((chunks '()) ; Previously filled chunks
- (nchars 0) ; Number of chars in CHUNKS
- (chunk (make-string 40)) ; Current chunk into which we write
- (chunk-len 40)
- (i 40) ; Number of chars available in CHUNK
- (seed seed))
- (let lp2 ((i i) (seed seed)) ; Fill up CHUNK from right
- (if (not (p seed)) ; to left.
- (let ((c (f seed))
- (seed (g seed)))
- (if (> i 0)
- (let ((i (- i 1)))
- (string-set! chunk i c)
- (lp2 i seed))
- (let* ((nchars2 (+ chunk-len nchars))
- (chunk-len2 (min 4096 nchars2))
- (new-chunk (make-string chunk-len2))
- (i (- chunk-len2 1)))
- (string-set! new-chunk i c)
- (lp (cons chunk chunks) (+ nchars chunk-len)
- new-chunk chunk-len2 i seed))))
- ;; We're done. Make the answer string & install the bits.
- (let* ((final (make-final seed))
- (flen (string-length final))
- (base-len (string-length base))
- (chunk-used (- chunk-len i))
- (j (+ base-len nchars chunk-used))
- (ans (make-string (+ j flen))))
- (%string-copy! ans 0 final 0 flen) ; Install FINAL.
- (%string-copy! ans flen chunk i chunk-len); Install CHUNK[I,).
- (let lp ((j (+ flen chunk-used)) ; Install CHUNKS.
- (chunks chunks))
- (if (pair? chunks)
- (let* ((chunk (car chunks))
- (chunks (cdr chunks))
- (chunk-len (string-length chunk)))
- (%string-copy! ans j chunk 0 chunk-len)
- (lp (+ j chunk-len) chunks))
- (%string-copy! ans j base 0 base-len))); Install BASE.
- ans))))))
- (define (string-for-each proc s . maybe-start+end)
- (check-arg procedure? proc 'string-for-each)
- (let-string-start+end (start end) string-for-each s maybe-start+end
- (let lp ((i start))
- (if (< i end)
- (begin (proc (string-ref s i))
- (lp (+ i 1)))))))
- (define (string-for-each-index proc s . maybe-start+end)
- (check-arg procedure? proc 'string-for-each-index)
- (let-string-start+end (start end) string-for-each-index s maybe-start+end
- (let lp ((i start))
- (if (< i end) (begin (proc i) (lp (+ i 1)))))))
- (define (string-every criterion s . maybe-start+end)
- (let-string-start+end (start end) string-every s maybe-start+end
- (cond ((char? criterion)
- (let lp ((i start))
- (or (>= i end)
- (and (char=? criterion (string-ref s i))
- (lp (+ i 1))))))
- ((char-set? criterion)
- (let lp ((i start))
- (or (>= i end)
- (and (char-set-contains? criterion (string-ref s i))
- (lp (+ i 1))))))
- ((procedure? criterion) ; Slightly funky loop so that
- (or (= start end) ; final (PRED S[END-1]) call
- (let lp ((i start)) ; is a tail call.
- (let ((c (string-ref s i))
- (i1 (+ i 1)))
- (if (= i1 end) (criterion c) ; Tail call.
- (and (criterion c) (lp i1)))))))
- (else (assertion-violation 'string-every
- "Second param is neither char-set, char, or predicate procedure."
- criterion)))))
- (define (string-any criterion s . maybe-start+end)
- (let-string-start+end (start end) string-any s maybe-start+end
- (cond ((char? criterion)
- (let lp ((i start))
- (and (< i end)
- (or (char=? criterion (string-ref s i))
- (lp (+ i 1))))))
- ((char-set? criterion)
- (let lp ((i start))
- (and (< i end)
- (or (char-set-contains? criterion (string-ref s i))
- (lp (+ i 1))))))
- ((procedure? criterion) ; Slightly funky loop so that
- (and (< start end) ; final (PRED S[END-1]) call
- (let lp ((i start)) ; is a tail call.
- (let ((c (string-ref s i))
- (i1 (+ i 1)))
- (if (= i1 end) (criterion c) ; Tail call
- (or (criterion c) (lp i1)))))))
- (else (assertion-violation 'string-any
- "Second param is neither char-set, char, or predicate procedure."
- criterion)))))
- (define (string-tabulate proc len)
- (check-arg procedure? proc 'string-tabulate)
- (check-arg (lambda (val) (and (integer? val) (exact? val) (<= 0 val)))
- len 'string-tabulate)
- (let ((s (make-string len)))
- (do ((i (- len 1) (- i 1)))
- ((< i 0))
- (string-set! s i (proc i)))
- s))
- ;;; string-prefix-length[-ci] s1 s2 [start1 end1 start2 end2]
- ;;; string-suffix-length[-ci] s1 s2 [start1 end1 start2 end2]
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; Find the length of the common prefix/suffix.
- ;;; It is not required that the two substrings passed be of equal length.
- ;;; This was microcode in MIT Scheme -- a very tightly bummed primitive.
- ;;; %STRING-PREFIX-LENGTH is the core routine of all string-comparisons,
- ;;; so should be as tense as possible.
- (define (%string-prefix-length s1 start1 end1 s2 start2 end2)
- (let* ((delta (min (- end1 start1) (- end2 start2)))
- (end1 (+ start1 delta)))
- (if (and (eq? s1 s2) (= start1 start2)) ; EQ fast path
- delta
- (let lp ((i start1) (j start2)) ; Regular path
- (if (or (>= i end1)
- (not (char=? (string-ref s1 i)
- (string-ref s2 j))))
- (- i start1)
- (lp (+ i 1) (+ j 1)))))))
- (define (%string-suffix-length s1 start1 end1 s2 start2 end2)
- (let* ((delta (min (- end1 start1) (- end2 start2)))
- (start1 (- end1 delta)))
- (if (and (eq? s1 s2) (= end1 end2)) ; EQ fast path
- delta
- (let lp ((i (- end1 1)) (j (- end2 1))) ; Regular path
- (if (or (< i start1)
- (not (char=? (string-ref s1 i)
- (string-ref s2 j))))
- (- (- end1 i) 1)
- (lp (- i 1) (- j 1)))))))
- (define (%string-prefix-length-ci s1 start1 end1 s2 start2 end2)
- (let* ((delta (min (- end1 start1) (- end2 start2)))
- (end1 (+ start1 delta)))
- (if (and (eq? s1 s2) (= start1 start2)) ; EQ fast path
- delta
- (let lp ((i start1) (j start2)) ; Regular path
- (if (or (>= i end1)
- (not (char-ci=? (string-ref s1 i)
- (string-ref s2 j))))
- (- i start1)
- (lp (+ i 1) (+ j 1)))))))
- (define (%string-suffix-length-ci s1 start1 end1 s2 start2 end2)
- (let* ((delta (min (- end1 start1) (- end2 start2)))
- (start1 (- end1 delta)))
- (if (and (eq? s1 s2) (= end1 end2)) ; EQ fast path
- delta
- (let lp ((i (- end1 1)) (j (- end2 1))) ; Regular path
- (if (or (< i start1)
- (not (char-ci=? (string-ref s1 i)
- (string-ref s2 j))))
- (- (- end1 i) 1)
- (lp (- i 1) (- j 1)))))))
- (define (string-prefix-length s1 s2 . maybe-starts+ends)
- (let-string-start+end2 (start1 end1 start2 end2)
- string-prefix-length s1 s2 maybe-starts+ends
- (%string-prefix-length s1 start1 end1 s2 start2 end2)))
- (define (string-suffix-length s1 s2 . maybe-starts+ends)
- (let-string-start+end2 (start1 end1 start2 end2)
- string-suffix-length s1 s2 maybe-starts+ends
- (%string-suffix-length s1 start1 end1 s2 start2 end2)))
- (define (string-prefix-length-ci s1 s2 . maybe-starts+ends)
- (let-string-start+end2 (start1 end1 start2 end2)
- string-prefix-length-ci s1 s2 maybe-starts+ends
- (%string-prefix-length-ci s1 start1 end1 s2 start2 end2)))
- (define (string-suffix-length-ci s1 s2 . maybe-starts+ends)
- (let-string-start+end2 (start1 end1 start2 end2)
- string-suffix-length-ci s1 s2 maybe-starts+ends
- (%string-suffix-length-ci s1 start1 end1 s2 start2 end2)))
- ;;; string-prefix? s1 s2 [start1 end1 start2 end2]
- ;;; string-suffix? s1 s2 [start1 end1 start2 end2]
- ;;; string-prefix-ci? s1 s2 [start1 end1 start2 end2]
- ;;; string-suffix-ci? s1 s2 [start1 end1 start2 end2]
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; These are all simple derivatives of the previous counting funs.
- (define (string-prefix? s1 s2 . maybe-starts+ends)
- (let-string-start+end2 (start1 end1 start2 end2)
- string-prefix? s1 s2 maybe-starts+ends
- (%string-prefix? s1 start1 end1 s2 start2 end2)))
- (define (string-suffix? s1 s2 . maybe-starts+ends)
- (let-string-start+end2 (start1 end1 start2 end2)
- string-suffix? s1 s2 maybe-starts+ends
- (%string-suffix? s1 start1 end1 s2 start2 end2)))
- (define (string-prefix-ci? s1 s2 . maybe-starts+ends)
- (let-string-start+end2 (start1 end1 start2 end2)
- string-prefix-ci? s1 s2 maybe-starts+ends
- (%string-prefix-ci? s1 start1 end1 s2 start2 end2)))
- (define (string-suffix-ci? s1 s2 . maybe-starts+ends)
- (let-string-start+end2 (start1 end1 start2 end2)
- string-suffix-ci? s1 s2 maybe-starts+ends
- (%string-suffix-ci? s1 start1 end1 s2 start2 end2)))
- ;;; Here are the internal routines that do the real work.
- (define (%string-prefix? s1 start1 end1 s2 start2 end2)
- (let ((len1 (- end1 start1)))
- (and (<= len1 (- end2 start2)) ; Quick check
- (= (%string-prefix-length s1 start1 end1
- s2 start2 end2)
- len1))))
- (define (%string-suffix? s1 start1 end1 s2 start2 end2)
- (let ((len1 (- end1 start1)))
- (and (<= len1 (- end2 start2)) ; Quick check
- (= len1 (%string-suffix-length s1 start1 end1
- s2 start2 end2)))))
- (define (%string-prefix-ci? s1 start1 end1 s2 start2 end2)
- (let ((len1 (- end1 start1)))
- (and (<= len1 (- end2 start2)) ; Quick check
- (= len1 (%string-prefix-length-ci s1 start1 end1
- s2 start2 end2)))))
- (define (%string-suffix-ci? s1 start1 end1 s2 start2 end2)
- (let ((len1 (- end1 start1)))
- (and (<= len1 (- end2 start2)) ; Quick check
- (= len1 (%string-suffix-length-ci s1 start1 end1
- s2 start2 end2)))))
- ;;; string-compare s1 s2 proc< proc= proc> [start1 end1 start2 end2]
- ;;; string-compare-ci s1 s2 proc< proc= proc> [start1 end1 start2 end2]
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; Primitive string-comparison functions.
- ;;; Continuation order is different from MIT Scheme.
- ;;; Continuations are applied to s1's mismatch index;
- ;;; in the case of equality, this is END1.
- (define (%string-compare s1 start1 end1 s2 start2 end2
- proc< proc= proc>)
- (let ((size1 (- end1 start1))
- (size2 (- end2 start2)))
- (let ((match (%string-prefix-length s1 start1 end1 s2 start2 end2)))
- (if (= match size1)
- ((if (= match size2) proc= proc<) end1)
- ((if (= match size2)
- proc>
- (if (char<? (string-ref s1 (+ start1 match))
- (string-ref s2 (+ start2 match)))
- proc< proc>))
- (+ match start1))))))
- (define (%string-compare-ci s1 start1 end1 s2 start2 end2
- proc< proc= proc>)
- (let ((size1 (- end1 start1))
- (size2 (- end2 start2)))
- (let ((match (%string-prefix-length-ci s1 start1 end1 s2 start2 end2)))
- (if (= match size1)
- ((if (= match size2) proc= proc<) end1)
- ((if (= match size2) proc>
- (if (char-ci<? (string-ref s1 (+ start1 match))
- (string-ref s2 (+ start2 match)))
- proc< proc>))
- (+ start1 match))))))
- (define (string-compare s1 s2 proc< proc= proc> . maybe-starts+ends)
- (check-arg procedure? proc< 'string-compare)
- (check-arg procedure? proc= 'string-compare)
- (check-arg procedure? proc> 'string-compare)
- (let-string-start+end2 (start1 end1 start2 end2)
- string-compare s1 s2 maybe-starts+ends
- (%string-compare s1 start1 end1 s2 start2 end2 proc< proc= proc>)))
- (define (string-compare-ci s1 s2 proc< proc= proc> . maybe-starts+ends)
- (check-arg procedure? proc< 'string-compare-ci)
- (check-arg procedure? proc= 'string-compare-ci)
- (check-arg procedure? proc> 'string-compare-ci)
- (let-string-start+end2 (start1 end1 start2 end2)
- string-compare-ci s1 s2 maybe-starts+ends
- (%string-compare-ci s1 start1 end1 s2 start2 end2 proc< proc= proc>)))
- ;;; string= string<> string-ci= string-ci<>
- ;;; string< string> string-ci< string-ci>
- ;;; string<= string>= string-ci<= string-ci>=
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; Simple definitions in terms of the previous comparison funs.
- ;;; I sure hope the %STRING-COMPARE calls get integrated.
- (define (string= s1 s2 . maybe-starts+ends)
- (let-string-start+end2 (start1 end1 start2 end2)
- string= s1 s2 maybe-starts+ends
- (and (= (- end1 start1) (- end2 start2)) ; Quick filter
- (or (and (eq? s1 s2) (= start1 start2)) ; Fast path
- (%string-compare s1 start1 end1 s2 start2 end2 ; Real test
- (lambda (i) #f)
- (lambda (i) #t)
- (lambda (i) #f))))))
- (define (string<> s1 s2 . maybe-starts+ends)
- (let-string-start+end2 (start1 end1 start2 end2)
- string<> s1 s2 maybe-starts+ends
- (or (not (= (- end1 start1) (- end2 start2))) ; Fast path
- (and (not (and (eq? s1 s2) (= start1 start2))) ; Quick filter
- (%string-compare s1 start1 end1 s2 start2 end2 ; Real test
- (lambda (i) #t)
- (lambda (i) #f)
- (lambda (i) #t))))))
- (define (string< s1 s2 . maybe-starts+ends)
- (let-string-start+end2 (start1 end1 start2 end2)
- string< s1 s2 maybe-starts+ends
- (if (and (eq? s1 s2) (= start1 start2)) ; Fast path
- (< end1 end2)
- (%string-compare s1 start1 end1 s2 start2 end2 ; Real test
- (lambda (i) #t)
- (lambda (i) #f)
- (lambda (i) #f)))))
- (define (string> s1 s2 . maybe-starts+ends)
- (let-string-start+end2 (start1 end1 start2 end2)
- string> s1 s2 maybe-starts+ends
- (if (and (eq? s1 s2) (= start1 start2)) ; Fast path
- (> end1 end2)
- (%string-compare s1 start1 end1 s2 start2 end2 ; Real test
- (lambda (i) #f)
- (lambda (i) #f)
- (lambda (i) #t)))))
- (define (string<= s1 s2 . maybe-starts+ends)
- (let-string-start+end2 (start1 end1 start2 end2)
- string<= s1 s2 maybe-starts+ends
- (if (and (eq? s1 s2) (= start1 start2)) ; Fast path
- (<= end1 end2)
- (%string-compare s1 start1 end1 s2 start2 end2 ; Real test
- (lambda (i) #t)
- (lambda (i) #t)
- (lambda (i) #f)))))
- (define (string>= s1 s2 . maybe-starts+ends)
- (let-string-start+end2 (start1 end1 start2 end2)
- string>= s1 s2 maybe-starts+ends
- (if (and (eq? s1 s2) (= start1 start2)) ; Fast path
- (>= end1 end2)
- (%string-compare s1 start1 end1 s2 start2 end2 ; Real test
- (lambda (i) #f)
- (lambda (i) #t)
- (lambda (i) #t)))))
- (define (string-ci= s1 s2 . maybe-starts+ends)
- (let-string-start+end2 (start1 end1 start2 end2)
- string-ci= s1 s2 maybe-starts+ends
- (and (= (- end1 start1) (- end2 start2)) ; Quick filter
- (or (and (eq? s1 s2) (= start1 start2)) ; Fast path
- (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test
- (lambda (i) #f)
- (lambda (i) #t)
- (lambda (i) #f))))))
- (define (string-ci<> s1 s2 . maybe-starts+ends)
- (let-string-start+end2 (start1 end1 start2 end2)
- string-ci<> s1 s2 maybe-starts+ends
- (or (not (= (- end1 start1) (- end2 start2))) ; Fast path
- (and (not (and (eq? s1 s2) (= start1 start2))) ; Quick filter
- (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test
- (lambda (i) #t)
- (lambda (i) #f)
- (lambda (i) #t))))))
- (define (string-ci< s1 s2 . maybe-starts+ends)
- (let-string-start+end2 (start1 end1 start2 end2)
- string-ci< s1 s2 maybe-starts+ends
- (if (and (eq? s1 s2) (= start1 start2)) ; Fast path
- (< end1 end2)
- (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test
- (lambda (i) #t)
- (lambda (i) #f)
- (lambda (i) #f)))))
- (define (string-ci> s1 s2 . maybe-starts+ends)
- (let-string-start+end2 (start1 end1 start2 end2)
- string-ci> s1 s2 maybe-starts+ends
- (if (and (eq? s1 s2) (= start1 start2)) ; Fast path
- (> end1 end2)
- (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test
- (lambda (i) #f)
- (lambda (i) #f)
- (lambda (i) #t)))))
- (define (string-ci<= s1 s2 . maybe-starts+ends)
- (let-string-start+end2 (start1 end1 start2 end2)
- string-ci<= s1 s2 maybe-starts+ends
- (if (and (eq? s1 s2) (= start1 start2)) ; Fast path
- (<= end1 end2)
- (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test
- (lambda (i) #t)
- (lambda (i) #t)
- (lambda (i) #f)))))
- (define (string-ci>= s1 s2 . maybe-starts+ends)
- (let-string-start+end2 (start1 end1 start2 end2)
- string-ci>= s1 s2 maybe-starts+ends
- (if (and (eq? s1 s2) (= start1 start2)) ; Fast path
- (>= end1 end2)
- (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test
- (lambda (i) #f)
- (lambda (i) #t)
- (lambda (i) #t)))))
- ;;; Hash
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; Compute (c + 37 c + 37^2 c + ...) modulo BOUND, with sleaze thrown in
- ;;; to keep the intermediate values small. (We do the calculation with just
- ;;; enough bits to represent BOUND, masking off high bits at each step in
- ;;; calculation. If this screws up any important properties of the hash
- ;;; function I'd like to hear about it. -Olin)
- ;;;
- ;;; If you keep BOUND small enough, the intermediate calculations will
- ;;; always be fixnums. How small is dependent on the underlying Scheme system;
- ;;; we use a default BOUND of 2^22 = 4194304, which should hack it in
- ;;; Schemes that give you at least 29 signed bits for fixnums. The core
- ;;; calculation that you don't want to overflow is, worst case,
- ;;; (+ 65535 (* 37 (- bound 1)))
- ;;; where 65535 is the max character code. Choose the default BOUND to be the
- ;;; biggest power of two that won't cause this expression to fixnum overflow,
- ;;; and everything will be copacetic.
- (define (%string-hash s char->int bound start end)
- (let ((iref (lambda (s i) (char->int (string-ref s i))))
- ;; Compute a 111...1 mask that will cover BOUND-1:
- (mask (let lp ((i #x10000)) ; Let's skip first 16 iterations, eh?
- (if (>= i bound) (- i 1) (lp (+ i i))))))
- (let lp ((i start) (ans 0))
- (if (>= i end) (modulo ans bound)
- (lp (+ i 1) (bitwise-and mask (+ (* 37 ans) (iref s i))))))))
- (define (string-hash s . maybe-bound+start+end)
- (let-optionals* maybe-bound+start+end ((bound 4194304 (and (integer? bound)
- (exact? bound)
- (<= 0 bound)))
- rest)
- (let ((bound (if (zero? bound) 4194304 bound))) ; 0 means default.
- (let-string-start+end (start end) string-hash s rest
- (%string-hash s char->integer bound start end)))))
- (define (string-hash-ci s . maybe-bound+start+end)
- (let-optionals* maybe-bound+start+end ((bound 4194304 (and (integer? bound)
- (exact? bound)
- (<= 0 bound)))
- rest)
- (let ((bound (if (zero? bound) 4194304 bound))) ; 0 means default.
- (let-string-start+end (start end) string-hash-ci s rest
- (%string-hash s (lambda (c) (char->integer (char-downcase c)))
- bound start end)))))
- ;;; Case hacking
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; string-upcase s [start end]
- ;;; string-upcase! s [start end]
- ;;; string-downcase s [start end]
- ;;; string-downcase! s [start end]
- ;;;
- ;;; string-titlecase s [start end]
- ;;; string-titlecase! s [start end]
- ;;; Capitalize every contiguous alpha sequence: capitalise
- ;;; first char, lowercase rest.
- (define (string-upcase s . maybe-start+end)
- (let-string-start+end (start end) string-upcase s maybe-start+end
- (%string-map char-upcase s start end)))
- (define (string-upcase! s . maybe-start+end)
- (let-string-start+end (start end) string-upcase! s maybe-start+end
- (%string-map! char-upcase s start end)))
- (define (string-downcase s . maybe-start+end)
- (let-string-start+end (start end) string-downcase s maybe-start+end
- (%string-map char-downcase s start end)))
- (define (string-downcase! s . maybe-start+end)
- (let-string-start+end (start end) string-downcase! s maybe-start+end
- (%string-map! char-downcase s start end)))
- (define (%string-titlecase! s start end)
- (let lp ((i start))
- (cond ((string-index s char-cased? i end) =>
- (lambda (i)
- (string-set! s i (char-titlecase (string-ref s i)))
- (let ((i1 (+ i 1)))
- (cond ((string-skip s char-cased? i1 end) =>
- (lambda (j)
- (string-downcase! s i1 j)
- (lp (+ j 1))))
- (else (string-downcase! s i1 end)))))))))
- (define (string-titlecase! s . maybe-start+end)
- (let-string-start+end (start end) string-titlecase! s maybe-start+end
- (%string-titlecase! s start end)))
- (define (string-titlecase s . maybe-start+end)
- (let-string-start+end (start end) string-titlecase! s maybe-start+end
- (let ((ans (substring s start end)))
- (%string-titlecase! ans 0 (- end start))
- ans)))
- ;;; Cutting & pasting strings
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; string-take string nchars
- ;;; string-drop string nchars
- ;;;
- ;;; string-take-right string nchars
- ;;; string-drop-right string nchars
- ;;;
- ;;; string-pad string k [char start end]
- ;;; string-pad-right string k [char start end]
- ;;;
- ;;; string-trim string [char/char-set/pred start end]
- ;;; string-trim-right string [char/char-set/pred start end]
- ;;; string-trim-both string [char/char-set/pred start end]
- ;;;
- ;;; These trimmers invert the char-set meaning from MIT Scheme -- you
- ;;; say what you want to trim.
- (define (string-take s n)
- (check-arg string? s 'string-take)
- (check-arg (lambda (val) (and (integer? n) (exact? n)
- (<= 0 n (string-length s))))
- n 'string-take)
- (%substring/shared s 0 n))
- (define (string-take-right s n)
- (check-arg string? s 'string-take-right)
- (let ((len (string-length s)))
- (check-arg (lambda (val) (and (integer? n) (exact? n) (<= 0 n len)))
- n 'string-take-right)
- (%substring/shared s (- len n) len)))
- (define (string-drop s n)
- (check-arg string? s 'string-drop)
- (let ((len (string-length s)))
- (check-arg (lambda (val) (and (integer? n) (exact? n) (<= 0 n len)))
- n 'string-drop)
- (%substring/shared s n len)))
- (define (string-drop-right s n)
- (check-arg string? s 'string-drop-right)
- (let ((len (string-length s)))
- (check-arg (lambda (val) (and (integer? n) (exact? n) (<= 0 n len)))
- n 'string-drop-right)
- (%substring/shared s 0 (- len n))))
- (define (string-trim s . criterion+start+end)
- (let-optionals* criterion+start+end ((criterion char-set:whitespace) rest)
- (let-string-start+end (start end) string-trim s rest
- (cond ((string-skip s criterion start end) =>
- (lambda (i) (%substring/shared s i end)))
- (else "")))))
- (define (string-trim-right s . criterion+start+end)
- (let-optionals* criterion+start+end ((criterion char-set:whitespace) rest)
- (let-string-start+end (start end) string-trim-right s rest
- (cond ((string-skip-right s criterion start end) =>
- (lambda (i) (%substring/shared s start (+ 1 i))))
- (else "")))))
- (define (string-trim-both s . criterion+start+end)
- (let-optionals* criterion+start+end ((criterion char-set:whitespace) rest)
- (let-string-start+end (start end) string-trim-both s rest
- (cond ((string-skip s criterion start end) =>
- (lambda (i)
- (%substring/shared s i (+ 1 (string-skip-right s criterion i end)))))
- (else "")))))
- (define (string-pad-right s n . char+start+end)
- (let-optionals* char+start+end ((char #\space (char? char)) rest)
- (let-string-start+end (start end) string-pad-right s rest
- (check-arg (lambda (n) (and (integer? n) (exact? n) (<= 0 n)))
- n 'string-pad-right)
- (let ((len (- end start)))
- (if (<= n len)
- (%substring/shared s start (+ start n))
- (let ((ans (make-string n char)))
- (%string-copy! ans 0 s start end)
- ans))))))
- (define (string-pad s n . char+start+end)
- (let-optionals* char+start+end ((char #\space (char? char)) rest)
- (let-string-start+end (start end) string-pad s rest
- (check-arg (lambda (n) (and (integer? n) (exact? n) (<= 0 n)))
- n 'string-pad)
- (let ((len (- end start)))
- (if (<= n len)
- (%substring/shared s (- end n) end)
- (let ((ans (make-string n char)))
- (%string-copy! ans (- n len) s start end)
- ans))))))
- ;;; Filtering strings
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; string-delete char/char-set/pred string [start end]
- ;;; string-filter char/char-set/pred string [start end]
- ;;;
- ;;; If the criterion is a char or char-set, we scan the string twice with
- ;;; string-fold -- once to determine the length of the result string,
- ;;; and once to do the filtered copy.
- ;;; If the criterion is a predicate, we don't do this double-scan strategy,
- ;;; because the predicate might have side-effects or be very expensive to
- ;;; compute. So we preallocate a temp buffer pessimistically, and only do
- ;;; one scan over S. This is likely to be faster and more space-efficient
- ;;; than consing a list.
- (define (string-delete criterion s . maybe-start+end)
- (let-string-start+end (start end) string-delete s maybe-start+end
- (if (procedure? criterion)
- (let* ((slen (- end start))
- (temp (make-string slen))
- (ans-len (string-fold (lambda (c i)
- (if (criterion c) i
- (begin (string-set! temp i c)
- (+ i 1))))
- 0 s start end)))
- (if (= ans-len slen) temp (substring temp 0 ans-len)))
- (let* ((cset (cond ((char-set? criterion) criterion)
- ((char? criterion) (char-set criterion))
- (else
- (assertion-violation 'string-delete "string-delete criterion not predicate, char or char-set" criterion))))
- (len (string-fold (lambda (c i) (if (char-set-contains? cset c)
- i
- (+ i 1)))
- 0 s start end))
- (ans (make-string len)))
- (string-fold (lambda (c i) (if (char-set-contains? cset c)
- i
- (begin (string-set! ans i c)
- (+ i 1))))
- 0 s start end)
- ans))))
- (define (string-filter criterion s . maybe-start+end)
- (let-string-start+end (start end) string-filter s maybe-start+end
- (if (procedure? criterion)
- (let* ((slen (- end start))
- (temp (make-string slen))
- (ans-len (string-fold (lambda (c i)
- (if (criterion c)
- (begin (string-set! temp i c)
- (+ i 1))
- i))
- 0 s start end)))
- (if (= ans-len slen) temp (substring temp 0 ans-len)))
- (let* ((cset (cond ((char-set? criterion) criterion)
- ((char? criterion) (char-set criterion))
- (else (assertion-violation 'string-filter "string-filter criterion not predicate, char or char-set" criterion))))
- (len (string-fold (lambda (c i) (if (char-set-contains? cset c)
- (+ i 1)
- i))
- 0 s start end))
- (ans (make-string len)))
- (string-fold (lambda (c i) (if (char-set-contains? cset c)
- (begin (string-set! ans i c)
- (+ i 1))
- i))
- 0 s start end)
- ans))))
- ;;; String search
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; string-index string char/char-set/pred [start end]
- ;;; string-index-right string char/char-set/pred [start end]
- ;;; string-skip string char/char-set/pred [start end]
- ;;; string-skip-right string char/char-set/pred [start end]
- ;;; string-count string char/char-set/pred [start end]
- ;;; There's a lot of replicated code here for efficiency.
- ;;; For example, the char/char-set/pred discrimination has
- ;;; been lifted above the inner loop of each proc.
- (define (string-index str criterion . maybe-start+end)
- (let-string-start+end (start end) string-index str maybe-start+end
- (cond ((char? criterion)
- (let lp ((i start))
- (and (< i end)
- (if (char=? criterion (string-ref str i)) i
- (lp (+ i 1))))))
- ((char-set? criterion)
- (let lp ((i start))
- (and (< i end)
- (if (char-set-contains? criterion (string-ref str i)) i
- (lp (+ i 1))))))
- ((procedure? criterion)
- (let lp ((i start))
- (and (< i end)
- (if (criterion (string-ref str i)) i
- (lp (+ i 1))))))
- (else (assertion-violation 'string-index
- "Second param is neither char-set, char, or predicate procedure."
- criterion)))))
- (define (string-index-right str criterion . maybe-start+end)
- (let-string-start+end (start end) string-index-right str maybe-start+end
- (cond ((char? criterion)
- (let lp ((i (- end 1)))
- (and (>= i start)
- (if (char=? criterion (string-ref str i)) i
- (lp (- i 1))))))
- ((char-set? criterion)
- (let lp ((i (- end 1)))
- (and (>= i start)
- (if (char-set-contains? criterion (string-ref str i)) i
- (lp (- i 1))))))
- ((procedure? criterion)
- (let lp ((i (- end 1)))
- (and (>= i start)
- (if (criterion (string-ref str i)) i
- (lp (- i 1))))))
- (else (assertion-violation
- 'string-index-right
- "Second param is neither char-set, char, or predicate procedure."
- criterion)))))
- (define (string-skip str criterion . maybe-start+end)
- (let-string-start+end (start end) string-skip str maybe-start+end
- (cond ((char? criterion)
- (let lp ((i start))
- (and (< i end)
- (if (char=? criterion (string-ref str i))
- (lp (+ i 1))
- i))))
- ((char-set? criterion)
- (let lp ((i start))
- (and (< i end)
- (if (char-set-contains? criterion (string-ref str i))
- (lp (+ i 1))
- i))))
- ((procedure? criterion)
- (let lp ((i start))
- (and (< i end)
- (if (criterion (string-ref str i)) (lp (+ i 1))
- i))))
- (else (assertion-violation
- 'string-skip
- "Second param is neither char-set, char, or predicate procedure."
- criterion)))))
- (define (string-skip-right str criterion . maybe-start+end)
- (let-string-start+end (start end) string-skip-right str maybe-start+end
- (cond ((char? criterion)
- (let lp ((i (- end 1)))
- (and (>= i start)
- (if (char=? criterion (string-ref str i))
- (lp (- i 1))
- i))))
- ((char-set? criterion)
- (let lp ((i (- end 1)))
- (and (>= i start)
- (if (char-set-contains? criterion (string-ref str i))
- (lp (- i 1))
- i))))
- ((procedure? criterion)
- (let lp ((i (- end 1)))
- (and (>= i start)
- (if (criterion (string-ref str i)) (lp (- i 1))
- i))))
- (else (assertion-violation 'string-skip-right
- "CRITERION param is neither char-set or char."
- criterion)))))
- (define (string-count s criterion . maybe-start+end)
- (let-string-start+end (start end) string-count s maybe-start+end
- (cond ((char? criterion)
- (do ((i start (+ i 1))
- (count 0 (if (char=? criterion (string-ref s i))
- (+ count 1)
- count)))
- ((>= i end) count)))
- ((char-set? criterion)
- (do ((i start (+ i 1))
- (count 0 (if (char-set-contains? criterion (string-ref s i))
- (+ count 1)
- count)))
- ((>= i end) count)))
- ((procedure? criterion)
- (do ((i start (+ i 1))
- (count 0 (if (criterion (string-ref s i)) (+ count 1) count)))
- ((>= i end) count)))
- (else (assertion-violation
- 'string-count
- "CRITERION param is neither char-set or char."
- criterion)))))
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; string-fill! string char [start end]
- ;;;
- ;;; string-copy! to tstart from [fstart fend]
- ;;; Guaranteed to work, even if s1 eq s2.
- (define (string-fill! s char . maybe-start+end)
- (check-arg char? char 'string-fill!)
- (let-string-start+end (start end) string-fill! s maybe-start+end
- (do ((i (- end 1) (- i 1)))
- ((< i start))
- (string-set! s i char))))
- (define (string-copy! to tstart from . maybe-fstart+fend)
- (let-string-start+end (fstart fend) string-copy! from maybe-fstart+fend
- (check-arg integer? tstart 'string-copy!)
- (check-substring-spec 'string-copy! to tstart (+ tstart (- fend fstart)))
- (%string-copy! to tstart from fstart fend)))
- ;;; Library-internal routine
- (define (%string-copy! to tstart from fstart fend)
- (if (> fstart tstart)
- (do ((i fstart (+ i 1))
- (j tstart (+ j 1)))
- ((>= i fend))
- (string-set! to j (string-ref from i)))
- (do ((i (- fend 1) (- i 1))
- (j (+ -1 tstart (- fend fstart)) (- j 1)))
- ((< i fstart))
- (string-set! to j (string-ref from i)))))
- ;;; Returns starting-position in STRING or #f if not true.
- ;;; This implementation is slow & simple. It is useful as a "spec" or for
- ;;; comparison testing with fancier implementations.
- ;;; See below for fast KMP version.
- ;(define (string-contains string substring . maybe-starts+ends)
- ; (let-string-start+end2 (start1 end1 start2 end2)
- ; string-contains string substring maybe-starts+ends
- ; (let* ((len (- end2 start2))
- ; (i-bound (- end1 len)))
- ; (let lp ((i start1))
- ; (and (< i i-bound)
- ; (if (string= string substring i (+ i len) start2 end2)
- ; i
- ; (lp (+ i 1))))))))
- ;;; Searching for an occurrence of a substring
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- (define (string-contains text pattern . maybe-starts+ends)
- (let-string-start+end2 (t-start t-end p-start p-end)
- string-contains text pattern maybe-starts+ends
- (%kmp-search pattern text char=? p-start p-end t-start t-end)))
- (define (string-contains-ci text pattern . maybe-starts+ends)
- (let-string-start+end2 (t-start t-end p-start p-end)
- string-contains-ci text pattern maybe-starts+ends
- (%kmp-search pattern text char-ci=? p-start p-end t-start t-end)))
- ;;; Knuth-Morris-Pratt string searching
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; See
- ;;; "Fast pattern matching in strings"
- ;;; SIAM J. Computing 6(2):323-350 1977
- ;;; D. E. Knuth, J. H. Morris and V. R. Pratt
- ;;; also described in
- ;;; "Pattern matching in strings"
- ;;; Alfred V. Aho
- ;;; Formal Language Theory - Perspectives and Open Problems
- ;;; Ronald V. Brook (editor)
- ;;; This algorithm is O(m + n) where m and n are the
- ;;; lengths of the pattern and string respectively
- ;;; KMP search source[start,end) for PATTERN. Return starting index of
- ;;; leftmost match or #f.
- (define (%kmp-search pattern text c= p-start p-end t-start t-end)
- (let ((plen (- p-end p-start))
- (rv (make-kmp-restart-vector pattern c= p-start p-end)))
- ;; The search loop. TJ & PJ are redundant state.
- (let lp ((ti t-start) (pi 0)
- (tj (- t-end t-start)) ; (- tlen ti) -- how many chars left.
- (pj plen)) ; (- plen pi) -- how many chars left.
- (if (= pi plen)
- (- ti plen) ; Win.
- (and (<= pj tj) ; Lose.
- (if (c= (string-ref text ti) ; Search.
- (string-ref pattern (+ p-start pi)))
- (lp (+ 1 ti) (+ 1 pi) (- tj 1) (- pj 1)) ; Advance.
-
- (let ((pi (vector-ref rv pi))) ; Retreat.
- (if (= pi -1)
- (lp (+ ti 1) 0 (- tj 1) plen) ; Punt.
- (lp ti pi tj (- plen pi))))))))))
- ;;; (make-kmp-restart-vector pattern [c= start end]) -> integer-vector
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; Compute the KMP restart vector RV for string PATTERN. If
- ;;; we have matched chars 0..i-1 of PATTERN against a search string S, and
- ;;; PATTERN[i] doesn't match S[k], then reset i := RV[i], and try again to
- ;;; match S[k]. If RV[i] = -1, then punt S[k] completely, and move on to
- ;;; S[k+1] and PATTERN[0] -- no possible match of PAT[0..i] contains S[k].
- ;;;
- ;;; In other words, if you have matched the first i chars of PATTERN, but
- ;;; the i+1'th char doesn't match, RV[i] tells you what the next-longest
- ;;; prefix of PATTERN is that you have matched.
- ;;;
- ;;; - C= (default CHAR=?) is used to compare characters for equality.
- ;;; Pass in CHAR-CI=? for case-folded string search.
- ;;;
- ;;; - START & END restrict the pattern to the indicated substring; the
- ;;; returned vector will be of length END - START. The numbers stored
- ;;; in the vector will be values in the range [0,END-START) -- that is,
- ;;; they are valid indices into the restart vector; you have to add START
- ;;; to them to use them as indices into PATTERN.
- ;;;
- ;;; I've split this out as a separate function in case other constant-string
- ;;; searchers might want to use it.
- ;;;
- ;;; E.g.:
- ;;; a b d a b x
- ;;; #(-1 0 0 -1 1 2)
- (define (make-kmp-restart-vector pattern . maybe-c=+start+end)
- (let-optionals* maybe-c=+start+end
- ((c= char=? (procedure? c=))
- ((start end) (lambda (args)
- (string-parse-start+end 'make-kmp-restart-vector
- pattern args))))
- (let* ((rvlen (- end start))
- (rv (make-vector rvlen -1)))
- (if (> rvlen 0)
- (let ((rvlen-1 (- rvlen 1))
- (c0 (string-ref pattern start)))
- ;; Here's the main loop. We have set rv[0] ... rv[i].
- ;; K = I + START -- it is the corresponding index into PATTERN.
- (let lp1 ((i 0) (j -1) (k start))
- (if (< i rvlen-1)
- ;; lp2 invariant:
- ;; pat[(k-j) .. k-1] matches pat[start .. start+j-1]
- ;; or j = -1.
- (let lp2 ((j j))
- (cond ((= j -1)
- (let ((i1 (+ 1 i)))
- (if (not (c= (string-ref pattern (+ k 1)) c0))
- (vector-set! rv i1 0))
- (lp1 i1 0 (+ k 1))))
- ;; pat[(k-j) .. k] matches pat[start..start+j].
- ((c= (string-ref pattern k) (string-ref pattern (+ j start)))
- (let* ((i1 (+ 1 i))
- (j1 (+ 1 j)))
- (vector-set! rv i1 j1)
- (lp1 i1 j1 (+ k 1))))
- (else (lp2 (vector-ref rv j)))))))))
- rv)))
- ;;; We've matched I chars from PAT. C is the next char from the search string.
- ;;; Return the new I after handling C.
- ;;;
- ;;; The pattern is (VECTOR-LENGTH RV) chars long, beginning at index PAT-START
- ;;; in PAT (PAT-START is usually 0). The I chars of the pattern we've matched
- ;;; are
- ;;; PAT[PAT-START .. PAT-START + I].
- ;;;
- ;;; It's *not* an oversight that there is no friendly error checking or
- ;;; defaulting of arguments. This is a low-level, inner-loop procedure
- ;;; that we want integrated/inlined into the point of call.
- (define (kmp-step pat rv c i c= p-start)
- (let lp ((i i))
- (if (c= c (string-ref pat (+ i p-start))) ; Match =>
- (+ i 1) ; Done.
- (let ((i (vector-ref rv i))) ; Back up in PAT.
- (if (= i -1) 0 ; Can't back up further.
- (lp i)))))) ; Keep trying for match.
- ;;; Zip through S[start,end), looking for a match of PAT. Assume we've
- ;;; already matched the first I chars of PAT when we commence at S[start].
- ;;; - <0: If we find a match *ending* at index J, return -J.
- ;;; - >=0: If we get to the end of the S[start,end) span without finding
- ;;; a complete match, return the number of chars from PAT we'd matched
- ;;; when we ran off the end.
- ;;;
- ;;; This is useful for searching *across* buffers -- that is, when your
- ;;; input comes in chunks of text. We hand-integrate the KMP-STEP loop
- ;;; for speed.
- (define (string-kmp-partial-search pat rv s i . c=+p-start+s-start+s-end)
- (check-arg vector? rv 'string-kmp-partial-search)
- (let-optionals* c=+p-start+s-start+s-end
- ((c= char=? (procedure? c=))
- (p-start 0 (and (integer? p-start) (exact? p-start) (<= 0 p-start)))
- ((s-start s-end) (lambda (args)
- (string-parse-start+end 'string-kmp-partial-search
- s args))))
- (let ((patlen (vector-length rv)))
- (check-arg (lambda (i) (and (integer? i) (exact? i) (<= 0 i) (< i patlen)))
- i 'string-kmp-partial-search)
- ;; Enough prelude. Here's the actual code.
- (let lp ((si s-start) ; An index into S.
- (vi i)) ; An index into RV.
- (cond ((= vi patlen) (- si)) ; Win.
- ((= si s-end) vi) ; Ran off the end.
- (else ; Match s[si] & loop.
- (let ((c (string-ref s si)))
- (lp (+ si 1)
- (let lp2 ((vi vi)) ; This is just KMP-STEP.
- (if (c= c (string-ref pat (+ vi p-start)))
- (+ vi 1)
- (let ((vi (vector-ref rv vi)))
- (if (= vi -1) 0
- (lp2 vi)))))))))))))
- ;;; Misc
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; (string-null? s)
- ;;; (string-reverse s [start end])
- ;;; (string-reverse! s [start end])
- ;;; (reverse-list->string clist)
- ;;; (string->list s [start end])
- (define (string-null? s) (zero? (string-length s)))
- (define (string-reverse s . maybe-start+end)
- (let-string-start+end (start end) string-reverse s maybe-start+end
- (let* ((len (- end start))
- (ans (make-string len)))
- (do ((i start (+ i 1))
- (j (- len 1) (- j 1)))
- ((< j 0))
- (string-set! ans j (string-ref s i)))
- ans)))
- (define (string-reverse! s . maybe-start+end)
- (let-string-start+end (start end) string-reverse! s maybe-start+end
- (do ((i (- end 1) (- i 1))
- (j start (+ j 1)))
- ((<= i j))
- (let ((ci (string-ref s i)))
- (string-set! s i (string-ref s j))
- (string-set! s j ci)))))
- (define (reverse-list->string clist)
- (let* ((len (length clist))
- (s (make-string len)))
- (do ((i (- len 1) (- i 1)) (clist clist (cdr clist)))
- ((not (pair? clist)))
- (string-set! s i (car clist)))
- s))
- ;(define (string->list s . maybe-start+end)
- ; (apply string-fold-right cons '() s maybe-start+end))
- (define (string->list s . maybe-start+end)
- (let-string-start+end (start end) string->list s maybe-start+end
- (do ((i (- end 1) (- i 1))
- (ans '() (cons (string-ref s i) ans)))
- ((< i start) ans))))
- ;;; Defined by R5RS, so commented out here.
- ;(define (list->string lis) (string-unfold null? car cdr lis))
- ;;; string-concatenate string-list -> string
- ;;; string-concatenate/shared string-list -> string
- ;;; string-append/shared s ... -> string
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; STRING-APPEND/SHARED has license to return a string that shares storage
- ;;; with any of its arguments. In particular, if there is only one non-empty
- ;;; string amongst its parameters, it is permitted to return that string as
- ;;; its result. STRING-APPEND, by contrast, always allocates new storage.
- ;;;
- ;;; STRING-CONCATENATE & STRING-CONCATENATE/SHARED are passed a list of
- ;;; strings, which they concatenate into a result string. STRING-CONCATENATE
- ;;; always allocates a fresh string; STRING-CONCATENATE/SHARED may (or may
- ;;; not) return a result that shares storage with any of its arguments. In
- ;;; particular, if it is applied to a singleton list, it is permitted to
- ;;; return the car of that list as its value.
- (define (string-append/shared . strings) (string-concatenate/shared strings))
- (define (string-concatenate/shared strings)
- (let lp ((strings strings) (nchars 0) (first #f))
- (cond ((pair? strings) ; Scan the args, add up total
- (let* ((string (car strings)) ; length, remember 1st
- (tail (cdr strings)) ; non-empty string.
- (slen (string-length string)))
- (if (zero? slen)
- (lp tail nchars first)
- (lp tail (+ nchars slen) (or first strings)))))
- ((zero? nchars) "")
- ;; Just one non-empty string! Return it.
- ((= nchars (string-length (car first))) (car first))
- (else (let ((ans (make-string nchars)))
- (let lp ((strings first) (i 0))
- (if (pair? strings)
- (let* ((s (car strings))
- (slen (string-length s)))
- (%string-copy! ans i s 0 slen)
- (lp (cdr strings) (+ i slen)))))
- ans)))))
-
- ; Alas, Scheme 48's APPLY blows up if you have many, many arguments.
- ;(define (string-concatenate strings) (apply string-append strings))
- ;;; Here it is written out. I avoid using REDUCE to add up string lengths
- ;;; to avoid non-R5RS dependencies.
- (define (string-concatenate strings)
- (let* ((total (do ((strings strings (cdr strings))
- (i 0 (+ i (string-length (car strings)))))
- ((not (pair? strings)) i)))
- (ans (make-string total)))
- (let lp ((i 0) (strings strings))
- (if (pair? strings)
- (let* ((s (car strings))
- (slen (string-length s)))
- (%string-copy! ans i s 0 slen)
- (lp (+ i slen) (cdr strings)))))
- ans))
-
- ;;; Defined by R5RS, so commented out here.
- ;(define (string-append . strings) (string-concatenate strings))
- ;;; string-concatenate-reverse string-list [final-string end] -> string
- ;;; string-concatenate-reverse/shared string-list [final-string end] -> string
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; Return
- ;;; (string-concatenate
- ;;; (reverse
- ;;; (cons (substring final-string 0 end) string-list)))
- (define (string-concatenate-reverse string-list . maybe-final+end)
- (let-optionals* maybe-final+end ((final "" (string? final))
- (end (string-length final)
- (and (integer? end)
- (exact? end)
- (<= 0 end (string-length final)))))
- (let ((len (let lp ((sum 0) (lis string-list))
- (if (pair? lis)
- (lp (+ sum (string-length (car lis))) (cdr lis))
- sum))))
- (%finish-string-concatenate-reverse len string-list final end))))
- (define (string-concatenate-reverse/shared string-list . maybe-final+end)
- (let-optionals* maybe-final+end ((final "" (string? final))
- (end (string-length final)
- (and (integer? end)
- (exact? end)
- (<= 0 end (string-length final)))))
- ;; Add up the lengths of all the strings in STRING-LIST; also get a
- ;; pointer NZLIST into STRING-LIST showing where the first non-zero-length
- ;; string starts.
- (let lp ((len 0) (nzlist #f) (lis string-list))
- (if (pair? lis)
- (let ((slen (string-length (car lis))))
- (lp (+ len slen)
- (if (or nzlist (zero? slen)) nzlist lis)
- (cdr lis)))
- (cond ((zero? len) (substring/shared final 0 end))
- ;; LEN > 0, so NZLIST is non-empty.
- ((and (zero? end) (= len (string-length (car nzlist))))
- (car nzlist))
- (else (%finish-string-concatenate-reverse len nzlist final end)))))))
- (define (%finish-string-concatenate-reverse len string-list final end)
- (let ((ans (make-string (+ end len))))
- (%string-copy! ans len final 0 end)
- (let lp ((i len) (lis string-list))
- (if (pair? lis)
- (let* ((s (car lis))
- (lis (cdr lis))
- (slen (string-length s))
- (i (- i slen)))
- (%string-copy! ans i s 0 slen)
- (lp i lis))))
- ans))
- ;;; string-replace s1 s2 start1 end1 [start2 end2] -> string
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; Replace S1[START1,END1) with S2[START2,END2).
- (define (string-replace s1 s2 start1 end1 . maybe-start+end)
- (check-substring-spec 'string-replace s1 start1 end1)
- (let-string-start+end (start2 end2) string-replace s2 maybe-start+end
- (let* ((slen1 (string-length s1))
- (sublen2 (- end2 start2))
- (alen (+ (- slen1 (- end1 start1)) sublen2))
- (ans (make-string alen)))
- (%string-copy! ans 0 s1 0 start1)
- (%string-copy! ans start1 s2 start2 end2)
- (%string-copy! ans (+ start1 sublen2) s1 end1 slen1)
- ans)))
- ;;; string-tokenize s [token-set start end] -> list
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; Break S up into a list of token strings, where a token is a maximal
- ;;; non-empty contiguous sequence of chars belonging to TOKEN-SET.
- ;;; (string-tokenize "hello, world") => ("hello," "world")
- (define (string-tokenize s . token-chars+start+end)
- (let-optionals* token-chars+start+end
- ((token-chars char-set:graphic (char-set? token-chars)) rest)
- (let-string-start+end (start end) string-tokenize s rest
- (let lp ((i end) (ans '()))
- (cond ((and (< start i) (string-index-right s token-chars start i)) =>
- (lambda (tend-1)
- (let ((tend (+ 1 tend-1)))
- (cond ((string-skip-right s token-chars start tend-1) =>
- (lambda (tstart-1)
- (lp tstart-1
- (cons (substring s (+ 1 tstart-1) tend)
- ans))))
- (else (cons (substring s start tend) ans))))))
- (else ans))))))
- ;;; xsubstring s from [to start end] -> string
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; S is a string; START and END are optional arguments that demarcate
- ;;; a substring of S, defaulting to 0 and the length of S (e.g., the whole
- ;;; string). Replicate this substring up and down index space, in both the
- ;; positive and negative directions. For example, if S = "abcdefg", START=3,
- ;;; and END=6, then we have the conceptual bidirectionally-infinite string
- ;;; ... d e f d e f d e f d e f d e f d e f d e f ...
- ;;; ... -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 ...
- ;;; XSUBSTRING returns the substring of this string beginning at index FROM,
- ;;; and ending at TO (which defaults to FROM+(END-START)).
- ;;;
- ;;; You can use XSUBSTRING in many ways:
- ;;; - To rotate a string left: (xsubstring "abcdef" 2) => "cdefab"
- ;;; - To rotate a string right: (xsubstring "abcdef" -2) => "efabcd"
- ;;; - To replicate a string: (xsubstring "abc" 0 7) => "abcabca"
- ;;;
- ;;; Note that
- ;;; - The FROM/TO indices give a half-open range -- the characters from
- ;;; index FROM up to, but not including index TO.
- ;;; - The FROM/TO indices are not in terms of the index space for string S.
- ;;; They are in terms of the replicated index space of the substring
- ;;; defined by S, START, and END.
- ;;;
- ;;; It is an error if START=END -- although this is allowed by special
- ;;; dispensation when FROM=TO.
- (define (xsubstring s from . maybe-to+start+end)
- (check-arg (lambda (val) (and (integer? val) (exact? val)))
- from 'xsubstring)
- (receive (to start end)
- (if (pair? maybe-to+start+end)
- (let-string-start+end (start end) xsubstring s (cdr maybe-to+start+end)
- (let ((to (car maybe-to+start+end)))
- (check-arg (lambda (val) (and (integer? val)
- (exact? val)
- (<= from val)))
- to 'xsubstring)
- (values to start end)))
- (let ((slen (string-length (check-arg string? s 'xsubstring))))
- (values (+ from slen) 0 slen)))
- (let ((slen (- end start))
- (anslen (- to from)))
- (cond ((zero? anslen) "")
- ((zero? slen) (assertion-violation 'xsubstring
- "Cannot replicate empty (sub)string"
- s from to start end))
- ((= 1 slen) ; Fast path for 1-char replication.
- (make-string anslen (string-ref s start)))
- ;; Selected text falls entirely within one span.
- ((= (floor (/ from slen)) (floor (/ to slen)))
- (substring s (+ start (modulo from slen))
- (+ start (modulo to slen))))
- ;; Selected text requires multiple spans.
- (else (let ((ans (make-string anslen)))
- (%multispan-repcopy! ans 0 s from to start end)
- ans))))))
- ;;; string-xcopy! target tstart s sfrom [sto start end] -> unspecific
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; Exactly the same as xsubstring, but the extracted text is written
- ;;; into the string TARGET starting at index TSTART.
- ;;; This operation is not defined if (EQ? TARGET S) -- you cannot copy
- ;;; a string on top of itself.
- (define (string-xcopy! target tstart s sfrom . maybe-sto+start+end)
- (check-arg (lambda (val) (and (integer? val) (exact? val)))
- sfrom 'string-xcopy!)
- (receive (sto start end)
- (if (pair? maybe-sto+start+end)
- (let-string-start+end (start end) string-xcopy! s (cdr maybe-sto+start+end)
- (let ((sto (car maybe-sto+start+end)))
- (check-arg (lambda (val) (and (integer? val) (exact? val)))
- sto 'string-xcopy!)
- (values sto start end)))
- (let ((slen (string-length s)))
- (values (+ sfrom slen) 0 slen)))
- (let* ((tocopy (- sto sfrom))
- (tend (+ tstart tocopy))
- (slen (- end start)))
- (check-substring-spec 'string-xcopy! target tstart tend)
- (cond ((zero? tocopy))
- ((zero? slen)
- (assertion-violation 'string-xcopy!
- "Cannot replicate empty (sub)string"
- target tstart s sfrom sto start end))
- ((= 1 slen) ; Fast path for 1-char replication.
- (string-fill! target (string-ref s start) tstart tend))
- ;; Selected text falls entirely within one span.
- ((= (floor (/ sfrom slen)) (floor (/ sto slen)))
- (%string-copy! target tstart s
- (+ start (modulo sfrom slen))
- (+ start (modulo sto slen))))
- ;; Multi-span copy.
- (else (%multispan-repcopy! target tstart s sfrom sto start end))))))
- ;;; This is the core copying loop for XSUBSTRING and STRING-XCOPY!
- ;;; Internal -- not exported, no careful arg checking.
- (define (%multispan-repcopy! target tstart s sfrom sto start end)
- (let* ((slen (- end start))
- (i0 (+ start (modulo sfrom slen)))
- (total-chars (- sto sfrom)))
- ;; Copy the partial span @ the beginning
- (%string-copy! target tstart s i0 end)
-
- (let* ((ncopied (- end i0)) ; We've copied this many.
- (nleft (- total-chars ncopied)) ; # chars left to copy.
- (nspans (quotient nleft slen))) ; # whole spans to copy
-
- ;; Copy the whole spans in the middle.
- (do ((i (+ tstart ncopied) (+ i slen)) ; Current target index.
- (nspans nspans (- nspans 1))) ; # spans to copy
- ((zero? nspans)
- ;; Copy the partial-span @ the end & we're done.
- (%string-copy! target i s start (+ start (- total-chars (- i tstart)))))
- (%string-copy! target i s start end))))); Copy a whole span.
- ;;; (string-join string-list [delimiter grammar]) => string
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; Paste strings together using the delimiter string.
- ;;;
- ;;; (join-strings '("foo" "bar" "baz") ":") => "foo:bar:baz"
- ;;;
- ;;; DELIMITER defaults to a single space " "
- ;;; GRAMMAR is one of the symbols {prefix, infix, strict-infix, suffix}
- ;;; and defaults to 'infix.
- ;;;
- ;;; I could rewrite this more efficiently -- precompute the length of the
- ;;; answer string, then allocate & fill it in iteratively. Using
- ;;; STRING-CONCATENATE is less efficient.
- (define (string-join strings . delim+grammar)
- (let-optionals* delim+grammar ((delim " " (string? delim))
- (grammar 'infix))
- (let ((buildit (lambda (lis final)
- (let recur ((lis lis))
- (if (pair? lis)
- (cons delim (cons (car lis) (recur (cdr lis))))
- final)))))
- (cond ((pair? strings)
- (string-concatenate
- (case grammar
- ((infix strict-infix)
- (cons (car strings) (buildit (cdr strings) '())))
- ((prefix) (buildit strings '()))
- ((suffix)
- (cons (car strings) (buildit (cdr strings) (list delim))))
- (else (assertion-violation 'string-join
- "Illegal join grammar"
- grammar)))))
- ((not (null? strings))
- (assertion-violation 'string-join
- "STRINGS parameter not list." strings))
- ;; STRINGS is ()
- ((eq? grammar 'strict-infix)
- (assertion-violation 'string-join
- "Empty list cannot be joined with STRICT-INFIX grammar."))
- (else ""))))) ; Special-cased for infix grammar.
- ;;; Porting & performance-tuning notes
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; See the section at the beginning of this file on external dependencies.
- ;;;
- ;;; The biggest issue with respect to porting is the LET-OPTIONALS* macro.
- ;;; There are many, many optional arguments in this library; the complexity
- ;;; of parsing, defaulting & type-testing these parameters is handled with the
- ;;; aid of this macro. There are about 15 uses of LET-OPTIONALS*. You can
- ;;; rewrite the uses, port the hairy macro definition (which is implemented
- ;;; using a Clinger-Rees low-level explicit-renaming macro system), or port
- ;;; the simple, high-level definition, which is less efficient.
- ;;;
- ;;; There is a fair amount of argument checking. This is, strictly speaking,
- ;;; unnecessary -- the actual body of the procedures will blow up if, say, a
- ;;; START/END index is improper. However, the error message will not be as
- ;;; good as if the error were caught at the "higher level." Also, a very, very
- ;;; smart Scheme compiler may be able to exploit having the type checks done
- ;;; early, so that the actual body of the procedures can assume proper values.
- ;;; This isn't likely; this kind of compiler technology isn't common any
- ;;; longer.
- ;;;
- ;;; The overhead of optional-argument parsing is irritating. The optional
- ;;; arguments must be consed into a rest list on entry, and then parsed out.
- ;;; Function call should be a matter of a few register moves and a jump; it
- ;;; should not involve heap allocation! Your Scheme system may have a superior
- ;;; non-R5RS optional-argument system that can eliminate this overhead. If so,
- ;;; then this is a prime candidate for optimising these procedures,
- ;;; *especially* the many optional START/END index parameters.
- ;;;
- ;;; Note that optional arguments are also a barrier to procedure integration.
- ;;; If your Scheme system permits you to specify alternate entry points
- ;;; for a call when the number of optional arguments is known in a manner
- ;;; that enables inlining/integration, this can provide performance
- ;;; improvements.
- ;;;
- ;;; There is enough *explicit* error checking that *all* string-index
- ;;; operations should *never* produce a bounds error. Period. Feel like
- ;;; living dangerously? *Big* performance win to be had by replacing
- ;;; STRING-REF's and STRING-SET!'s with unsafe equivalents in the loops.
- ;;; Similarly, fixnum-specific operators can speed up the arithmetic done on
- ;;; the index values in the inner loops. The only arguments that are not
- ;;; completely error checked are
- ;;; - string lists (complete checking requires time proportional to the
- ;;; length of the list)
- ;;; - procedure arguments, such as char->char maps & predicates.
- ;;; There is no way to check the range & domain of procedures in Scheme.
- ;;; Procedures that take these parameters cannot fully check their
- ;;; arguments. But all other types to all other procedures are fully
- ;;; checked.
- ;;;
- ;;; This does open up the alternate possibility of simply *removing* these
- ;;; checks, and letting the safe primitives raise the errors. On a dumb
- ;;; Scheme system, this would provide speed (by eliminating the redundant
- ;;; error checks) at the cost of error-message clarity.
- ;;;
- ;;; See the comments preceding the hash function code for notes on tuning
- ;;; the default bound so that the code never overflows your implementation's
- ;;; fixnum size into bignum calculation.
- ;;;
- ;;; In an interpreted Scheme, some of these procedures, or the internal
- ;;; routines with % prefixes, are excellent candidates for being rewritten
- ;;; in C. Consider STRING-HASH, %STRING-COMPARE, the
- ;;; %STRING-{SUF,PRE}FIX-LENGTH routines, STRING-COPY!, STRING-INDEX &
- ;;; STRING-SKIP (char-set & char cases), SUBSTRING and SUBSTRING/SHARED,
- ;;; %KMP-SEARCH, and %MULTISPAN-REPCOPY!.
- ;;;
- ;;; It would also be nice to have the ability to mark some of these
- ;;; routines as candidates for inlining/integration.
- ;;;
- ;;; All the %-prefixed routines in this source code are written
- ;;; to be called internally to this library. They do *not* perform
- ;;; friendly error checks on the inputs; they assume everything is
- ;;; proper. They also do not take optional arguments. These two properties
- ;;; save calling overhead and enable procedure integration -- but they
- ;;; are not appropriate for exported routines.
- ;;; Copyright details
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; The prefix/suffix and comparison routines in this code had (extremely
- ;;; distant) origins in MIT Scheme's string lib, and was substantially
- ;;; reworked by Olin Shivers (shivers@ai.mit.edu) 9/98. As such, it is
- ;;; covered by MIT Scheme's open source copyright. See below for details.
- ;;;
- ;;; The KMP string-search code was influenced by implementations written
- ;;; by Stephen Bevan, Brian Dehneyer and Will Fitzgerald. However, this
- ;;; version was written from scratch by myself.
- ;;;
- ;;; The remainder of this code was written from scratch by myself for scsh.
- ;;; The scsh copyright is a BSD-style open source copyright. See below for
- ;;; details.
- ;;; -Olin Shivers
- ;;; MIT Scheme copyright terms
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; This material was developed by the Scheme project at the Massachusetts
- ;;; Institute of Technology, Department of Electrical Engineering and
- ;;; Computer Science. Permission to copy and modify this software, to
- ;;; redistribute either the original software or a modified version, and
- ;;; to use this software for any purpose is granted, subject to the
- ;;; following restrictions and understandings.
- ;;;
- ;;; 1. Any copy made of this software must include this copyright notice
- ;;; in full.
- ;;;
- ;;; 2. Users of this software agree to make their best efforts (a) to
- ;;; return to the MIT Scheme project any improvements or extensions that
- ;;; they make, so that these may be included in future releases; and (b)
- ;;; to inform MIT of noteworthy uses of this software.
- ;;;
- ;;; 3. All materials developed as a consequence of the use of this
- ;;; software shall duly acknowledge such use, in accordance with the usual
- ;;; standards of acknowledging credit in academic research.
- ;;;
- ;;; 4. MIT has made no warrantee or representation that the operation of
- ;;; this software will be error-free, and MIT is under no obligation to
- ;;; provide any services, by way of maintenance, update, or otherwise.
- ;;;
- ;;; 5. In conjunction with products arising from the use of this material,
- ;;; there shall be no use of the name of the Massachusetts Institute of
- ;;; Technology nor of any adaptation thereof in any advertising,
- ;;; promotional, or sales literature without prior written consent from
- ;;; MIT in each case.
- ;;; Scsh copyright terms
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; All rights reserved.
- ;;;
- ;;; Redistribution and use in source and binary forms, with or without
- ;;; modification, are permitted provided that the following conditions
- ;;; are met:
- ;;; 1. Redistributions of source code must retain the above copyright
- ;;; notice, this list of conditions and the following disclaimer.
- ;;; 2. Redistributions in binary form must reproduce the above copyright
- ;;; notice, this list of conditions and the following disclaimer in the
- ;;; documentation and/or other materials provided with the distribution.
- ;;; 3. The name of the authors may not be used to endorse or promote products
- ;;; derived from this software without specific prior written permission.
- ;;;
- ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
- ;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- ;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- ;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- ;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- ;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- ;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- ;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- ;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- ;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|