123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186 |
- ;;; The sort package -- delete neighboring duplicate elts
- ;;; Copyright (c) 1998 by Olin Shivers.
- ;;; This code is open-source; see the end of the file for porting and
- ;;; more copyright information.
- ;;; Olin Shivers 11/98.
- ;;; Problem:
- ;;; vector-delete-neighbor-dups pushes N stack frames, where N is the number
- ;;; of elements in the answer vector. This is arguably a very efficient thing
- ;;; to do, but it might blow out on a system with a limited stack but a big
- ;;; heap. We could rewrite this to "chunk" up answers in temp vectors if we
- ;;; push more than a certain number of frames, then allocate a final answer,
- ;;; copying all the chunks into the answer. But it's much more complex code.
- ;;; Exports:
- ;;; (list-delete-neighbor-dups = lis) -> list
- ;;; (list-delete-neighbor-dups! = lis) -> list
- ;;; (vector-delete-neighbor-dups = v [start end]) -> vector
- ;;; (vector-delete-neighbor-dups! = v [start end]) -> end'
- ;;; These procedures delete adjacent duplicate elements from a list or
- ;;; a vector, using a given element equality procedure. The first or leftmost
- ;;; element of a run of equal elements is the one that survives. The list
- ;;; or vector is not otherwise disordered.
- ;;;
- ;;; These procedures are linear time -- much faster than the O(n^2) general
- ;;; duplicate-elt deletors that do not assume any "bunching" of elements.
- ;;; If you want to delete duplicate elements from a large list or vector,
- ;;; sort the elements to bring equal items together, then use one of these
- ;;; procedures -- for a total time of O(n lg n).
- ;;; LIST-DELETE-NEIGHBOR-DUPS
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; Below are multiple versions of the LIST-DELETE-NEIGHBOR-DUPS procedure,
- ;;; from simple to complex. RECUR's contract: Strip off any leading X's from
- ;;; LIS, and return that list neighbor-dup-deleted.
- ;;;
- ;;; The final version
- ;;; - shares a common subtail between the input & output list, up to 1024
- ;;; elements;
- ;;; - Needs no more than 1024 stack frames.
- ;;; Simplest version.
- ;;; - Always allocates a fresh list / never shares storage.
- ;;; - Needs N stack frames, if answer is length N.
- (define (list-delete-neighbor-dups = lis)
- (if (pair? lis)
- (let ((x0 (car lis)))
- (cons x0 (let recur ((x0 x0) (xs (cdr lis)))
- (if (pair? xs)
- (let ((x1 (car xs))
- (x2+ (cdr xs)))
- (if (= x0 x1)
- (recur x0 x2+) ; Loop, actually.
- (cons x1 (recur x1 x2+))))
- xs))))
- lis))
- ;;; This version tries to use cons cells from input by sharing longest
- ;;; common tail between input & output. Still needs N stack frames, for ans
- ;;; of length N.
- (define (list-delete-neighbor-dups = lis)
- (if (pair? lis)
- (let* ((x0 (car lis))
- (xs (cdr lis))
- (ans (let recur ((x0 x0) (xs xs))
- (if (pair? xs)
- (let ((x1 (car xs))
- (x2+ (cdr xs)))
- (if (= x0 x1)
- (recur x0 x2+)
- (let ((ans-tail (recur x1 x2+)))
- (if (eq? ans-tail x2+) xs
- (cons x1 ans-tail)))))
- xs))))
- (if (eq? ans xs) lis (cons x0 ans)))
- lis))
- ;;; LIST-DELETE-NEIGHBOR-DUPS!
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; This code runs in constant list space, constant stack, and also
- ;;; does only the minimum SET-CDR!'s necessary.
- (define (list-delete-neighbor-dups! = lis)
- (if (pair? lis)
- (let lp1 ((prev lis) (prev-elt (car lis)) (lis (cdr lis)))
- (if (pair? lis)
- (let ((lis-elt (car lis))
- (next (cdr lis)))
- (if (= prev-elt lis-elt)
- ;; We found the first elts of a run of dups, so we know
- ;; we're going to have to do a SET-CDR!. Scan to the end of
- ;; the run, do the SET-CDR!, and loop on LP1.
- (let lp2 ((lis next))
- (if (pair? lis)
- (let ((lis-elt (car lis))
- (next (cdr lis)))
- (if (= prev-elt lis-elt)
- (lp2 next)
- (begin (set-cdr! prev lis)
- (lp1 lis lis-elt next))))
- (set-cdr! prev lis))) ; Ran off end => quit.
- (lp1 lis lis-elt next))))))
- lis)
- (define (vector-delete-neighbor-dups elt= v . maybe-start+end)
- (call-with-values
- (lambda () (vector-start+end v maybe-start+end))
- (lambda (start end)
- (if (< start end)
- (let* ((x (vector-ref v start))
- (ans (let recur ((x x) (i start) (j 1))
- (if (< i end)
- (let ((y (vector-ref v i))
- (nexti (+ i 1)))
- (if (elt= x y)
- (recur x nexti j)
- (let ((ansvec (recur y nexti (+ j 1))))
- (vector-set! ansvec j y)
- ansvec)))
- (make-vector j)))))
- (vector-set! ans 0 x)
- ans)
- '#()))))
- ;;; Packs the surviving elements to the left, in range [start,end'),
- ;;; and returns END'.
- (define (vector-delete-neighbor-dups! elt= v . maybe-start+end)
- (call-with-values
- (lambda () (vector-start+end v maybe-start+end))
- (lambda (start end)
- (if (>= start end)
- end
- ;; To eliminate unnecessary copying (read elt i then write the value
- ;; back at index i), we scan until we find the first dup.
- (let skip ((j start) (vj (vector-ref v start)))
- (let ((j+1 (+ j 1)))
- (if (>= j+1 end)
- end
- (let ((vj+1 (vector-ref v j+1)))
- (if (not (elt= vj vj+1))
- (skip j+1 vj+1)
- ;; OK -- j & j+1 are dups, so we're committed to moving
- ;; data around. In lp2, v[start,j] is what we've done;
- ;; v[k,end) is what we have yet to handle.
- (let lp2 ((j j) (vj vj) (k (+ j 2)))
- (let lp3 ((k k))
- (if (>= k end)
- (+ j 1) ; Done.
- (let ((vk (vector-ref v k))
- (k+1 (+ k 1)))
- (if (elt= vj vk)
- (lp3 k+1)
- (let ((j+1 (+ j 1)))
- (vector-set! v j+1 vk)
- (lp2 j+1 vk k+1))))))))))))))))
-
- ;;; Copyright
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; This code is
- ;;; Copyright (c) 1998 by Olin Shivers.
- ;;; The terms are: You may do as you please with this code, as long as
- ;;; you do not delete this notice or hold me responsible for any outcome
- ;;; related to its use.
- ;;;
- ;;; Blah blah blah. Don't you think source files should contain more lines
- ;;; of code than copyright notice?
- ;;;
- ;;; Code porting
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;;
- ;;; If your Scheme has a faster mechanism for handling optional arguments
- ;;; (e.g., Chez), you should definitely port over to it. Note that argument
- ;;; defaulting and error-checking are interleaved -- you don't have to
- ;;; error-check defaulted START/END args to see if they are fixnums that are
- ;;; legal vector indices for the corresponding vector, etc.
|