delndups.scm 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. ;;; The sort package -- delete neighboring duplicate elts
  2. ;;; Copyright (c) 1998 by Olin Shivers.
  3. ;;; This code is open-source; see the end of the file for porting and
  4. ;;; more copyright information.
  5. ;;; Olin Shivers 11/98.
  6. ;;; Problem:
  7. ;;; vector-delete-neighbor-dups pushes N stack frames, where N is the number
  8. ;;; of elements in the answer vector. This is arguably a very efficient thing
  9. ;;; to do, but it might blow out on a system with a limited stack but a big
  10. ;;; heap. We could rewrite this to "chunk" up answers in temp vectors if we
  11. ;;; push more than a certain number of frames, then allocate a final answer,
  12. ;;; copying all the chunks into the answer. But it's much more complex code.
  13. ;;; Exports:
  14. ;;; (list-delete-neighbor-dups = lis) -> list
  15. ;;; (list-delete-neighbor-dups! = lis) -> list
  16. ;;; (vector-delete-neighbor-dups = v [start end]) -> vector
  17. ;;; (vector-delete-neighbor-dups! = v [start end]) -> end'
  18. ;;; These procedures delete adjacent duplicate elements from a list or
  19. ;;; a vector, using a given element equality procedure. The first or leftmost
  20. ;;; element of a run of equal elements is the one that survives. The list
  21. ;;; or vector is not otherwise disordered.
  22. ;;;
  23. ;;; These procedures are linear time -- much faster than the O(n^2) general
  24. ;;; duplicate-elt deletors that do not assume any "bunching" of elements.
  25. ;;; If you want to delete duplicate elements from a large list or vector,
  26. ;;; sort the elements to bring equal items together, then use one of these
  27. ;;; procedures -- for a total time of O(n lg n).
  28. ;;; LIST-DELETE-NEIGHBOR-DUPS
  29. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  30. ;;; Below are multiple versions of the LIST-DELETE-NEIGHBOR-DUPS procedure,
  31. ;;; from simple to complex. RECUR's contract: Strip off any leading X's from
  32. ;;; LIS, and return that list neighbor-dup-deleted.
  33. ;;;
  34. ;;; The final version
  35. ;;; - shares a common subtail between the input & output list, up to 1024
  36. ;;; elements;
  37. ;;; - Needs no more than 1024 stack frames.
  38. ;;; Simplest version.
  39. ;;; - Always allocates a fresh list / never shares storage.
  40. ;;; - Needs N stack frames, if answer is length N.
  41. (define (list-delete-neighbor-dups = lis)
  42. (if (pair? lis)
  43. (let ((x0 (car lis)))
  44. (cons x0 (let recur ((x0 x0) (xs (cdr lis)))
  45. (if (pair? xs)
  46. (let ((x1 (car xs))
  47. (x2+ (cdr xs)))
  48. (if (= x0 x1)
  49. (recur x0 x2+) ; Loop, actually.
  50. (cons x1 (recur x1 x2+))))
  51. xs))))
  52. lis))
  53. ;;; This version tries to use cons cells from input by sharing longest
  54. ;;; common tail between input & output. Still needs N stack frames, for ans
  55. ;;; of length N.
  56. (define (list-delete-neighbor-dups = lis)
  57. (if (pair? lis)
  58. (let* ((x0 (car lis))
  59. (xs (cdr lis))
  60. (ans (let recur ((x0 x0) (xs xs))
  61. (if (pair? xs)
  62. (let ((x1 (car xs))
  63. (x2+ (cdr xs)))
  64. (if (= x0 x1)
  65. (recur x0 x2+)
  66. (let ((ans-tail (recur x1 x2+)))
  67. (if (eq? ans-tail x2+) xs
  68. (cons x1 ans-tail)))))
  69. xs))))
  70. (if (eq? ans xs) lis (cons x0 ans)))
  71. lis))
  72. ;;; LIST-DELETE-NEIGHBOR-DUPS!
  73. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  74. ;;; This code runs in constant list space, constant stack, and also
  75. ;;; does only the minimum SET-CDR!'s necessary.
  76. (define (list-delete-neighbor-dups! = lis)
  77. (if (pair? lis)
  78. (let lp1 ((prev lis) (prev-elt (car lis)) (lis (cdr lis)))
  79. (if (pair? lis)
  80. (let ((lis-elt (car lis))
  81. (next (cdr lis)))
  82. (if (= prev-elt lis-elt)
  83. ;; We found the first elts of a run of dups, so we know
  84. ;; we're going to have to do a SET-CDR!. Scan to the end of
  85. ;; the run, do the SET-CDR!, and loop on LP1.
  86. (let lp2 ((lis next))
  87. (if (pair? lis)
  88. (let ((lis-elt (car lis))
  89. (next (cdr lis)))
  90. (if (= prev-elt lis-elt)
  91. (lp2 next)
  92. (begin (set-cdr! prev lis)
  93. (lp1 lis lis-elt next))))
  94. (set-cdr! prev lis))) ; Ran off end => quit.
  95. (lp1 lis lis-elt next))))))
  96. lis)
  97. (define (vector-delete-neighbor-dups elt= v . maybe-start+end)
  98. (call-with-values
  99. (lambda () (vector-start+end v maybe-start+end))
  100. (lambda (start end)
  101. (if (< start end)
  102. (let* ((x (vector-ref v start))
  103. (ans (let recur ((x x) (i start) (j 1))
  104. (if (< i end)
  105. (let ((y (vector-ref v i))
  106. (nexti (+ i 1)))
  107. (if (elt= x y)
  108. (recur x nexti j)
  109. (let ((ansvec (recur y nexti (+ j 1))))
  110. (vector-set! ansvec j y)
  111. ansvec)))
  112. (make-vector j)))))
  113. (vector-set! ans 0 x)
  114. ans)
  115. '#()))))
  116. ;;; Packs the surviving elements to the left, in range [start,end'),
  117. ;;; and returns END'.
  118. (define (vector-delete-neighbor-dups! elt= v . maybe-start+end)
  119. (call-with-values
  120. (lambda () (vector-start+end v maybe-start+end))
  121. (lambda (start end)
  122. (if (>= start end)
  123. end
  124. ;; To eliminate unnecessary copying (read elt i then write the value
  125. ;; back at index i), we scan until we find the first dup.
  126. (let skip ((j start) (vj (vector-ref v start)))
  127. (let ((j+1 (+ j 1)))
  128. (if (>= j+1 end)
  129. end
  130. (let ((vj+1 (vector-ref v j+1)))
  131. (if (not (elt= vj vj+1))
  132. (skip j+1 vj+1)
  133. ;; OK -- j & j+1 are dups, so we're committed to moving
  134. ;; data around. In lp2, v[start,j] is what we've done;
  135. ;; v[k,end) is what we have yet to handle.
  136. (let lp2 ((j j) (vj vj) (k (+ j 2)))
  137. (let lp3 ((k k))
  138. (if (>= k end)
  139. (+ j 1) ; Done.
  140. (let ((vk (vector-ref v k))
  141. (k+1 (+ k 1)))
  142. (if (elt= vj vk)
  143. (lp3 k+1)
  144. (let ((j+1 (+ j 1)))
  145. (vector-set! v j+1 vk)
  146. (lp2 j+1 vk k+1))))))))))))))))
  147. ;;; Copyright
  148. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  149. ;;; This code is
  150. ;;; Copyright (c) 1998 by Olin Shivers.
  151. ;;; The terms are: You may do as you please with this code, as long as
  152. ;;; you do not delete this notice or hold me responsible for any outcome
  153. ;;; related to its use.
  154. ;;;
  155. ;;; Blah blah blah. Don't you think source files should contain more lines
  156. ;;; of code than copyright notice?
  157. ;;;
  158. ;;; Code porting
  159. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  160. ;;;
  161. ;;; If your Scheme has a faster mechanism for handling optional arguments
  162. ;;; (e.g., Chez), you should definitely port over to it. Note that argument
  163. ;;; defaulting and error-checking are interleaved -- you don't have to
  164. ;;; error-check defaulted START/END args to see if they are fixnums that are
  165. ;;; legal vector indices for the corresponding vector, etc.