vqsort2.scm 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192
  1. ;;; The SRFI-32 sort package -- quick sort -*- Scheme -*-
  2. ;;; Copyright (c) 2002 by Olin Shivers.
  3. ;;; This code is open-source; see the end of the file for porting and
  4. ;;; more copyright information.
  5. ;;; Olin Shivers 2002/7.
  6. ;;; (quick-sort < v [start end]) -> vector
  7. ;;; (quick-sort! < v [start end]) -> unspecific
  8. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  9. ;;; The algorithm is a standard quicksort, but the partition loop is fancier,
  10. ;;; arranging the vector into a left part that is <, a middle region that is
  11. ;;; =, and a right part that is > the pivot. Here's how it is done:
  12. ;;; The partition loop divides the range being partitioned into five
  13. ;;; subranges:
  14. ;;; =======<<<<<<<<<?????????>>>>>>>=======
  15. ;;; where = marks a value that is equal the pivot, < marks a value that
  16. ;;; is less than the pivot, ? marks a value that hasn't been scanned, and
  17. ;;; > marks a value that is greater than the pivot. Let's consider the
  18. ;;; left-to-right scan. If it checks a ? value that is <, it keeps scanning.
  19. ;;; If the ? value is >, we stop the scan -- we are ready to start the
  20. ;;; right-to-left scan and then do a swap. But if the rightward scan checks
  21. ;;; a ? value that is =, we swap it *down* to the end of the initial chunk
  22. ;;; of ====='s -- we exchange it with the leftmost < value -- and then
  23. ;;; continue our rightward scan. The leftwards scan works in a similar
  24. ;;; fashion, scanning past > elements, stopping on a < element, and swapping
  25. ;;; up = elements. When we are done, we have a picture like this
  26. ;;; ========<<<<<<<<<<<<>>>>>>>>>>=========
  27. ;;; Then swap the = elements up into the middle of the vector to get
  28. ;;; this:
  29. ;;; <<<<<<<<<<<<=================>>>>>>>>>>
  30. ;;; Then recurse on the <'s and >'s. Work out all the tricky little
  31. ;;; boundary cases, and you're done.
  32. ;;;
  33. ;;; Other tricks:
  34. ;;; - This quicksort also makes some effort to pick the pivot well -- it uses
  35. ;;; the median of three elements as the partition pivot, so pathological n^2
  36. ;;; run time is much rarer (but not eliminated completely). If you really
  37. ;;; wanted to get fancy, you could use a random number generator to choose
  38. ;;; pivots. The key to this trick is that you only need to pick one random
  39. ;;; number for each *level* of recursion -- i.e. you only need (lg n) random
  40. ;;; numbers.
  41. ;;; - After the partition, we *recurse* on the smaller of the two pending
  42. ;;; regions, then *tail-recurse* (iterate) on the larger one. This guarantees
  43. ;;; we use no more than lg(n) stack frames, worst case.
  44. ;;; - There are two ways to finish off the sort.
  45. ;;; A Recurse down to regions of size 10, then sort each such region using
  46. ;;; insertion sort.
  47. ;;; B Recurse down to regions of size 10, then sort *the entire vector*
  48. ;;; using insertion sort.
  49. ;;; We do A. Each choice has a cost. Choice A has more overhead to invoke
  50. ;;; all the separate insertion sorts -- choice B only calls insertion sort
  51. ;;; once. But choice B will call the comparison function *more times* --
  52. ;;; it will unnecessarily compare elt 9 of one segment to elt 0 of the
  53. ;;; following segment. The overhead of choice A is linear in the length
  54. ;;; of the vector, but *otherwise independent of the algorithm's parameters*.
  55. ;;; I.e., it's a *fixed*, *small* constant factor. The cost of the extra
  56. ;;; comparisons made by choice B, however, is dependent on an externality:
  57. ;;; the comparison function passed in by the client. This can be made
  58. ;;; arbitrarily bad -- that is, the constant factor *isn't* fixed by the
  59. ;;; sort algorithm; instead, it's determined by the comparison function.
  60. ;;; If your comparison function is very, very slow, you want to eliminate
  61. ;;; every single one that you can. Choice A limits the potential badness,
  62. ;;; so that is what we do.
  63. (define (vector-quick-sort! < v . maybe-start+end)
  64. (call-with-values
  65. (lambda () (vector-start+end v maybe-start+end))
  66. (lambda (start end)
  67. (%quick-sort! < v start end))))
  68. (define (vector-quick-sort < v . maybe-start+end)
  69. (call-with-values
  70. (lambda () (vector-start+end v maybe-start+end))
  71. (lambda (start end)
  72. (let ((ans (make-vector (- end start))))
  73. (vector-portion-copy! ans v start end)
  74. (%quick-sort! < ans 0 (- end start))
  75. ans))))
  76. ;;; %QUICK-SORT is not exported.
  77. ;;; Preconditions:
  78. ;;; V vector
  79. ;;; START END fixnums
  80. ;;; 0 <= START, END <= (vector-length V)
  81. ;;; If these preconditions are ensured by the cover functions, you
  82. ;;; can safely change this code to use unsafe fixnum arithmetic and vector
  83. ;;; indexing ops, for *huge* speedup.
  84. ;;;
  85. ;;; We bail out to insertion sort for small ranges; feel free to tune the
  86. ;;; crossover -- it's just a random guess. If you don't have the insertion
  87. ;;; sort routine, just kill that branch of the IF and change the recursion
  88. ;;; test to (< 1 (- r l)) -- the code is set up to work that way.
  89. (define (%quick-sort! elt< v start end)
  90. ;; Swap the N outer pairs of the range [l,r).
  91. (define (swap l r n)
  92. (if (> n 0)
  93. (let ((x (vector-ref v l))
  94. (r-1 (- r 1)))
  95. (vector-set! v l (vector-ref v r-1))
  96. (vector-set! v r-1 x)
  97. (swap (+ l 1) r-1 (- n 1)))))
  98. ;; Choose the median of V[l], V[r], and V[middle] for the pivot.
  99. (define (median v1 v2 v3)
  100. (call-with-values
  101. (lambda () (if (elt< v1 v2) (values v1 v2) (values v2 v1)))
  102. (lambda (little big)
  103. (if (elt< big v3)
  104. big
  105. (if (elt< little v3) v3 little)))))
  106. (let recur ((l start) (r end)) ; Sort the range [l,r).
  107. (if (< 10 (- r l)) ; Ten: the gospel according to Sedgewick.
  108. (let ((pivot (median (vector-ref v l)
  109. (vector-ref v (quotient (+ l r) 2))
  110. (vector-ref v (- r 1)))))
  111. ;; Everything in these loops is driven by the invariants expressed
  112. ;; in the little pictures & the corresponding l,i,j,k,m,r indices
  113. ;; and the associated ranges.
  114. ;; =======<<<<<<<<<?????????>>>>>>>=======
  115. ;; l i j k m r
  116. ;; [l,i) [i,j) [j,k] (k,m] (m,r)
  117. (letrec ((lscan (lambda (i j k m) ; left-to-right scan
  118. (let lp ((i i) (j j))
  119. (if (> j k)
  120. (done i j m)
  121. (let ((x (vector-ref v j)))
  122. (cond ((elt< x pivot) (lp i (+ j 1)))
  123. ((elt< pivot x) (rscan i j k m))
  124. (else ; Equal
  125. (if (< i j)
  126. (begin (vector-set! v j (vector-ref v i))
  127. (vector-set! v i x)))
  128. (lp (+ i 1) (+ j 1)))))))))
  129. ;; =======<<<<<<<<<>????????>>>>>>>=======
  130. ;; l i j k m r
  131. ;; [l,i) [i,j) j (j,k] (k,m] (m,r)
  132. (rscan (lambda (i j k m) ; right-to-left scan
  133. (let lp ((k k) (m m))
  134. (if (<= k j)
  135. (done i j m)
  136. (let* ((x (vector-ref v k)))
  137. (cond ((elt< pivot x) (lp (- k 1) m))
  138. ((elt< x pivot) ; Swap j & k & lscan.
  139. (vector-set! v k (vector-ref v j))
  140. (vector-set! v j x)
  141. (lscan i (+ j 1) (- k 1) m))
  142. (else ; x=pivot
  143. (if (< k m)
  144. (begin (vector-set! v k (vector-ref v m))
  145. (vector-set! v m x)))
  146. (lp (- k 1) (- m 1)))))))))
  147. ;; =======<<<<<<<<<<<<<>>>>>>>>>>>=======
  148. ;; l i j m r
  149. ;; [l,i) [i,j) [j,m] (m,r)
  150. (done (lambda (i j m)
  151. (let ((num< (- j i))
  152. (num> (+ 1 (- m j)))
  153. (num=l (- i l))
  154. (num=r (- (- r m) 1)))
  155. (swap l j (min num< num=l)) ; Swap ='s into
  156. (swap j r (min num> num=r)) ; the middle.
  157. ;; Recur on the <'s and >'s. Recurring on the
  158. ;; smaller range and iterating on the bigger
  159. ;; range ensures O(lg n) stack frames, worst case.
  160. (cond ((<= num< num>)
  161. (recur l (+ l num<))
  162. (recur (- r num>) r))
  163. (else
  164. (recur (- r num>) r)
  165. (recur l (+ l num<))))))))
  166. (let ((r-1 (- r 1)))
  167. (lscan l l r-1 r-1))))
  168. ;; Small segment => punt to insert sort.
  169. ;; Use the dangerous subprimitive.
  170. (%vector-insert-sort! elt< v l r))))