equal.scm 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332
  1. ;;; Equal?
  2. ;;; Copyright (C) 2024 Igalia, S.L.
  3. ;;;
  4. ;;; Licensed under the Apache License, Version 2.0 (the "License");
  5. ;;; you may not use this file except in compliance with the License.
  6. ;;; You may obtain a copy of the License at
  7. ;;;
  8. ;;; http://www.apache.org/licenses/LICENSE-2.0
  9. ;;;
  10. ;;; Unless required by applicable law or agreed to in writing, software
  11. ;;; distributed under the License is distributed on an "AS IS" BASIS,
  12. ;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. ;;; See the License for the specific language governing permissions and
  14. ;;; limitations under the License.
  15. ;;; Commentary:
  16. ;;;
  17. ;;; Implementation of 'equal?' based on the interleaved union-find and
  18. ;;; tree equality with precheck algorithm from "Efficient
  19. ;;; Nondestructive Equality Checking for Trees and Graphs"
  20. ;;;
  21. ;;; See: https://cs.indiana.edu/~dyb/pubs/equal.pdf
  22. ;;;
  23. ;;; Code:
  24. (library (hoot equal)
  25. (export equal?)
  26. (import (hoot bitvectors)
  27. (hoot bytevectors)
  28. (hoot boxes)
  29. (hoot eq)
  30. (hoot match)
  31. (hoot numbers)
  32. (hoot not)
  33. (hoot pairs)
  34. (hoot primitives)
  35. (hoot records)
  36. (hoot strings)
  37. (hoot values)
  38. (hoot vectors))
  39. (define (equal? x y)
  40. ;; TODO: Add pseudorandom number generator
  41. (define (random x) x)
  42. ;; Use low-level wasm hashq tables to avoid a cycle with (hoot
  43. ;; hashtables).
  44. (define (make-eq-hashtable)
  45. (%inline-wasm
  46. '(func (result (ref eq))
  47. (call $make-hash-table))))
  48. (define (hashtable-ref table key)
  49. (%inline-wasm
  50. '(func (param $table (ref eq))
  51. (param $key (ref eq))
  52. (result (ref eq))
  53. (call $hashq-ref
  54. (ref.cast $hash-table (local.get $table))
  55. (local.get $key)
  56. (ref.i31 (i32.const 1))))
  57. table key))
  58. (define (hashtable-set! table key value)
  59. (%inline-wasm
  60. '(func (param $table (ref eq))
  61. (param $key (ref eq))
  62. (param $value (ref eq))
  63. (call $hashq-set!
  64. (ref.cast $hash-table (local.get $table))
  65. (local.get $key)
  66. (local.get $value)))
  67. table key value))
  68. (define (record-type-compare vtable)
  69. (%struct-ref vtable 7))
  70. (define (bytevector=? x y)
  71. (let ((n (bytevector-length x)))
  72. (and (= n (bytevector-length y))
  73. (let lp ((i 0))
  74. (or (= i n)
  75. (and (eqv? (bytevector-u8-ref x i)
  76. (bytevector-u8-ref y i))
  77. (lp (+ i 1))))))))
  78. (define (bitvector=? x y)
  79. (let ((n (bitvector-length x)))
  80. (and (= n (bitvector-length y))
  81. (let lp ((i 0))
  82. (or (= i n)
  83. (and (eqv? (bitvector-ref x i)
  84. (bitvector-ref y i))
  85. (lp (+ i 1))))))))
  86. ;; Bounds for precheck and fast/slow interleave paths. These
  87. ;; magic numbers are taken straight out of the aforementioned
  88. ;; paper.
  89. (define k0 400)
  90. (define kb -40)
  91. ;; The precheck does a simple tree equality test with a bound on
  92. ;; the number of checks, recurring up to k times. This means that
  93. ;; the precheck will terminate even when given cyclic inputs.
  94. (define (pre? x y k)
  95. (cond
  96. ((eq? x y) k)
  97. ((pair? x)
  98. (and (pair? y)
  99. (if (<= k 0)
  100. k
  101. (let ((k (pre? (car x) (car y) (- k 1))))
  102. (and k (pre? (cdr x) (cdr y) k))))))
  103. ((vector? x)
  104. (and (vector? y)
  105. (let ((n (vector-length x)))
  106. (and (= n (vector-length y))
  107. (let lp ((i 0) (k k))
  108. (if (or (= i n) (<= k 0))
  109. k
  110. (let ((k (pre? (vector-ref x i) (vector-ref y i) (- k 1))))
  111. (and k (lp (+ i 1) k)))))))))
  112. ((record? x)
  113. (and (record? y)
  114. (let ((vtable (%struct-vtable x)))
  115. (and (eq? vtable (%struct-vtable y))
  116. (match (record-type-compare vtable)
  117. (#f #f)
  118. (compare
  119. ;; Since the record type comparison procedure
  120. ;; is external to 'equal?', we need to create a
  121. ;; wrapper that updates the counter after each
  122. ;; call. Opaque records will never call
  123. ;; 'equal?*', so 'k*' is lazily initialized to
  124. ;; detect this case.
  125. (let ((k* #f))
  126. (define (equal?* x y)
  127. (unless k* (set! k* k))
  128. (and (> k* 0)
  129. (match (pre? x y k*)
  130. (#f
  131. (set! k* #f)
  132. #f)
  133. (k
  134. (set! k* (- k 1))
  135. ;; The values were equal, but if
  136. ;; the precheck has reached its
  137. ;; bound we will lie and say the
  138. ;; values were not equal so
  139. ;; 'compare' will stop.
  140. (> k 0)))))
  141. (compare x y equal?*)
  142. k*)))))
  143. k))
  144. ((string? x)
  145. (and (string? y) (string=? x y) k))
  146. ((bytevector? x)
  147. (and (bytevector? y) (bytevector=? x y) k))
  148. ((bitvector? x)
  149. (and (bitvector? y) (bitvector=? x y) k))
  150. (else (and (eqv? x y) k))))
  151. (define (interleave? ht x y k)
  152. ;; Union-find algorithm with splitting path compression.
  153. (define (union-find x y)
  154. (define (find b)
  155. (let ((n (box-ref b)))
  156. (if (number? n)
  157. b
  158. ;; Equivalence classes form chains of boxes. To
  159. ;; reduce pointer chasing as the set grows, the path
  160. ;; is compressed during lookup via the "splitting"
  161. ;; technique. Each box in the chain becomes linked to
  162. ;; the one two beyond it.
  163. (let loop ((b b) (n n))
  164. (let ((nn (box-ref n)))
  165. (if (number? nn)
  166. n
  167. (begin
  168. (box-set! b nn)
  169. (loop n nn))))))))
  170. (let ((bx (hashtable-ref ht x))
  171. (by (hashtable-ref ht y)))
  172. (if (not bx)
  173. (if (not by)
  174. ;; Neither value has been visited before. Create a
  175. ;; new equivalence class for them to share.
  176. (let ((b (make-box 1)))
  177. (hashtable-set! ht x b)
  178. (hashtable-set! ht y b)
  179. #f)
  180. ;; x hasn't been visited before, but y has. Use y's
  181. ;; equivalence class.
  182. (let ((ry (find by)))
  183. (hashtable-set! ht x ry)
  184. #f))
  185. (if (not by)
  186. ;; y hasn't been visited before, but x has. Use x's
  187. ;; equivalence class.
  188. (let ((rx (find bx)))
  189. (hashtable-set! ht y rx)
  190. #f)
  191. ;; Both x and y have been visited before.
  192. (let ((rx (find bx))
  193. (ry (find by)))
  194. ;; If x and y share an equivalance class then they
  195. ;; are equal and we're done. Otherwise, the
  196. ;; representative of the smaller class is linked
  197. ;; to the representative of the larger class and
  198. ;; the size is updated to reflect the size of the
  199. ;; new class.
  200. (or (eq? rx ry)
  201. (let ((nx (box-ref rx))
  202. (ny (box-ref ry)))
  203. (if (> nx ny)
  204. (begin
  205. (box-set! ry rx)
  206. (box-set! rx (+ nx ny))
  207. #f)
  208. (begin
  209. (box-set! rx ry)
  210. (box-set! ry (+ ny nx))
  211. #f)))))))))
  212. (define (e? x y k)
  213. (if (<= k 0)
  214. (if (= k kb)
  215. ;; The fast path is taken when k hits the lower bound,
  216. ;; resetting k in the process. The random k value
  217. ;; "reduces the likelihood of repeatedly tripping on
  218. ;; worst-case behavior in cases where sizes of the
  219. ;; input graphs happen to be related to the chosen
  220. ;; bounds in a bad way."
  221. (fast? x y (random (* 2 k0)))
  222. (slow? x y k))
  223. (fast? x y k)))
  224. (define (slow? x y k)
  225. (cond
  226. ((eq? x y) k)
  227. ((pair? x)
  228. (and (pair? y)
  229. (if (union-find x y)
  230. ;; Reset k back to zero to re-enter slow? on the
  231. ;; basis that if one equivalence is found then it
  232. ;; is likely that more will be found.
  233. 0
  234. (let ((k (e? (car x) (car y) (- k 1))))
  235. (and k (e? (cdr x) (cdr y) k))))))
  236. ((vector? x)
  237. (and (vector? y)
  238. (let ((length (vector-length x)))
  239. (and (= length (vector-length y))
  240. (if (union-find x y)
  241. 0
  242. (let lp ((i 0) (k (- k 1)))
  243. (if (= i length)
  244. k
  245. (let ((k (e? (vector-ref x i) (vector-ref y i) k)))
  246. (and k (lp (+ i 1) k))))))))))
  247. ((record? x)
  248. (and (record? y)
  249. (let ((vtable (%struct-vtable x)))
  250. (and (eq? vtable (%struct-vtable y))
  251. (match (record-type-compare vtable)
  252. (#f #f)
  253. (compare
  254. (let ((k* #f))
  255. (define (equal?* x y)
  256. (unless k* (set! k* k))
  257. (if (union-find x y)
  258. (begin
  259. (set! k* 0)
  260. #t)
  261. (match (e? x y k*)
  262. (#f
  263. (set! k* #f)
  264. #f)
  265. (k
  266. (set! k* (- k 1))
  267. (> k 0)))))
  268. k*)))))))
  269. ((string? x)
  270. (and (string? y) (string=? x y) k))
  271. ((bytevector? x)
  272. (and (bytevector? y) (bytevector=? x y) k))
  273. ((bitvector? x)
  274. (and (bitvector? y) (bitvector=? x y) k))
  275. (else (and (eqv? x y) k))))
  276. (define (fast? x y k)
  277. (let ((k (- k 1)))
  278. (cond
  279. ((eq? x y) k)
  280. ((pair? x)
  281. (and (pair? y)
  282. (let ((k (e? (car x) (car y) k)))
  283. (and k (e? (cdr x) (cdr y) k)))))
  284. ((vector? x)
  285. (and (vector? y)
  286. (let ((length (vector-length x)))
  287. (and (= length (vector-length y))
  288. (let lp ((i 0) (k k))
  289. (if (= i length)
  290. k
  291. (let ((k (e? (vector-ref x i) (vector-ref y i) k)))
  292. (and k (lp (+ i 1) k)))))))))
  293. ((record? x)
  294. (and (record? y)
  295. (let ((vtable (%struct-vtable x)))
  296. (and (eq? vtable (%struct-vtable y))
  297. (match (record-type-compare vtable)
  298. (#f #f)
  299. (compare
  300. (let ((k* #f))
  301. (define (equal?* x y)
  302. (unless k* (set! k* k))
  303. (match (e? x y k*)
  304. (#f
  305. (set! k* #f)
  306. #f)
  307. (k
  308. (set! k* (- k 1))
  309. (> k 0))))
  310. (and (compare x y equal?*) k))))))))
  311. ((string? x)
  312. (and (string? y) (string=? x y) k))
  313. ((bytevector? x)
  314. (and (bytevector? y) (bytevector=? x y) k))
  315. ((bitvector? x)
  316. (and (bitvector? y) (bitvector=? x y) k))
  317. (else (and (eqv? x y) k)))))
  318. (and (e? x y k) #t))
  319. ;; Perform the precheck before falling back to the slower
  320. ;; interleave method. For atoms and small trees, the precheck
  321. ;; will be sufficient to determine equality.
  322. (let ((k (pre? x y k0)))
  323. ;; The precheck returns #f if not equal, a number greater than
  324. ;; zero if equal, or 0 if it couldn't determine equality within
  325. ;; k0 checks. For the first two cases, we can return
  326. ;; immediately. For the last case, we proceed to the
  327. ;; interleaved algorithm.
  328. (and k (or (> k 0) (interleave? (make-eq-hashtable) x y 0))))))