equal.scm 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334
  1. ;;; Equal?
  2. ;;; Copyright (C) 2024 Igalia, S.L.
  3. ;;;
  4. ;;; Licensed under the Apache License, Version 2.0 (the "License");
  5. ;;; you may not use this file except in compliance with the License.
  6. ;;; You may obtain a copy of the License at
  7. ;;;
  8. ;;; http://www.apache.org/licenses/LICENSE-2.0
  9. ;;;
  10. ;;; Unless required by applicable law or agreed to in writing, software
  11. ;;; distributed under the License is distributed on an "AS IS" BASIS,
  12. ;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. ;;; See the License for the specific language governing permissions and
  14. ;;; limitations under the License.
  15. ;;; Commentary:
  16. ;;;
  17. ;;; Implementation of 'equal?' based on the interleaved union-find and
  18. ;;; tree equality with precheck algorithm from "Efficient
  19. ;;; Nondestructive Equality Checking for Trees and Graphs"
  20. ;;;
  21. ;;; See: https://cs.indiana.edu/~dyb/pubs/equal.pdf
  22. ;;;
  23. ;;; Code:
  24. (library (hoot equal)
  25. (export equal?)
  26. (import (hoot bitvectors)
  27. (hoot bytevectors)
  28. (hoot boxes)
  29. (hoot eq)
  30. (hoot inline-wasm)
  31. (hoot match)
  32. (hoot numbers)
  33. (hoot not)
  34. (hoot pairs)
  35. (only (hoot primitives) %struct-ref %struct-vtable)
  36. (hoot records)
  37. (hoot strings)
  38. (hoot syntax)
  39. (hoot values)
  40. (hoot vectors))
  41. (define (equal? x y)
  42. ;; TODO: Add pseudorandom number generator
  43. (define (random x) x)
  44. ;; Use low-level wasm hashq tables to avoid a cycle with (hoot
  45. ;; hashtables).
  46. (define (make-eq-hashtable)
  47. (%inline-wasm
  48. '(func (result (ref eq))
  49. (call $make-hash-table))))
  50. (define (hashtable-ref table key)
  51. (%inline-wasm
  52. '(func (param $table (ref eq))
  53. (param $key (ref eq))
  54. (result (ref eq))
  55. (call $hashq-ref
  56. (ref.cast $hash-table (local.get $table))
  57. (local.get $key)
  58. (ref.i31 (i32.const 1))))
  59. table key))
  60. (define (hashtable-set! table key value)
  61. (%inline-wasm
  62. '(func (param $table (ref eq))
  63. (param $key (ref eq))
  64. (param $value (ref eq))
  65. (call $hashq-set!
  66. (ref.cast $hash-table (local.get $table))
  67. (local.get $key)
  68. (local.get $value)))
  69. table key value))
  70. (define (record-type-compare vtable)
  71. (%struct-ref vtable 7))
  72. (define (bytevector=? x y)
  73. (let ((n (bytevector-length x)))
  74. (and (= n (bytevector-length y))
  75. (let lp ((i 0))
  76. (or (= i n)
  77. (and (eqv? (bytevector-u8-ref x i)
  78. (bytevector-u8-ref y i))
  79. (lp (+ i 1))))))))
  80. (define (bitvector=? x y)
  81. (let ((n (bitvector-length x)))
  82. (and (= n (bitvector-length y))
  83. (let lp ((i 0))
  84. (or (= i n)
  85. (and (eqv? (bitvector-ref x i)
  86. (bitvector-ref y i))
  87. (lp (+ i 1))))))))
  88. ;; Bounds for precheck and fast/slow interleave paths. These
  89. ;; magic numbers are taken straight out of the aforementioned
  90. ;; paper.
  91. (define k0 400)
  92. (define kb -40)
  93. ;; The precheck does a simple tree equality test with a bound on
  94. ;; the number of checks, recurring up to k times. This means that
  95. ;; the precheck will terminate even when given cyclic inputs.
  96. (define (pre? x y k)
  97. (cond
  98. ((eq? x y) k)
  99. ((pair? x)
  100. (and (pair? y)
  101. (if (<= k 0)
  102. k
  103. (let ((k (pre? (car x) (car y) (- k 1))))
  104. (and k (pre? (cdr x) (cdr y) k))))))
  105. ((vector? x)
  106. (and (vector? y)
  107. (let ((n (vector-length x)))
  108. (and (= n (vector-length y))
  109. (let lp ((i 0) (k k))
  110. (if (or (= i n) (<= k 0))
  111. k
  112. (let ((k (pre? (vector-ref x i) (vector-ref y i) (- k 1))))
  113. (and k (lp (+ i 1) k)))))))))
  114. ((record? x)
  115. (and (record? y)
  116. (let ((vtable (%struct-vtable x)))
  117. (and (eq? vtable (%struct-vtable y))
  118. (match (record-type-compare vtable)
  119. (#f #f)
  120. (compare
  121. ;; Since the record type comparison procedure
  122. ;; is external to 'equal?', we need to create a
  123. ;; wrapper that updates the counter after each
  124. ;; call. Opaque records will never call
  125. ;; 'equal?*', so 'k*' is lazily initialized to
  126. ;; detect this case.
  127. (let ((k* #f))
  128. (define (equal?* x y)
  129. (unless k* (set! k* k))
  130. (and (> k* 0)
  131. (match (pre? x y k*)
  132. (#f
  133. (set! k* #f)
  134. #f)
  135. (k
  136. (set! k* (- k 1))
  137. ;; The values were equal, but if
  138. ;; the precheck has reached its
  139. ;; bound we will lie and say the
  140. ;; values were not equal so
  141. ;; 'compare' will stop.
  142. (> k 0)))))
  143. (compare x y equal?*)
  144. k*)))))
  145. k))
  146. ((string? x)
  147. (and (string? y) (string=? x y) k))
  148. ((bytevector? x)
  149. (and (bytevector? y) (bytevector=? x y) k))
  150. ((bitvector? x)
  151. (and (bitvector? y) (bitvector=? x y) k))
  152. (else (and (eqv? x y) k))))
  153. (define (interleave? ht x y k)
  154. ;; Union-find algorithm with splitting path compression.
  155. (define (union-find x y)
  156. (define (find b)
  157. (let ((n (box-ref b)))
  158. (if (number? n)
  159. b
  160. ;; Equivalence classes form chains of boxes. To
  161. ;; reduce pointer chasing as the set grows, the path
  162. ;; is compressed during lookup via the "splitting"
  163. ;; technique. Each box in the chain becomes linked to
  164. ;; the one two beyond it.
  165. (let loop ((b b) (n n))
  166. (let ((nn (box-ref n)))
  167. (if (number? nn)
  168. n
  169. (begin
  170. (box-set! b nn)
  171. (loop n nn))))))))
  172. (let ((bx (hashtable-ref ht x))
  173. (by (hashtable-ref ht y)))
  174. (if (not bx)
  175. (if (not by)
  176. ;; Neither value has been visited before. Create a
  177. ;; new equivalence class for them to share.
  178. (let ((b (make-box 1)))
  179. (hashtable-set! ht x b)
  180. (hashtable-set! ht y b)
  181. #f)
  182. ;; x hasn't been visited before, but y has. Use y's
  183. ;; equivalence class.
  184. (let ((ry (find by)))
  185. (hashtable-set! ht x ry)
  186. #f))
  187. (if (not by)
  188. ;; y hasn't been visited before, but x has. Use x's
  189. ;; equivalence class.
  190. (let ((rx (find bx)))
  191. (hashtable-set! ht y rx)
  192. #f)
  193. ;; Both x and y have been visited before.
  194. (let ((rx (find bx))
  195. (ry (find by)))
  196. ;; If x and y share an equivalance class then they
  197. ;; are equal and we're done. Otherwise, the
  198. ;; representative of the smaller class is linked
  199. ;; to the representative of the larger class and
  200. ;; the size is updated to reflect the size of the
  201. ;; new class.
  202. (or (eq? rx ry)
  203. (let ((nx (box-ref rx))
  204. (ny (box-ref ry)))
  205. (if (> nx ny)
  206. (begin
  207. (box-set! ry rx)
  208. (box-set! rx (+ nx ny))
  209. #f)
  210. (begin
  211. (box-set! rx ry)
  212. (box-set! ry (+ ny nx))
  213. #f)))))))))
  214. (define (e? x y k)
  215. (if (<= k 0)
  216. (if (= k kb)
  217. ;; The fast path is taken when k hits the lower bound,
  218. ;; resetting k in the process. The random k value
  219. ;; "reduces the likelihood of repeatedly tripping on
  220. ;; worst-case behavior in cases where sizes of the
  221. ;; input graphs happen to be related to the chosen
  222. ;; bounds in a bad way."
  223. (fast? x y (random (* 2 k0)))
  224. (slow? x y k))
  225. (fast? x y k)))
  226. (define (slow? x y k)
  227. (cond
  228. ((eq? x y) k)
  229. ((pair? x)
  230. (and (pair? y)
  231. (if (union-find x y)
  232. ;; Reset k back to zero to re-enter slow? on the
  233. ;; basis that if one equivalence is found then it
  234. ;; is likely that more will be found.
  235. 0
  236. (let ((k (e? (car x) (car y) (- k 1))))
  237. (and k (e? (cdr x) (cdr y) k))))))
  238. ((vector? x)
  239. (and (vector? y)
  240. (let ((length (vector-length x)))
  241. (and (= length (vector-length y))
  242. (if (union-find x y)
  243. 0
  244. (let lp ((i 0) (k (- k 1)))
  245. (if (= i length)
  246. k
  247. (let ((k (e? (vector-ref x i) (vector-ref y i) k)))
  248. (and k (lp (+ i 1) k))))))))))
  249. ((record? x)
  250. (and (record? y)
  251. (let ((vtable (%struct-vtable x)))
  252. (and (eq? vtable (%struct-vtable y))
  253. (match (record-type-compare vtable)
  254. (#f #f)
  255. (compare
  256. (let ((k* #f))
  257. (define (equal?* x y)
  258. (unless k* (set! k* k))
  259. (if (union-find x y)
  260. (begin
  261. (set! k* 0)
  262. #t)
  263. (match (e? x y k*)
  264. (#f
  265. (set! k* #f)
  266. #f)
  267. (k
  268. (set! k* (- k 1))
  269. (> k 0)))))
  270. k*)))))))
  271. ((string? x)
  272. (and (string? y) (string=? x y) k))
  273. ((bytevector? x)
  274. (and (bytevector? y) (bytevector=? x y) k))
  275. ((bitvector? x)
  276. (and (bitvector? y) (bitvector=? x y) k))
  277. (else (and (eqv? x y) k))))
  278. (define (fast? x y k)
  279. (let ((k (- k 1)))
  280. (cond
  281. ((eq? x y) k)
  282. ((pair? x)
  283. (and (pair? y)
  284. (let ((k (e? (car x) (car y) k)))
  285. (and k (e? (cdr x) (cdr y) k)))))
  286. ((vector? x)
  287. (and (vector? y)
  288. (let ((length (vector-length x)))
  289. (and (= length (vector-length y))
  290. (let lp ((i 0) (k k))
  291. (if (= i length)
  292. k
  293. (let ((k (e? (vector-ref x i) (vector-ref y i) k)))
  294. (and k (lp (+ i 1) k)))))))))
  295. ((record? x)
  296. (and (record? y)
  297. (let ((vtable (%struct-vtable x)))
  298. (and (eq? vtable (%struct-vtable y))
  299. (match (record-type-compare vtable)
  300. (#f #f)
  301. (compare
  302. (let ((k* #f))
  303. (define (equal?* x y)
  304. (unless k* (set! k* k))
  305. (match (e? x y k*)
  306. (#f
  307. (set! k* #f)
  308. #f)
  309. (k
  310. (set! k* (- k 1))
  311. (> k 0))))
  312. (and (compare x y equal?*) k))))))))
  313. ((string? x)
  314. (and (string? y) (string=? x y) k))
  315. ((bytevector? x)
  316. (and (bytevector? y) (bytevector=? x y) k))
  317. ((bitvector? x)
  318. (and (bitvector? y) (bitvector=? x y) k))
  319. (else (and (eqv? x y) k)))))
  320. (and (e? x y k) #t))
  321. ;; Perform the precheck before falling back to the slower
  322. ;; interleave method. For atoms and small trees, the precheck
  323. ;; will be sufficient to determine equality.
  324. (let ((k (pre? x y k0)))
  325. ;; The precheck returns #f if not equal, a number greater than
  326. ;; zero if equal, or 0 if it couldn't determine equality within
  327. ;; k0 checks. For the first two cases, we can return
  328. ;; immediately. For the last case, we proceed to the
  329. ;; interleaved algorithm.
  330. (and k (or (> k 0) (interleave? (make-eq-hashtable) x y 0))))))