slot-allocation.scm 41 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039
  1. ;; Continuation-passing style (CPS) intermediate language (IL)
  2. ;; Copyright (C) 2013, 2014, 2015 Free Software Foundation, Inc.
  3. ;;;; This library is free software; you can redistribute it and/or
  4. ;;;; modify it under the terms of the GNU Lesser General Public
  5. ;;;; License as published by the Free Software Foundation; either
  6. ;;;; version 3 of the License, or (at your option) any later version.
  7. ;;;;
  8. ;;;; This library is distributed in the hope that it will be useful,
  9. ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. ;;;; Lesser General Public License for more details.
  12. ;;;;
  13. ;;;; You should have received a copy of the GNU Lesser General Public
  14. ;;;; License along with this library; if not, write to the Free Software
  15. ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  16. ;;; Commentary:
  17. ;;;
  18. ;;; A module to assign stack slots to variables in a CPS term.
  19. ;;;
  20. ;;; Code:
  21. (define-module (language cps slot-allocation)
  22. #:use-module (ice-9 match)
  23. #:use-module (srfi srfi-1)
  24. #:use-module (srfi srfi-9)
  25. #:use-module (srfi srfi-11)
  26. #:use-module (srfi srfi-26)
  27. #:use-module (language cps)
  28. #:use-module (language cps utils)
  29. #:use-module (language cps intmap)
  30. #:use-module (language cps intset)
  31. #:export (allocate-slots
  32. lookup-slot
  33. lookup-maybe-slot
  34. lookup-representation
  35. lookup-constant-value
  36. lookup-maybe-constant-value
  37. lookup-nlocals
  38. lookup-call-proc-slot
  39. lookup-parallel-moves
  40. lookup-slot-map))
  41. (define-record-type $allocation
  42. (make-allocation slots representations constant-values call-allocs
  43. shuffles frame-sizes)
  44. allocation?
  45. ;; A map of VAR to slot allocation. A slot allocation is an integer,
  46. ;; if the variable has been assigned a slot.
  47. ;;
  48. (slots allocation-slots)
  49. ;; A map of VAR to representation. A representation is 'scm, 'f64,
  50. ;; 'u64, or 's64.
  51. ;;
  52. (representations allocation-representations)
  53. ;; A map of VAR to constant value, for variables with constant values.
  54. ;;
  55. (constant-values allocation-constant-values)
  56. ;; A map of LABEL to /call allocs/, for expressions that continue to
  57. ;; $kreceive continuations: non-tail calls and $prompt expressions.
  58. ;;
  59. ;; A call alloc contains two pieces of information: the call's /proc
  60. ;; slot/ and a /dead slot map/. The proc slot indicates the slot of a
  61. ;; procedure in a procedure call, or where the procedure would be in a
  62. ;; multiple-value return.
  63. ;;
  64. ;; The dead slot map indicates, what slots should be ignored by GC
  65. ;; when marking the frame. A dead slot map is a bitfield, as an
  66. ;; integer.
  67. ;;
  68. (call-allocs allocation-call-allocs)
  69. ;; A map of LABEL to /parallel moves/. Parallel moves shuffle locals
  70. ;; into position for a $call, $callk, or $values, or shuffle returned
  71. ;; values back into place in a $kreceive.
  72. ;;
  73. ;; A set of moves is expressed as an ordered list of (SRC . DST)
  74. ;; moves, where SRC and DST are slots. This may involve a temporary
  75. ;; variable.
  76. ;;
  77. (shuffles allocation-shuffles)
  78. ;; The number of locals for a $kclause.
  79. ;;
  80. (frame-sizes allocation-frame-sizes))
  81. (define-record-type $call-alloc
  82. (make-call-alloc proc-slot slot-map)
  83. call-alloc?
  84. (proc-slot call-alloc-proc-slot)
  85. (slot-map call-alloc-slot-map))
  86. (define (lookup-maybe-slot var allocation)
  87. (intmap-ref (allocation-slots allocation) var (lambda (_) #f)))
  88. (define (lookup-slot var allocation)
  89. (intmap-ref (allocation-slots allocation) var))
  90. (define (lookup-representation var allocation)
  91. (intmap-ref (allocation-representations allocation) var))
  92. (define *absent* (list 'absent))
  93. (define (lookup-constant-value var allocation)
  94. (let ((value (intmap-ref (allocation-constant-values allocation) var
  95. (lambda (_) *absent*))))
  96. (when (eq? value *absent*)
  97. (error "Variable does not have constant value" var))
  98. value))
  99. (define (lookup-maybe-constant-value var allocation)
  100. (let ((value (intmap-ref (allocation-constant-values allocation) var
  101. (lambda (_) *absent*))))
  102. (if (eq? value *absent*)
  103. (values #f #f)
  104. (values #t value))))
  105. (define (lookup-call-alloc k allocation)
  106. (intmap-ref (allocation-call-allocs allocation) k))
  107. (define (lookup-call-proc-slot k allocation)
  108. (or (call-alloc-proc-slot (lookup-call-alloc k allocation))
  109. (error "Call has no proc slot" k)))
  110. (define (lookup-parallel-moves k allocation)
  111. (intmap-ref (allocation-shuffles allocation) k))
  112. (define (lookup-slot-map k allocation)
  113. (or (call-alloc-slot-map (lookup-call-alloc k allocation))
  114. (error "Call has no slot map" k)))
  115. (define (lookup-nlocals k allocation)
  116. (intmap-ref (allocation-frame-sizes allocation) k))
  117. (define-syntax-rule (persistent-intmap2 exp)
  118. (call-with-values (lambda () exp)
  119. (lambda (a b)
  120. (values (persistent-intmap a) (persistent-intmap b)))))
  121. (define (compute-defs-and-uses cps)
  122. "Return two LABEL->VAR... maps indicating values defined at and used
  123. by a label, respectively."
  124. (define (vars->intset vars)
  125. (fold (lambda (var set) (intset-add set var)) empty-intset vars))
  126. (persistent-intmap2
  127. (intmap-fold
  128. (lambda (label cont defs uses)
  129. (define (get-defs k)
  130. (match (intmap-ref cps k)
  131. (($ $kargs names vars) (vars->intset vars))
  132. (_ empty-intset)))
  133. (define (return d u)
  134. (values (intmap-add! defs label d)
  135. (intmap-add! uses label u)))
  136. (match cont
  137. (($ $kfun src meta self)
  138. (return (intset self) empty-intset))
  139. (($ $kargs _ _ ($ $continue k src exp))
  140. (match exp
  141. ((or ($ $const) ($ $closure))
  142. (return (get-defs k) empty-intset))
  143. (($ $call proc args)
  144. (return (get-defs k) (intset-add (vars->intset args) proc)))
  145. (($ $callk _ proc args)
  146. (return (get-defs k) (intset-add (vars->intset args) proc)))
  147. (($ $primcall name args)
  148. (return (get-defs k) (vars->intset args)))
  149. (($ $branch kt ($ $primcall name args))
  150. (return empty-intset (vars->intset args)))
  151. (($ $branch kt ($ $values args))
  152. (return empty-intset (vars->intset args)))
  153. (($ $values args)
  154. (return (get-defs k) (vars->intset args)))
  155. (($ $prompt escape? tag handler)
  156. (return empty-intset (intset tag)))))
  157. (($ $kclause arity body alt)
  158. (return (get-defs body) empty-intset))
  159. (($ $kreceive arity kargs)
  160. (return (get-defs kargs) empty-intset))
  161. (($ $ktail)
  162. (return empty-intset empty-intset))))
  163. cps
  164. empty-intmap
  165. empty-intmap)))
  166. (define (compute-reverse-control-flow-order preds)
  167. "Return a LABEL->ORDER bijection where ORDER is a contiguous set of
  168. integers starting from 0 and incrementing in sort order."
  169. ;; This is more involved than forward control flow because not all
  170. ;; live labels are reachable from the tail.
  171. (persistent-intmap
  172. (fold2 (lambda (component order n)
  173. (intset-fold (lambda (label order n)
  174. (values (intmap-add! order label n)
  175. (1+ n)))
  176. component order n))
  177. (reverse (compute-sorted-strongly-connected-components preds))
  178. empty-intmap 0)))
  179. (define* (add-prompt-control-flow-edges conts succs #:key complete?)
  180. "For all prompts in DFG in the range [MIN-LABEL, MIN-LABEL +
  181. LABEL-COUNT), invoke F with arguments PROMPT, HANDLER, and BODY for each
  182. body continuation in the prompt."
  183. (define (intset-filter pred set)
  184. (intset-fold (lambda (i set)
  185. (if (pred i) set (intset-remove set i)))
  186. set
  187. set))
  188. (define (intset-any pred set)
  189. (intset-fold (lambda (i res)
  190. (if (or res (pred i)) #t res))
  191. set
  192. #f))
  193. (define (visit-prompt label handler succs)
  194. ;; FIXME: It isn't correct to use all continuations reachable from
  195. ;; the prompt, because that includes continuations outside the
  196. ;; prompt body. This point is moot if the handler's control flow
  197. ;; joins with the the body, as is usually but not always the case.
  198. ;;
  199. ;; One counter-example is when the handler contifies an infinite
  200. ;; loop; in that case we compute a too-large prompt body. This
  201. ;; error is currently innocuous, but we should fix it at some point.
  202. ;;
  203. ;; The fix is to end the body at the corresponding "pop" primcall,
  204. ;; if any.
  205. (let ((body (intset-subtract (compute-function-body conts label)
  206. (compute-function-body conts handler))))
  207. (define (out-or-back-edge? label)
  208. ;; Most uses of visit-prompt-control-flow don't need every body
  209. ;; continuation, and would be happy getting called only for
  210. ;; continuations that postdominate the rest of the body. Unless
  211. ;; you pass #:complete? #t, we only invoke F on continuations
  212. ;; that can leave the body, or on back-edges in loops.
  213. ;;
  214. ;; You would think that looking for the final "pop" primcall
  215. ;; would be sufficient, but that is incorrect; it's possible for
  216. ;; a loop in the prompt body to be contified, and that loop need
  217. ;; not continue to the pop if it never terminates. The pop could
  218. ;; even be removed by DCE, in that case.
  219. (intset-any (lambda (succ)
  220. (or (not (intset-ref body succ))
  221. (<= succ label)))
  222. (intmap-ref succs label)))
  223. (intset-fold (lambda (pred succs)
  224. (intmap-replace succs pred handler intset-add))
  225. (if complete? body (intset-filter out-or-back-edge? body))
  226. succs)))
  227. (intmap-fold
  228. (lambda (label cont succs)
  229. (match cont
  230. (($ $kargs _ _
  231. ($ $continue _ _ ($ $prompt escape? tag handler)))
  232. (visit-prompt label handler succs))
  233. (_ succs)))
  234. conts
  235. succs))
  236. (define (rename-keys map old->new)
  237. (persistent-intmap
  238. (intmap-fold (lambda (k v out)
  239. (intmap-add! out (intmap-ref old->new k) v))
  240. map
  241. empty-intmap)))
  242. (define (rename-intset set old->new)
  243. (intset-fold (lambda (old set) (intset-add set (intmap-ref old->new old)))
  244. set empty-intset))
  245. (define (rename-graph graph old->new)
  246. (persistent-intmap
  247. (intmap-fold (lambda (pred succs out)
  248. (intmap-add! out
  249. (intmap-ref old->new pred)
  250. (rename-intset succs old->new)))
  251. graph
  252. empty-intmap)))
  253. (define (compute-live-variables cps defs uses)
  254. "Compute and return two values mapping LABEL->VAR..., where VAR... are
  255. the definitions that are live before and after LABEL, as intsets."
  256. (let* ((succs (add-prompt-control-flow-edges cps (compute-successors cps)))
  257. (preds (invert-graph succs))
  258. (old->new (compute-reverse-control-flow-order preds))
  259. (init (persistent-intmap (intmap-fold
  260. (lambda (old new init)
  261. (intmap-add! init new empty-intset))
  262. old->new empty-intmap))))
  263. (call-with-values
  264. (lambda ()
  265. (solve-flow-equations (rename-graph preds old->new)
  266. init init
  267. (rename-keys defs old->new)
  268. (rename-keys uses old->new)
  269. intset-subtract intset-union intset-union))
  270. (lambda (in out)
  271. ;; As a reverse control-flow problem, the values flowing into a
  272. ;; node are actually the live values after the node executes.
  273. ;; Funny, innit? So we return them in the reverse order.
  274. (let ((new->old (invert-bijection old->new)))
  275. (values (rename-keys out new->old)
  276. (rename-keys in new->old)))))))
  277. (define (compute-needs-slot cps defs uses)
  278. (define (get-defs k) (intmap-ref defs k))
  279. (define (get-uses label) (intmap-ref uses label))
  280. (intmap-fold
  281. (lambda (label cont needs-slot)
  282. (intset-union
  283. needs-slot
  284. (match cont
  285. (($ $kargs _ _ ($ $continue k src exp))
  286. (let ((defs (get-defs label)))
  287. (define (defs+* uses)
  288. (intset-union defs uses))
  289. (define (defs+ use)
  290. (intset-add defs use))
  291. (match exp
  292. (($ $const)
  293. empty-intset)
  294. (($ $primcall (or 'load-f64 'load-u64 'load-s64) (val))
  295. empty-intset)
  296. (($ $primcall 'free-ref (closure slot))
  297. (defs+ closure))
  298. (($ $primcall 'free-set! (closure slot value))
  299. (defs+* (intset closure value)))
  300. (($ $primcall 'cache-current-module! (mod . _))
  301. (defs+ mod))
  302. (($ $primcall 'cached-toplevel-box _)
  303. defs)
  304. (($ $primcall 'cached-module-box _)
  305. defs)
  306. (($ $primcall 'resolve (name bound?))
  307. (defs+ name))
  308. (($ $primcall 'make-vector/immediate (len init))
  309. (defs+ init))
  310. (($ $primcall 'vector-ref/immediate (v i))
  311. (defs+ v))
  312. (($ $primcall 'vector-set!/immediate (v i x))
  313. (defs+* (intset v x)))
  314. (($ $primcall 'allocate-struct/immediate (vtable nfields))
  315. (defs+ vtable))
  316. (($ $primcall 'struct-ref/immediate (s n))
  317. (defs+ s))
  318. (($ $primcall 'struct-set!/immediate (s n x))
  319. (defs+* (intset s x)))
  320. (($ $primcall (or 'add/immediate 'sub/immediate
  321. 'uadd/immediate 'usub/immediate 'umul/immediate
  322. 'ursh/immediate 'ulsh/immediate)
  323. (x y))
  324. (defs+ x))
  325. (($ $primcall 'builtin-ref (idx))
  326. defs)
  327. (_
  328. (defs+* (get-uses label))))))
  329. (($ $kreceive arity k)
  330. ;; Only allocate results of function calls to slots if they are
  331. ;; used.
  332. empty-intset)
  333. (($ $kclause arity body alternate)
  334. (get-defs label))
  335. (($ $kfun src meta self)
  336. (intset self))
  337. (($ $ktail)
  338. empty-intset))))
  339. cps
  340. empty-intset))
  341. (define (compute-lazy-vars cps live-in live-out defs needs-slot)
  342. "Compute and return a set of vars whose allocation can be delayed
  343. until their use is seen. These are \"lazy\" vars. A var is lazy if its
  344. uses are calls, it is always dead after the calls, and if the uses flow
  345. to the definition. A flow continues across a node iff the node kills no
  346. values that need slots, and defines only lazy vars. Calls also kill
  347. flows; there's no sense in trying to juggle a pending frame while there
  348. is an active call."
  349. (define (list->intset list)
  350. (persistent-intset
  351. (fold (lambda (i set) (intset-add! set i)) empty-intset list)))
  352. (let* ((succs (compute-successors cps))
  353. (gens (intmap-map
  354. (lambda (label cont)
  355. (match cont
  356. (($ $kargs _ _ ($ $continue _ _ ($ $call proc args)))
  357. (intset-subtract (intset-add (list->intset args) proc)
  358. (intmap-ref live-out label)))
  359. (($ $kargs _ _ ($ $continue _ _ ($ $callk _ proc args)))
  360. (intset-subtract (intset-add (list->intset args) proc)
  361. (intmap-ref live-out label)))
  362. (($ $kargs _ _ ($ $continue k _($ $values args)))
  363. (match (intmap-ref cps k)
  364. (($ $ktail) (list->intset args))
  365. (_ #f)))
  366. (_ #f)))
  367. cps))
  368. (kills (intmap-map
  369. (lambda (label in)
  370. (let* ((out (intmap-ref live-out label))
  371. (killed (intset-subtract in out))
  372. (killed-slots (intset-intersect killed needs-slot)))
  373. (and (eq? killed-slots empty-intset)
  374. ;; Kill output variables that need slots.
  375. (intset-intersect (intmap-ref defs label)
  376. needs-slot))))
  377. live-in))
  378. (preds (invert-graph succs))
  379. (old->new (compute-reverse-control-flow-order preds)))
  380. (define (subtract lazy kill)
  381. (cond
  382. ((eq? lazy empty-intset)
  383. lazy)
  384. ((not kill)
  385. empty-intset)
  386. ((and lazy (eq? empty-intset (intset-subtract kill lazy)))
  387. (intset-subtract lazy kill))
  388. (else
  389. empty-intset)))
  390. (define (add live gen) (or gen live))
  391. (define (meet in out)
  392. ;; Initial in is #f.
  393. (if in (intset-intersect in out) out))
  394. (call-with-values
  395. (lambda ()
  396. (let ((succs (rename-graph preds old->new))
  397. (init (persistent-intmap
  398. (intmap-fold
  399. (lambda (old new in)
  400. (intmap-add! in new #f))
  401. old->new empty-intmap)))
  402. (kills (rename-keys kills old->new))
  403. (gens (rename-keys gens old->new)))
  404. (solve-flow-equations succs init init kills gens
  405. subtract add meet)))
  406. (lambda (in out)
  407. ;; A variable is lazy if its uses reach its definition.
  408. (intmap-fold (lambda (label out lazy)
  409. (match (intmap-ref cps label)
  410. (($ $kargs names vars)
  411. (let ((defs (list->intset vars)))
  412. (intset-union lazy (intset-intersect out defs))))
  413. (_ lazy)))
  414. (rename-keys out (invert-bijection old->new))
  415. empty-intset)))))
  416. (define (find-first-zero n)
  417. ;; Naive implementation.
  418. (let lp ((slot 0))
  419. (if (logbit? slot n)
  420. (lp (1+ slot))
  421. slot)))
  422. (define (find-first-trailing-zero n)
  423. (let lp ((slot (let lp ((count 2))
  424. (if (< n (ash 1 (1- count)))
  425. count
  426. ;; Grow upper bound slower than factor 2 to avoid
  427. ;; needless bignum allocation on 32-bit systems
  428. ;; when there are more than 16 locals.
  429. (lp (+ count (ash count -1)))))))
  430. (if (or (zero? slot) (logbit? (1- slot) n))
  431. slot
  432. (lp (1- slot)))))
  433. (define (integers from count)
  434. (if (zero? count)
  435. '()
  436. (cons from (integers (1+ from) (1- count)))))
  437. (define (solve-parallel-move src dst tmp)
  438. "Solve the parallel move problem between src and dst slot lists, which
  439. are comparable with eqv?. A tmp slot may be used."
  440. ;; This algorithm is taken from: "Tilting at windmills with Coq:
  441. ;; formal verification of a compilation algorithm for parallel moves"
  442. ;; by Laurence Rideau, Bernard Paul Serpette, and Xavier Leroy
  443. ;; <http://gallium.inria.fr/~xleroy/publi/parallel-move.pdf>
  444. (define (split-move moves reg)
  445. (let loop ((revhead '()) (tail moves))
  446. (match tail
  447. (((and s+d (s . d)) . rest)
  448. (if (eqv? s reg)
  449. (cons d (append-reverse revhead rest))
  450. (loop (cons s+d revhead) rest)))
  451. (_ #f))))
  452. (define (replace-last-source reg moves)
  453. (match moves
  454. ((moves ... (s . d))
  455. (append moves (list (cons reg d))))))
  456. (let loop ((to-move (map cons src dst))
  457. (being-moved '())
  458. (moved '())
  459. (last-source #f))
  460. ;; 'last-source' should always be equivalent to:
  461. ;; (and (pair? being-moved) (car (last being-moved)))
  462. (match being-moved
  463. (() (match to-move
  464. (() (reverse moved))
  465. (((and s+d (s . d)) . t1)
  466. (if (or (eqv? s d) ; idempotent
  467. (not s)) ; src is a constant and can be loaded directly
  468. (loop t1 '() moved #f)
  469. (loop t1 (list s+d) moved s)))))
  470. (((and s+d (s . d)) . b)
  471. (match (split-move to-move d)
  472. ((r . t1) (loop t1 (acons d r being-moved) moved last-source))
  473. (#f (match b
  474. (() (loop to-move '() (cons s+d moved) #f))
  475. (_ (if (eqv? d last-source)
  476. (loop to-move
  477. (replace-last-source tmp b)
  478. (cons s+d (acons d tmp moved))
  479. tmp)
  480. (loop to-move b (cons s+d moved) last-source))))))))))
  481. (define (compute-shuffles cps slots call-allocs live-in)
  482. (define (add-live-slot slot live-slots)
  483. (logior live-slots (ash 1 slot)))
  484. (define (get-cont label)
  485. (intmap-ref cps label))
  486. (define (get-slot var)
  487. (intmap-ref slots var (lambda (_) #f)))
  488. (define (get-slots vars)
  489. (let lp ((vars vars))
  490. (match vars
  491. ((var . vars) (cons (get-slot var) (lp vars)))
  492. (_ '()))))
  493. (define (get-proc-slot label)
  494. (call-alloc-proc-slot (intmap-ref call-allocs label)))
  495. (define (compute-live-slots label)
  496. (intset-fold (lambda (var live)
  497. (match (get-slot var)
  498. (#f live)
  499. (slot (add-live-slot slot live))))
  500. (intmap-ref live-in label)
  501. 0))
  502. ;; Although some parallel moves may proceed without a temporary slot,
  503. ;; in general one is needed. That temporary slot must not be part of
  504. ;; the source or destination sets, and that slot should not correspond
  505. ;; to a live variable. Usually the source and destination sets are a
  506. ;; subset of the union of the live sets before and after the move.
  507. ;; However for stack slots that don't have names -- those slots that
  508. ;; correspond to function arguments or to function return values -- it
  509. ;; could be that they are out of the computed live set. In that case
  510. ;; they need to be adjoined to the live set, used when choosing a
  511. ;; temporary slot.
  512. (define (compute-tmp-slot live stack-slots)
  513. (find-first-zero (fold add-live-slot live stack-slots)))
  514. (define (parallel-move src-slots dst-slots tmp-slot)
  515. (solve-parallel-move src-slots dst-slots tmp-slot))
  516. (define (compute-receive-shuffles label proc-slot)
  517. (match (get-cont label)
  518. (($ $kreceive arity kargs)
  519. (let* ((results (match (get-cont kargs)
  520. (($ $kargs names vars) vars)))
  521. (value-slots (integers (1+ proc-slot) (length results)))
  522. (result-slots (get-slots results))
  523. ;; Filter out unused results.
  524. (value-slots (filter-map (lambda (val result) (and result val))
  525. value-slots result-slots))
  526. (result-slots (filter (lambda (x) x) result-slots))
  527. (live (compute-live-slots kargs)))
  528. (parallel-move value-slots
  529. result-slots
  530. (compute-tmp-slot live value-slots))))))
  531. (define (add-call-shuffles label k args shuffles)
  532. (match (get-cont k)
  533. (($ $ktail)
  534. (let* ((live (compute-live-slots label))
  535. (tail-slots (integers 0 (length args)))
  536. (moves (parallel-move (get-slots args)
  537. tail-slots
  538. (compute-tmp-slot live tail-slots))))
  539. (intmap-add! shuffles label moves)))
  540. (($ $kreceive)
  541. (let* ((live (compute-live-slots label))
  542. (proc-slot (get-proc-slot label))
  543. (call-slots (integers proc-slot (length args)))
  544. (arg-moves (parallel-move (get-slots args)
  545. call-slots
  546. (compute-tmp-slot live call-slots))))
  547. (intmap-add! (intmap-add! shuffles label arg-moves)
  548. k (compute-receive-shuffles k proc-slot))))))
  549. (define (add-values-shuffles label k args shuffles)
  550. (match (get-cont k)
  551. (($ $ktail)
  552. (let* ((live (compute-live-slots label))
  553. (src-slots (get-slots args))
  554. (dst-slots (integers 1 (length args)))
  555. (moves (parallel-move src-slots dst-slots
  556. (compute-tmp-slot live dst-slots))))
  557. (intmap-add! shuffles label moves)))
  558. (($ $kargs _ dst-vars)
  559. (let* ((live (logior (compute-live-slots label)
  560. (compute-live-slots k)))
  561. (src-slots (get-slots args))
  562. (dst-slots (get-slots dst-vars))
  563. (moves (parallel-move src-slots dst-slots
  564. (compute-tmp-slot live '()))))
  565. (intmap-add! shuffles label moves)))))
  566. (define (add-prompt-shuffles label k handler shuffles)
  567. (intmap-add! shuffles handler
  568. (compute-receive-shuffles handler (get-proc-slot label))))
  569. (define (compute-shuffles label cont shuffles)
  570. (match cont
  571. (($ $kargs names vars ($ $continue k src exp))
  572. (match exp
  573. (($ $call proc args)
  574. (add-call-shuffles label k (cons proc args) shuffles))
  575. (($ $callk _ proc args)
  576. (add-call-shuffles label k (cons proc args) shuffles))
  577. (($ $values args)
  578. (add-values-shuffles label k args shuffles))
  579. (($ $prompt escape? tag handler)
  580. (add-prompt-shuffles label k handler shuffles))
  581. (_ shuffles)))
  582. (_ shuffles)))
  583. (persistent-intmap
  584. (intmap-fold compute-shuffles cps empty-intmap)))
  585. (define (compute-frame-sizes cps slots call-allocs shuffles)
  586. ;; Minimum frame has one slot: the closure.
  587. (define minimum-frame-size 1)
  588. (define (get-shuffles label)
  589. (intmap-ref shuffles label))
  590. (define (get-proc-slot label)
  591. (match (intmap-ref call-allocs label (lambda (_) #f))
  592. (#f 0) ;; Tail call.
  593. (($ $call-alloc proc-slot) proc-slot)))
  594. (define (max-size var size)
  595. (match (intmap-ref slots var (lambda (_) #f))
  596. (#f size)
  597. (slot (max size (1+ slot)))))
  598. (define (max-size* vars size)
  599. (fold max-size size vars))
  600. (define (shuffle-size moves size)
  601. (match moves
  602. (() size)
  603. (((src . dst) . moves)
  604. (shuffle-size moves (max size (1+ src) (1+ dst))))))
  605. (define (call-size label nargs size)
  606. (shuffle-size (get-shuffles label)
  607. (max (+ (get-proc-slot label) nargs) size)))
  608. (define (measure-cont label cont frame-sizes clause size)
  609. (match cont
  610. (($ $kfun)
  611. (values #f #f #f))
  612. (($ $kclause)
  613. (let ((frame-sizes (if clause
  614. (intmap-add! frame-sizes clause size)
  615. empty-intmap)))
  616. (values frame-sizes label minimum-frame-size)))
  617. (($ $kargs names vars ($ $continue k src exp))
  618. (values frame-sizes clause
  619. (let ((size (max-size* vars size)))
  620. (match exp
  621. (($ $call proc args)
  622. (call-size label (1+ (length args)) size))
  623. (($ $callk _ proc args)
  624. (call-size label (1+ (length args)) size))
  625. (($ $values args)
  626. (shuffle-size (get-shuffles label) size))
  627. (_ size)))))
  628. (($ $kreceive)
  629. (values frame-sizes clause
  630. (shuffle-size (get-shuffles label) size)))
  631. (($ $ktail)
  632. (values (intmap-add! frame-sizes clause size) #f #f))))
  633. (persistent-intmap (intmap-fold measure-cont cps #f #f #f)))
  634. (define (allocate-args cps)
  635. (intmap-fold (lambda (label cont slots)
  636. (match cont
  637. (($ $kfun src meta self)
  638. (intmap-add! slots self 0))
  639. (($ $kclause arity body alt)
  640. (match (intmap-ref cps body)
  641. (($ $kargs names vars)
  642. (let lp ((vars vars) (slots slots) (n 1))
  643. (match vars
  644. (() slots)
  645. ((var . vars)
  646. (lp vars
  647. (intmap-add! slots var n)
  648. (1+ n))))))))
  649. (_ slots)))
  650. cps empty-intmap))
  651. (define-inlinable (add-live-slot slot live-slots)
  652. (logior live-slots (ash 1 slot)))
  653. (define-inlinable (kill-dead-slot slot live-slots)
  654. (logand live-slots (lognot (ash 1 slot))))
  655. (define-inlinable (compute-slot live-slots hint)
  656. (if (and hint (not (logbit? hint live-slots)))
  657. hint
  658. (find-first-zero live-slots)))
  659. (define (allocate-lazy-vars cps slots call-allocs live-in lazy)
  660. (define (compute-live-slots slots label)
  661. (intset-fold (lambda (var live)
  662. (match (intmap-ref slots var (lambda (_) #f))
  663. (#f live)
  664. (slot (add-live-slot slot live))))
  665. (intmap-ref live-in label)
  666. 0))
  667. (define (allocate var hint slots live)
  668. (match (and hint (intmap-ref slots var (lambda (_) #f)))
  669. (#f (if (intset-ref lazy var)
  670. (let ((slot (compute-slot live hint)))
  671. (values (intmap-add! slots var slot)
  672. (add-live-slot slot live)))
  673. (values slots live)))
  674. (slot (values slots (add-live-slot slot live)))))
  675. (define (allocate* vars hints slots live)
  676. (match (vector vars hints)
  677. (#(() ()) slots)
  678. (#((var . vars) (hint . hints))
  679. (let-values (((slots live) (allocate var hint slots live)))
  680. (allocate* vars hints slots live)))))
  681. (define (get-proc-slot label)
  682. (match (intmap-ref call-allocs label (lambda (_) #f))
  683. (#f 0)
  684. (call (call-alloc-proc-slot call))))
  685. (define (allocate-call label args slots)
  686. (allocate* args (integers (get-proc-slot label) (length args))
  687. slots (compute-live-slots slots label)))
  688. (define (allocate-values label k args slots)
  689. (match (intmap-ref cps k)
  690. (($ $ktail)
  691. (allocate* args (integers 1 (length args))
  692. slots (compute-live-slots slots label)))
  693. (($ $kargs names vars)
  694. (allocate* args
  695. (map (cut intmap-ref slots <> (lambda (_) #f)) vars)
  696. slots (compute-live-slots slots label)))))
  697. (define (allocate-lazy label cont slots)
  698. (match cont
  699. (($ $kargs names vars ($ $continue k src exp))
  700. (match exp
  701. (($ $call proc args)
  702. (allocate-call label (cons proc args) slots))
  703. (($ $callk _ proc args)
  704. (allocate-call label (cons proc args) slots))
  705. (($ $values args)
  706. (allocate-values label k args slots))
  707. (_ slots)))
  708. (_
  709. slots)))
  710. ;; Sweep right to left to visit uses before definitions.
  711. (persistent-intmap
  712. (intmap-fold-right allocate-lazy cps slots)))
  713. (define (compute-var-representations cps)
  714. (define (get-defs k)
  715. (match (intmap-ref cps k)
  716. (($ $kargs names vars) vars)
  717. (_ '())))
  718. (intmap-fold
  719. (lambda (label cont representations)
  720. (match cont
  721. (($ $kargs _ _ ($ $continue k _ exp))
  722. (match (get-defs k)
  723. (() representations)
  724. ((var)
  725. (match exp
  726. (($ $values (arg))
  727. (intmap-add representations var
  728. (intmap-ref representations arg)))
  729. (($ $primcall (or 'scm->f64 'load-f64
  730. 'bv-f32-ref 'bv-f64-ref
  731. 'fadd 'fsub 'fmul 'fdiv))
  732. (intmap-add representations var 'f64))
  733. (($ $primcall (or 'scm->u64 'scm->u64/truncate 'load-u64
  734. 'bv-length 'vector-length 'string-length
  735. 'uadd 'usub 'umul
  736. 'ulogand 'ulogior 'ulogsub 'ursh 'ulsh
  737. 'uadd/immediate 'usub/immediate 'umul/immediate
  738. 'ursh/immediate 'ulsh/immediate
  739. 'bv-u8-ref 'bv-u16-ref 'bv-u32-ref 'bv-u64-ref))
  740. (intmap-add representations var 'u64))
  741. (($ $primcall (or 'scm->s64 'load-s64
  742. 'bv-s8-ref 'bv-s16-ref 'bv-s32-ref 'bv-s64-ref))
  743. (intmap-add representations var 's64))
  744. (_
  745. (intmap-add representations var 'scm))))
  746. (vars
  747. (match exp
  748. (($ $values args)
  749. (fold (lambda (arg var representations)
  750. (intmap-add representations var
  751. (intmap-ref representations arg)))
  752. representations args vars))))))
  753. (($ $kfun src meta self)
  754. (intmap-add representations self 'scm))
  755. (($ $kclause arity body alt)
  756. (fold1 (lambda (var representations)
  757. (intmap-add representations var 'scm))
  758. (get-defs body) representations))
  759. (($ $kreceive arity kargs)
  760. (fold1 (lambda (var representations)
  761. (intmap-add representations var 'scm))
  762. (get-defs kargs) representations))
  763. (($ $ktail) representations)))
  764. cps
  765. empty-intmap))
  766. (define (allocate-slots cps)
  767. (let*-values (((defs uses) (compute-defs-and-uses cps))
  768. ((representations) (compute-var-representations cps))
  769. ((live-in live-out) (compute-live-variables cps defs uses))
  770. ((constants) (compute-constant-values cps))
  771. ((needs-slot) (compute-needs-slot cps defs uses))
  772. ((lazy) (compute-lazy-vars cps live-in live-out defs
  773. needs-slot)))
  774. (define (empty-live-slots)
  775. #b0)
  776. (define (compute-call-proc-slot live-slots)
  777. (+ 2 (find-first-trailing-zero live-slots)))
  778. (define (compute-prompt-handler-proc-slot live-slots)
  779. (if (zero? live-slots)
  780. 0
  781. (1- (find-first-trailing-zero live-slots))))
  782. (define (get-cont label)
  783. (intmap-ref cps label))
  784. (define (get-slot slots var)
  785. (intmap-ref slots var (lambda (_) #f)))
  786. (define (get-slots slots vars)
  787. (let lp ((vars vars))
  788. (match vars
  789. ((var . vars) (cons (get-slot slots var) (lp vars)))
  790. (_ '()))))
  791. (define (compute-live-slots* slots label live-vars)
  792. (intset-fold (lambda (var live)
  793. (match (get-slot slots var)
  794. (#f live)
  795. (slot (add-live-slot slot live))))
  796. (intmap-ref live-vars label)
  797. 0))
  798. (define (compute-live-in-slots slots label)
  799. (compute-live-slots* slots label live-in))
  800. (define (compute-live-out-slots slots label)
  801. (compute-live-slots* slots label live-out))
  802. (define slot-desc-dead 0)
  803. (define slot-desc-live-raw 1)
  804. (define slot-desc-live-scm 2)
  805. (define slot-desc-unused 3)
  806. (define (compute-slot-map slots live-vars nslots)
  807. (intset-fold
  808. (lambda (var slot-map)
  809. (match (get-slot slots var)
  810. (#f slot-map)
  811. (slot
  812. (let ((desc (match (intmap-ref representations var)
  813. ((or 'u64 'f64 's64) slot-desc-live-raw)
  814. ('scm slot-desc-live-scm))))
  815. (logior slot-map (ash desc (* 2 slot)))))))
  816. live-vars 0))
  817. (define (allocate var hint slots live)
  818. (cond
  819. ((not (intset-ref needs-slot var))
  820. (values slots live))
  821. ((get-slot slots var)
  822. => (lambda (slot)
  823. (values slots (add-live-slot slot live))))
  824. ((and (not hint) (intset-ref lazy var))
  825. (values slots live))
  826. (else
  827. (let ((slot (compute-slot live hint)))
  828. (values (intmap-add! slots var slot)
  829. (add-live-slot slot live))))))
  830. (define (allocate* vars hints slots live)
  831. (match (vector vars hints)
  832. (#(() ()) (values slots live))
  833. (#((var . vars) (hint . hints))
  834. (call-with-values (lambda () (allocate var hint slots live))
  835. (lambda (slots live)
  836. (allocate* vars hints slots live))))))
  837. (define (allocate-defs label vars slots)
  838. (let ((live (compute-live-in-slots slots label))
  839. (live-vars (intmap-ref live-in label)))
  840. (let lp ((vars vars) (slots slots) (live live))
  841. (match vars
  842. (() (values slots live))
  843. ((var . vars)
  844. (call-with-values (lambda () (allocate var #f slots live))
  845. (lambda (slots live)
  846. (lp vars slots
  847. (let ((slot (get-slot slots var)))
  848. (if (and slot (not (intset-ref live-vars var)))
  849. (kill-dead-slot slot live)
  850. live))))))))))
  851. ;; PRE-LIVE are the live slots coming into the term. POST-LIVE
  852. ;; is the subset of PRE-LIVE that is still live after the term
  853. ;; uses its inputs.
  854. (define (allocate-call label k args slots call-allocs pre-live)
  855. (match (get-cont k)
  856. (($ $ktail)
  857. (let ((tail-slots (integers 0 (length args))))
  858. (values (allocate* args tail-slots slots pre-live)
  859. call-allocs)))
  860. (($ $kreceive arity kargs)
  861. (let*-values
  862. (((post-live) (compute-live-out-slots slots label))
  863. ((proc-slot) (compute-call-proc-slot post-live))
  864. ((call-slots) (integers proc-slot (length args)))
  865. ((slots pre-live) (allocate* args call-slots slots pre-live))
  866. ;; Allow the first result to be hinted by its use, but
  867. ;; hint the remaining results to stay in place. This
  868. ;; strikes a balance between avoiding shuffling,
  869. ;; especially for unused extra values, and avoiding frame
  870. ;; size growth due to sparse locals.
  871. ((slots result-live)
  872. (match (get-cont kargs)
  873. (($ $kargs () ())
  874. (values slots post-live))
  875. (($ $kargs (_ . _) (_ . results))
  876. (let ((result-slots (integers (+ proc-slot 2)
  877. (length results))))
  878. (allocate* results result-slots slots post-live)))))
  879. ((slot-map) (compute-slot-map slots (intmap-ref live-out label)
  880. (- proc-slot 2)))
  881. ((call) (make-call-alloc proc-slot slot-map)))
  882. (values slots
  883. (intmap-add! call-allocs label call))))))
  884. (define (allocate-values label k args slots call-allocs)
  885. (match (get-cont k)
  886. (($ $ktail)
  887. (values slots call-allocs))
  888. (($ $kargs (_) (dst))
  889. ;; When there is only one value in play, we allow the dst to be
  890. ;; hinted (see compute-lazy-vars). If the src doesn't have a
  891. ;; slot, then the actual slot for the dst would end up being
  892. ;; decided by the call that args it. Because we don't know the
  893. ;; slot, we can't really compute the parallel moves in that
  894. ;; case, so just bail and rely on the bytecode emitter to
  895. ;; handle the one-value case specially.
  896. (match args
  897. ((src)
  898. (let ((post-live (compute-live-out-slots slots label)))
  899. (values (allocate dst (get-slot slots src) slots post-live)
  900. call-allocs)))))
  901. (($ $kargs _ dst-vars)
  902. (let ((src-slots (get-slots slots args))
  903. (post-live (compute-live-out-slots slots label)))
  904. (values (allocate* dst-vars src-slots slots post-live)
  905. call-allocs)))))
  906. (define (allocate-prompt label k handler slots call-allocs)
  907. (match (get-cont handler)
  908. (($ $kreceive arity kargs)
  909. (let*-values
  910. (((handler-live) (compute-live-in-slots slots handler))
  911. ((proc-slot) (compute-prompt-handler-proc-slot handler-live))
  912. ((slot-map) (compute-slot-map slots (intmap-ref live-in handler)
  913. (- proc-slot 2)))
  914. ((result-vars) (match (get-cont kargs)
  915. (($ $kargs names vars) vars)))
  916. ((value-slots) (integers (1+ proc-slot) (length result-vars)))
  917. ((slots result-live) (allocate* result-vars value-slots
  918. slots handler-live)))
  919. (values slots
  920. (intmap-add! call-allocs label
  921. (make-call-alloc proc-slot slot-map)))))))
  922. (define (allocate-cont label cont slots call-allocs)
  923. (match cont
  924. (($ $kargs names vars ($ $continue k src exp))
  925. (let-values (((slots live) (allocate-defs label vars slots)))
  926. (match exp
  927. (($ $call proc args)
  928. (allocate-call label k (cons proc args) slots call-allocs live))
  929. (($ $callk _ proc args)
  930. (allocate-call label k (cons proc args) slots call-allocs live))
  931. (($ $values args)
  932. (allocate-values label k args slots call-allocs))
  933. (($ $prompt escape? tag handler)
  934. (allocate-prompt label k handler slots call-allocs))
  935. (_
  936. (values slots call-allocs)))))
  937. (_
  938. (values slots call-allocs))))
  939. (call-with-values (lambda ()
  940. (let ((slots (allocate-args cps)))
  941. (intmap-fold allocate-cont cps slots empty-intmap)))
  942. (lambda (slots calls)
  943. (let* ((slots (allocate-lazy-vars cps slots calls live-in lazy))
  944. (shuffles (compute-shuffles cps slots calls live-in))
  945. (frame-sizes (compute-frame-sizes cps slots calls shuffles)))
  946. (make-allocation slots representations constants calls
  947. shuffles frame-sizes))))))