slot-allocation.scm 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845
  1. ;; Continuation-passing style (CPS) intermediate language (IL)
  2. ;; Copyright (C) 2013-2021 Free Software Foundation, Inc.
  3. ;;;; This library is free software; you can redistribute it and/or
  4. ;;;; modify it under the terms of the GNU Lesser General Public
  5. ;;;; License as published by the Free Software Foundation; either
  6. ;;;; version 3 of the License, or (at your option) any later version.
  7. ;;;;
  8. ;;;; This library is distributed in the hope that it will be useful,
  9. ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. ;;;; Lesser General Public License for more details.
  12. ;;;;
  13. ;;;; You should have received a copy of the GNU Lesser General Public
  14. ;;;; License along with this library; if not, write to the Free Software
  15. ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  16. ;;; Commentary:
  17. ;;;
  18. ;;; A module to assign stack slots to variables in a CPS term.
  19. ;;;
  20. ;;; Code:
  21. (define-module (language cps slot-allocation)
  22. #:use-module (ice-9 control)
  23. #:use-module (ice-9 match)
  24. #:use-module (srfi srfi-1)
  25. #:use-module (srfi srfi-9)
  26. #:use-module (srfi srfi-11)
  27. #:use-module (srfi srfi-26)
  28. #:use-module (language cps)
  29. #:use-module (language cps graphs)
  30. #:use-module (language cps utils)
  31. #:use-module (language cps intmap)
  32. #:use-module (language cps intset)
  33. #:export (allocate-slots
  34. lookup-slot
  35. lookup-maybe-slot
  36. lookup-representation
  37. lookup-nlocals
  38. lookup-call-proc-slot
  39. lookup-send-parallel-moves
  40. lookup-receive-parallel-moves
  41. lookup-slot-map))
  42. (define-record-type $allocation
  43. (make-allocation slots representations call-allocs shuffles frame-size)
  44. allocation?
  45. ;; A map of VAR to slot allocation. A slot allocation is an integer,
  46. ;; if the variable has been assigned a slot.
  47. ;;
  48. (slots allocation-slots)
  49. ;; A map of VAR to representation. A representation is 'scm, 'f64,
  50. ;; 'u64, or 's64.
  51. ;;
  52. (representations allocation-representations)
  53. ;; A map of LABEL to /call allocs/, for non-tail $call/$callk, and for
  54. ;; $prompt.
  55. ;;
  56. ;; A call alloc contains two pieces of information: the call's /proc
  57. ;; slot/ and a /dead slot map/. The proc slot indicates the slot of a
  58. ;; procedure in a procedure call, or where the procedure would be in a
  59. ;; multiple-value return.
  60. ;;
  61. ;; The dead slot map indicates, what slots should be ignored by GC
  62. ;; when marking the frame. A dead slot map is a bitfield, as an
  63. ;; integer.
  64. ;;
  65. (call-allocs allocation-call-allocs)
  66. ;; A map of LABEL to /parallel moves/. Parallel moves shuffle locals
  67. ;; into position for a $call, $callk, or $values, or shuffle returned
  68. ;; values back into place at a return continuation.
  69. ;;
  70. ;; A set of moves is expressed as an ordered list of (SRC . DST)
  71. ;; moves, where SRC and DST are slots. This may involve a temporary
  72. ;; variable.
  73. ;;
  74. (shuffles allocation-shuffles)
  75. ;; The number of local slots needed for this function. Because we can
  76. ;; contify common clause tails, we use one frame size for all clauses
  77. ;; to avoid having to adjust the frame size when continuing to labels
  78. ;; from other clauses.
  79. ;;
  80. (frame-size allocation-frame-size))
  81. (define-record-type $call-alloc
  82. (make-call-alloc proc-slot slot-map)
  83. call-alloc?
  84. (proc-slot call-alloc-proc-slot)
  85. (slot-map call-alloc-slot-map))
  86. (define (lookup-maybe-slot var allocation)
  87. (intmap-ref (allocation-slots allocation) var (lambda (_) #f)))
  88. (define (lookup-slot var allocation)
  89. (intmap-ref (allocation-slots allocation) var))
  90. (define (lookup-representation var allocation)
  91. (intmap-ref (allocation-representations allocation) var))
  92. (define *absent* (list 'absent))
  93. (define (lookup-call-alloc k allocation)
  94. (intmap-ref (allocation-call-allocs allocation) k))
  95. (define (lookup-call-proc-slot k allocation)
  96. (or (call-alloc-proc-slot (lookup-call-alloc k allocation))
  97. (error "Call has no proc slot" k)))
  98. (define (lookup-send-parallel-moves k allocation)
  99. (match (intmap-ref (allocation-shuffles allocation) k)
  100. ((send . receive) send)))
  101. (define (lookup-receive-parallel-moves k allocation)
  102. (match (intmap-ref (allocation-shuffles allocation) k)
  103. ((send . receive) receive)))
  104. (define (lookup-slot-map k allocation)
  105. (or (call-alloc-slot-map (lookup-call-alloc k allocation))
  106. (error "Call has no slot map" k)))
  107. (define (lookup-nlocals allocation)
  108. (allocation-frame-size allocation))
  109. (define* (add-prompt-control-flow-edges conts succs #:key complete?)
  110. "For all prompts in DFG in the range [MIN-LABEL, MIN-LABEL +
  111. LABEL-COUNT), invoke F with arguments PROMPT, HANDLER, and BODY for each
  112. body continuation in the prompt."
  113. (define (intset-filter pred set)
  114. (intset-fold (lambda (i set)
  115. (if (pred i) set (intset-remove set i)))
  116. set
  117. set))
  118. (define (intset-any pred set)
  119. (intset-fold (lambda (i res)
  120. (if (or res (pred i)) #t res))
  121. set
  122. #f))
  123. (define (compute-prompt-body label)
  124. (persistent-intset
  125. (let visit-cont ((label label) (level 1) (labels empty-intset))
  126. (cond
  127. ((zero? level) labels)
  128. ((intset-ref labels label) labels)
  129. (else
  130. (let ((labels (intset-add! labels label)))
  131. (match (intmap-ref conts label)
  132. (($ $kreceive arity k) (visit-cont k level labels))
  133. (($ $kargs names syms ($ $continue k src ($ $primcall 'wind)))
  134. (visit-cont k (1+ level) labels))
  135. (($ $kargs names syms ($ $continue k src ($ $primcall 'unwind)))
  136. (visit-cont k (1- level) labels))
  137. (($ $kargs names syms ($ $continue k src exp))
  138. (visit-cont k level labels))
  139. (($ $kargs names syms ($ $branch kf kt))
  140. (visit-cont kf level (visit-cont kt level labels)))
  141. (($ $kargs names syms ($ $switch kf kt*))
  142. (fold1 (lambda (label labels)
  143. (visit-cont label level labels))
  144. (cons kf kt*) labels))
  145. (($ $kargs names syms ($ $prompt k kh src escape? tag))
  146. (visit-cont kh level (visit-cont k (1+ level) labels)))
  147. (($ $kargs names syms ($ $throw)) labels))))))))
  148. (define (visit-prompt label handler succs)
  149. (let ((body (compute-prompt-body label)))
  150. (define (out-or-back-edge? label)
  151. ;; Most uses of visit-prompt-control-flow don't need every body
  152. ;; continuation, and would be happy getting called only for
  153. ;; continuations that postdominate the rest of the body. Unless
  154. ;; you pass #:complete? #t, we only invoke F on continuations
  155. ;; that can leave the body, or on back-edges in loops.
  156. (not (intset-any (lambda (succ)
  157. (and (intset-ref body succ) (< label succ)))
  158. (intmap-ref succs label))))
  159. (intset-fold (lambda (pred succs)
  160. (intmap-replace succs pred handler intset-add))
  161. (if complete? body (intset-filter out-or-back-edge? body))
  162. succs)))
  163. (intmap-fold
  164. (lambda (label cont succs)
  165. (match cont
  166. (($ $kargs _ _ ($ $prompt k kh))
  167. (visit-prompt k kh succs))
  168. (_ succs)))
  169. conts
  170. succs))
  171. (define (compute-needs-slot cps defs uses)
  172. (define (get-defs k) (intmap-ref defs k))
  173. (define (get-uses label) (intmap-ref uses label))
  174. (intmap-fold
  175. (lambda (label cont needs-slot)
  176. (intset-union
  177. needs-slot
  178. (match cont
  179. (($ $kargs)
  180. (intset-union (get-defs label) (get-uses label)))
  181. (($ $kreceive arity k)
  182. ;; Only allocate results of function calls to slots if they are
  183. ;; used.
  184. empty-intset)
  185. (($ $kclause arity body alternate)
  186. (get-defs label))
  187. (($ $kfun src meta self)
  188. (get-defs label))
  189. (($ $ktail)
  190. empty-intset))))
  191. cps
  192. empty-intset))
  193. (define (compute-lazy-vars cps live-in live-out defs needs-slot)
  194. "Compute and return a set of vars whose allocation can be delayed
  195. until their use is seen. These are \"lazy\" vars. A var is lazy if its
  196. uses are calls, it is always dead after the calls, and if the uses flow
  197. to the definition. A flow continues across a node iff the node kills no
  198. values that need slots, and defines only lazy vars. Calls also kill
  199. flows; there's no sense in trying to juggle a pending frame while there
  200. is an active call."
  201. (define (list->intset list)
  202. (persistent-intset
  203. (fold (lambda (i set) (intset-add! set i)) empty-intset list)))
  204. (let* ((succs (compute-successors cps))
  205. (gens (intmap-map
  206. (lambda (label cont)
  207. (match cont
  208. (($ $kargs _ _ ($ $continue _ _ ($ $call proc args)))
  209. (intset-subtract (intset-add (list->intset args) proc)
  210. (intmap-ref live-out label)))
  211. (($ $kargs _ _ ($ $continue _ _ ($ $callk _ proc args)))
  212. (let ((args (list->intset args)))
  213. (intset-subtract (if proc (intset-add args proc) args)
  214. (intmap-ref live-out label))))
  215. (($ $kargs _ _ ($ $continue k _($ $values args)))
  216. (match (intmap-ref cps k)
  217. (($ $ktail) (list->intset args))
  218. (_ #f)))
  219. (_ #f)))
  220. cps))
  221. (kills (intmap-map
  222. (lambda (label in)
  223. (let* ((out (intmap-ref live-out label))
  224. (killed (intset-subtract in out))
  225. (killed-slots (intset-intersect killed needs-slot)))
  226. (and (eq? killed-slots empty-intset)
  227. ;; Kill output variables that need slots.
  228. (intset-intersect (intmap-ref defs label)
  229. needs-slot))))
  230. live-in))
  231. (preds (invert-graph succs))
  232. (old->new (compute-reverse-control-flow-order preds)))
  233. (define (subtract lazy kill)
  234. (cond
  235. ((eq? lazy empty-intset)
  236. lazy)
  237. ((not kill)
  238. empty-intset)
  239. ((and lazy (eq? empty-intset (intset-subtract kill lazy)))
  240. (intset-subtract lazy kill))
  241. (else
  242. empty-intset)))
  243. (define (add live gen) (or gen live))
  244. (define (meet in out)
  245. ;; Initial in is #f.
  246. (if in (intset-intersect in out) out))
  247. (call-with-values
  248. (lambda ()
  249. (let ((succs (rename-graph preds old->new))
  250. (init (persistent-intmap
  251. (intmap-fold
  252. (lambda (old new in)
  253. (intmap-add! in new #f))
  254. old->new empty-intmap)))
  255. (kills (rename-keys kills old->new))
  256. (gens (rename-keys gens old->new)))
  257. (solve-flow-equations succs init init kills gens
  258. subtract add meet)))
  259. (lambda (in out)
  260. ;; A variable is lazy if its uses reach its definition.
  261. (intmap-fold (lambda (label out lazy)
  262. (match (intmap-ref cps label)
  263. (($ $kargs names vars)
  264. (let ((defs (list->intset vars)))
  265. (intset-union lazy (intset-intersect out defs))))
  266. (_ lazy)))
  267. (rename-keys out (invert-bijection old->new))
  268. empty-intset)))))
  269. (define (find-first-zero n)
  270. ;; Naive implementation.
  271. (let lp ((slot 0))
  272. (if (logbit? slot n)
  273. (lp (1+ slot))
  274. slot)))
  275. (define (find-first-trailing-zero n)
  276. (let lp ((slot (let lp ((count 2))
  277. (if (< n (ash 1 (1- count)))
  278. count
  279. ;; Grow upper bound slower than factor 2 to avoid
  280. ;; needless bignum allocation on 32-bit systems
  281. ;; when there are more than 16 locals.
  282. (lp (+ count (ash count -1)))))))
  283. (if (or (zero? slot) (logbit? (1- slot) n))
  284. slot
  285. (lp (1- slot)))))
  286. (define (integers from count)
  287. (if (zero? count)
  288. '()
  289. (cons from (integers (1+ from) (1- count)))))
  290. (define (solve-parallel-move src dst tmp)
  291. "Solve the parallel move problem between src and dst slot lists, which
  292. are comparable with eqv?. A tmp slot may be used."
  293. ;; This algorithm is taken from: "Tilting at windmills with Coq:
  294. ;; formal verification of a compilation algorithm for parallel moves"
  295. ;; by Laurence Rideau, Bernard Paul Serpette, and Xavier Leroy
  296. ;; <http://gallium.inria.fr/~xleroy/publi/parallel-move.pdf>
  297. (define (split-move moves reg)
  298. (let loop ((revhead '()) (tail moves))
  299. (match tail
  300. (((and s+d (s . d)) . rest)
  301. (if (eqv? s reg)
  302. (cons d (append-reverse revhead rest))
  303. (loop (cons s+d revhead) rest)))
  304. (_ #f))))
  305. (define (replace-last-source reg moves)
  306. (match moves
  307. ((moves ... (s . d))
  308. (append moves (list (cons reg d))))))
  309. (let loop ((to-move (map cons src dst))
  310. (being-moved '())
  311. (moved '())
  312. (last-source #f))
  313. ;; 'last-source' should always be equivalent to:
  314. ;; (and (pair? being-moved) (car (last being-moved)))
  315. (match being-moved
  316. (() (match to-move
  317. (() (reverse moved))
  318. (((and s+d (s . d)) . t1)
  319. (if (or (eqv? s d) ; idempotent
  320. (not s)) ; src is a constant and can be loaded directly
  321. (loop t1 '() moved #f)
  322. (loop t1 (list s+d) moved s)))))
  323. (((and s+d (s . d)) . b)
  324. (match (split-move to-move d)
  325. ((r . t1) (loop t1 (acons d r being-moved) moved last-source))
  326. (#f (match b
  327. (() (loop to-move '() (cons s+d moved) #f))
  328. (_ (if (eqv? d last-source)
  329. (loop to-move
  330. (replace-last-source tmp b)
  331. (cons s+d (acons d tmp moved))
  332. tmp)
  333. (loop to-move b (cons s+d moved) last-source))))))))))
  334. (define-inlinable (add-live-slot slot live-slots)
  335. (logior live-slots (ash 1 slot)))
  336. (define-inlinable (kill-dead-slot slot live-slots)
  337. (logand live-slots (lognot (ash 1 slot))))
  338. (define-inlinable (compute-slot live-slots hint)
  339. (if (and hint (not (logbit? hint live-slots)))
  340. hint
  341. (find-first-zero live-slots)))
  342. (define (compute-shuffles cps slots call-allocs live-in)
  343. (define (get-cont label)
  344. (intmap-ref cps label))
  345. (define (get-slot var)
  346. (intmap-ref slots var (lambda (_) #f)))
  347. (define (get-slots vars)
  348. (let lp ((vars vars))
  349. (match vars
  350. ((var . vars) (cons (get-slot var) (lp vars)))
  351. (_ '()))))
  352. (define (get-proc-slot label)
  353. (call-alloc-proc-slot (intmap-ref call-allocs label)))
  354. (define (compute-live-slots label)
  355. (intset-fold (lambda (var live)
  356. (match (get-slot var)
  357. (#f live)
  358. (slot (add-live-slot slot live))))
  359. (intmap-ref live-in label)
  360. 0))
  361. ;; Although some parallel moves may proceed without a temporary slot,
  362. ;; in general one is needed. That temporary slot must not be part of
  363. ;; the source or destination sets, and that slot should not correspond
  364. ;; to a live variable. Usually the source and destination sets are a
  365. ;; subset of the union of the live sets before and after the move.
  366. ;; However for stack slots that don't have names -- those slots that
  367. ;; correspond to function arguments or to function return values -- it
  368. ;; could be that they are out of the computed live set. In that case
  369. ;; they need to be adjoined to the live set, used when choosing a
  370. ;; temporary slot.
  371. (define (compute-tmp-slot live stack-slots)
  372. (find-first-zero (fold add-live-slot live stack-slots)))
  373. (define (parallel-move src-slots dst-slots tmp-slot)
  374. (solve-parallel-move src-slots dst-slots tmp-slot))
  375. ;; A term can have two sets of shuffles: one set to shuffle operands
  376. ;; to the term (the "send moves"), and one set to shuffle results (the
  377. ;; "receive moves"). An example of send moves would be a call getting
  378. ;; its arguments into position, or a $values performing a parallel
  379. ;; move. Receive moves come when binding call results to values, for
  380. ;; local returns (call returns) or non-local returns (prompt
  381. ;; handlers).
  382. (define (add-shuffles shuffles label send-moves receive-moves)
  383. (intmap-add! shuffles label (cons send-moves receive-moves)))
  384. (define (compute-receive-shuffles k proc-slot)
  385. (match (get-cont k)
  386. (($ $kreceive arity kargs)
  387. (compute-receive-shuffles kargs proc-slot))
  388. (($ $kargs names results)
  389. (let* ((value-slots (integers proc-slot (length results)))
  390. (result-slots (get-slots results))
  391. ;; Filter out unused results.
  392. (value-slots (filter-map (lambda (val result) (and result val))
  393. value-slots result-slots))
  394. (result-slots (filter (lambda (x) x) result-slots))
  395. (live (compute-live-slots k)))
  396. (parallel-move value-slots
  397. result-slots
  398. (compute-tmp-slot live value-slots))))))
  399. (define (add-call-shuffles label k args shuffles)
  400. (match (get-cont k)
  401. (($ $ktail)
  402. (let* ((live (compute-live-slots label))
  403. (tail-slots (integers 0 (length args)))
  404. (send-moves (parallel-move (get-slots args)
  405. tail-slots
  406. (compute-tmp-slot live tail-slots))))
  407. (add-shuffles shuffles label send-moves '())))
  408. ((or ($ $kargs) ($ $kreceive))
  409. (let* ((live (compute-live-slots label))
  410. (proc-slot (get-proc-slot label))
  411. (call-slots (integers proc-slot (length args)))
  412. (send-moves (parallel-move (get-slots args)
  413. call-slots
  414. (compute-tmp-slot live call-slots)))
  415. (receive-moves (compute-receive-shuffles k proc-slot)))
  416. (add-shuffles shuffles label send-moves receive-moves)))))
  417. (define (add-values-shuffles label k args shuffles)
  418. (match (get-cont k)
  419. (($ $ktail)
  420. (let* ((live (compute-live-slots label))
  421. (src-slots (get-slots args))
  422. (dst-slots (integers 0 (length args)))
  423. (send-moves (parallel-move src-slots dst-slots
  424. (compute-tmp-slot live dst-slots))))
  425. (add-shuffles shuffles label send-moves '())))
  426. (($ $kargs _ dst-vars)
  427. (let* ((live (logior (compute-live-slots label)
  428. (compute-live-slots k)))
  429. (src-slots (get-slots args))
  430. (dst-slots (get-slots dst-vars))
  431. (send-moves (parallel-move src-slots dst-slots
  432. (compute-tmp-slot live '()))))
  433. (add-shuffles shuffles label send-moves '())))))
  434. (define (add-prompt-shuffles label k handler shuffles)
  435. (define receive-moves
  436. (compute-receive-shuffles handler (get-proc-slot label)))
  437. (add-shuffles shuffles label '() receive-moves))
  438. (define (compute-shuffles label cont shuffles)
  439. (match cont
  440. (($ $kargs names vars ($ $continue k src exp))
  441. (match exp
  442. (($ $call proc args)
  443. (add-call-shuffles label k (cons proc args) shuffles))
  444. (($ $callk _ proc args)
  445. (add-call-shuffles label k (if proc (cons proc args) args) shuffles))
  446. (($ $values args)
  447. (add-values-shuffles label k args shuffles))
  448. (_ shuffles)))
  449. (($ $kargs names vars ($ $prompt k kh src escape? tag))
  450. (add-prompt-shuffles label k kh shuffles))
  451. (_ shuffles)))
  452. (persistent-intmap
  453. (intmap-fold compute-shuffles cps empty-intmap)))
  454. (define (compute-frame-size cps slots call-allocs shuffles)
  455. ;; Minimum frame has one slot: the closure.
  456. (define minimum-frame-size 1)
  457. (define (get-shuffles label)
  458. (intmap-ref shuffles label))
  459. (define (get-proc-slot label)
  460. (match (intmap-ref call-allocs label (lambda (_) #f))
  461. (#f 0) ;; Tail call.
  462. (($ $call-alloc proc-slot) proc-slot)))
  463. (define (max-size var size)
  464. (match (intmap-ref slots var (lambda (_) #f))
  465. (#f size)
  466. (slot (max size (1+ slot)))))
  467. (define (max-size* vars size)
  468. (fold max-size size vars))
  469. (define (shuffle-size* moves size)
  470. (match moves
  471. (() size)
  472. (((src . dst) . moves)
  473. (shuffle-size* moves (max size (1+ src) (1+ dst))))))
  474. (define (shuffle-size send+receive size)
  475. (match send+receive
  476. ((send . receive) (shuffle-size* send (shuffle-size* receive size)))))
  477. (define (call-size label nargs size)
  478. (shuffle-size (get-shuffles label)
  479. (max (+ (get-proc-slot label) nargs) size)))
  480. (define (measure-cont label cont size)
  481. (match cont
  482. (($ $kargs names vars term)
  483. (let ((size (max-size* vars size)))
  484. (match term
  485. (($ $continue _ _ ($ $call proc args))
  486. (call-size label (1+ (length args)) size))
  487. (($ $continue _ _ ($ $callk _ proc args))
  488. (let ((nclosure (if proc 1 0)))
  489. (call-size label (+ nclosure (length args)) size)))
  490. (($ $continue _ _ ($ $values args))
  491. (shuffle-size (get-shuffles label) size))
  492. (($ $prompt)
  493. (shuffle-size (get-shuffles label) size))
  494. (_ size))))
  495. (_ size)))
  496. (intmap-fold measure-cont cps minimum-frame-size))
  497. (define (allocate-args cps)
  498. (define (add-clause entry first-slot slots)
  499. (match (intmap-ref cps entry)
  500. (($ $kclause arity body alt)
  501. (let ((slots (add-clause body first-slot slots)))
  502. (if alt
  503. (add-clause alt first-slot slots)
  504. slots)))
  505. (($ $kargs names vars)
  506. (let lp ((vars vars) (n first-slot) (slots slots))
  507. (match vars
  508. (() slots)
  509. ((var . vars)
  510. (lp vars
  511. (1+ n)
  512. (intmap-add slots var n))))))))
  513. (match (intmap-ref cps (intmap-next cps))
  514. (($ $kfun src meta self tail entry)
  515. (add-clause
  516. entry
  517. (if self 1 0)
  518. (if self
  519. (intmap-add empty-intmap self 0)
  520. empty-intmap)))))
  521. (define (allocate-lazy-vars cps slots call-allocs live-in lazy)
  522. (define (compute-live-slots slots label)
  523. (intset-fold (lambda (var live)
  524. (match (intmap-ref slots var (lambda (_) #f))
  525. (#f live)
  526. (slot (add-live-slot slot live))))
  527. (intmap-ref live-in label)
  528. 0))
  529. (define (allocate var hint slots live)
  530. (match (and hint (intmap-ref slots var (lambda (_) #f)))
  531. (#f (if (intset-ref lazy var)
  532. (let ((slot (compute-slot live hint)))
  533. (values (intmap-add! slots var slot)
  534. (add-live-slot slot live)))
  535. (values slots live)))
  536. (slot (values slots (add-live-slot slot live)))))
  537. (define (allocate* vars hints slots live)
  538. (match (vector vars hints)
  539. (#(() ()) slots)
  540. (#((var . vars) (hint . hints))
  541. (let-values (((slots live) (allocate var hint slots live)))
  542. (allocate* vars hints slots live)))))
  543. (define (get-proc-slot label)
  544. (match (intmap-ref call-allocs label (lambda (_) #f))
  545. (#f 0)
  546. (call (call-alloc-proc-slot call))))
  547. (define (allocate-call label args slots)
  548. (allocate* args (integers (get-proc-slot label) (length args))
  549. slots (compute-live-slots slots label)))
  550. (define (allocate-values label k args slots)
  551. (match (intmap-ref cps k)
  552. (($ $ktail)
  553. (allocate* args (integers 0 (length args))
  554. slots (compute-live-slots slots label)))
  555. (($ $kargs names vars)
  556. (allocate* args
  557. (map (cut intmap-ref slots <> (lambda (_) #f)) vars)
  558. slots (compute-live-slots slots label)))))
  559. (define (allocate-lazy label cont slots)
  560. (match cont
  561. (($ $kargs names vars ($ $continue k src exp))
  562. (match exp
  563. (($ $call proc args)
  564. (allocate-call label (cons proc args) slots))
  565. (($ $callk _ proc args)
  566. (allocate-call label (if proc (cons proc args) args) slots))
  567. (($ $values args)
  568. (allocate-values label k args slots))
  569. (_ slots)))
  570. (_
  571. slots)))
  572. ;; Sweep right to left to visit uses before definitions.
  573. (persistent-intmap
  574. (intmap-fold-right allocate-lazy cps slots)))
  575. (define* (allocate-slots cps #:key (precolor-calls? #t))
  576. (let*-values (((defs uses) (compute-defs-and-uses cps))
  577. ((representations) (compute-var-representations cps))
  578. ((live-in live-out)
  579. (let* ((succs (compute-successors cps))
  580. (succs+ (add-prompt-control-flow-edges cps succs))
  581. (preds (invert-graph succs+)))
  582. (compute-live-variables preds defs uses)))
  583. ((needs-slot) (compute-needs-slot cps defs uses))
  584. ((lazy) (if precolor-calls?
  585. (compute-lazy-vars cps live-in live-out defs
  586. needs-slot)
  587. empty-intset)))
  588. (define frame-size 3)
  589. (define (empty-live-slots)
  590. #b0)
  591. (define (compute-call-proc-slot live-slots)
  592. (+ frame-size (find-first-trailing-zero live-slots)))
  593. (define (compute-prompt-handler-proc-slot live-slots)
  594. (find-first-trailing-zero live-slots))
  595. (define (get-cont label)
  596. (intmap-ref cps label))
  597. (define (get-slot slots var)
  598. (intmap-ref slots var (lambda (_) #f)))
  599. (define (get-slots slots vars)
  600. (let lp ((vars vars))
  601. (match vars
  602. ((var . vars) (cons (get-slot slots var) (lp vars)))
  603. (_ '()))))
  604. (define (compute-live-slots* slots label live-vars)
  605. (intset-fold (lambda (var live)
  606. (match (get-slot slots var)
  607. (#f live)
  608. (slot (add-live-slot slot live))))
  609. (intmap-ref live-vars label)
  610. 0))
  611. (define (compute-live-in-slots slots label)
  612. (compute-live-slots* slots label live-in))
  613. (define (compute-live-out-slots slots label)
  614. (compute-live-slots* slots label live-out))
  615. (define slot-desc-dead 0)
  616. (define slot-desc-live-raw 1)
  617. (define slot-desc-live-scm 2)
  618. (define slot-desc-unused 3)
  619. (define (compute-slot-map slots live-vars nslots)
  620. (intset-fold
  621. (lambda (var slot-map)
  622. (match (get-slot slots var)
  623. (#f slot-map)
  624. (slot
  625. (let ((desc (match (intmap-ref representations var)
  626. ((or 'u64 'f64 's64 'ptr) slot-desc-live-raw)
  627. ('scm slot-desc-live-scm))))
  628. (logior slot-map (ash desc (* 2 slot)))))))
  629. live-vars 0))
  630. (define (allocate var hint slots live)
  631. (cond
  632. ((not (intset-ref needs-slot var))
  633. (values slots live))
  634. ((get-slot slots var)
  635. => (lambda (slot)
  636. (values slots (add-live-slot slot live))))
  637. ((and (not hint) (intset-ref lazy var))
  638. (values slots live))
  639. (else
  640. (let ((slot (compute-slot live hint)))
  641. (values (intmap-add! slots var slot)
  642. (add-live-slot slot live))))))
  643. (define (allocate* vars hints slots live)
  644. (match (vector vars hints)
  645. (#(() ()) (values slots live))
  646. (#((var . vars) (hint . hints))
  647. (call-with-values (lambda () (allocate var hint slots live))
  648. (lambda (slots live)
  649. (allocate* vars hints slots live))))))
  650. (define (allocate-defs label vars slots)
  651. (let ((live (compute-live-in-slots slots label))
  652. (live-vars (intmap-ref live-in label)))
  653. (let lp ((vars vars) (slots slots) (live live))
  654. (match vars
  655. (() (values slots live))
  656. ((var . vars)
  657. (call-with-values (lambda () (allocate var #f slots live))
  658. (lambda (slots live)
  659. (lp vars slots
  660. (let ((slot (get-slot slots var)))
  661. (if (and slot (not (intset-ref live-vars var)))
  662. (kill-dead-slot slot live)
  663. live))))))))))
  664. ;; PRE-LIVE are the live slots coming into the term. POST-LIVE
  665. ;; is the subset of PRE-LIVE that is still live after the term
  666. ;; uses its inputs.
  667. (define (allocate-call label k args slots call-allocs pre-live)
  668. (match (get-cont k)
  669. (($ $ktail)
  670. (let ((tail-slots (integers 0 (length args))))
  671. (values (allocate* args tail-slots slots pre-live)
  672. call-allocs)))
  673. (($ $kreceive arity kargs)
  674. (allocate-call label kargs args slots call-allocs pre-live))
  675. (($ $kargs names results)
  676. (let*-values
  677. (((post-live) (compute-live-out-slots slots label))
  678. ((proc-slot) (compute-call-proc-slot post-live))
  679. ((call-slots) (integers proc-slot (length args)))
  680. ((slots pre-live) (allocate* args call-slots slots pre-live))
  681. ;; Allow the first result to be hinted by its use, but
  682. ;; hint the remaining results to stay in place. This
  683. ;; strikes a balance between avoiding shuffling,
  684. ;; especially for unused extra values, and avoiding frame
  685. ;; size growth due to sparse locals.
  686. ((slots result-live)
  687. (match results
  688. (()
  689. (values slots post-live))
  690. ((_ . results*)
  691. (let ((result-slots (integers (+ proc-slot 1)
  692. (length results*))))
  693. (allocate* results* result-slots slots post-live)))))
  694. ((slot-map) (compute-slot-map slots (intmap-ref live-out label)
  695. (- proc-slot frame-size)))
  696. ((call) (make-call-alloc proc-slot slot-map)))
  697. (values slots
  698. (intmap-add! call-allocs label call))))))
  699. (define (allocate-values label k args slots call-allocs)
  700. (match (get-cont k)
  701. (($ $ktail)
  702. (values slots call-allocs))
  703. (($ $kargs (_) (dst))
  704. ;; When there is only one value in play, we allow the dst to be
  705. ;; hinted (see compute-lazy-vars). If the src doesn't have a
  706. ;; slot, then the actual slot for the dst would end up being
  707. ;; decided by the call that args it. Because we don't know the
  708. ;; slot, we can't really compute the parallel moves in that
  709. ;; case, so just bail and rely on the bytecode emitter to
  710. ;; handle the one-value case specially.
  711. (match args
  712. ((src)
  713. (let ((post-live (compute-live-out-slots slots label)))
  714. (values (allocate dst (get-slot slots src) slots post-live)
  715. call-allocs)))))
  716. (($ $kargs _ dst-vars)
  717. (let ((src-slots (get-slots slots args))
  718. (post-live (compute-live-out-slots slots label)))
  719. (values (allocate* dst-vars src-slots slots post-live)
  720. call-allocs)))))
  721. (define (allocate-prompt label k handler slots call-allocs)
  722. (match (get-cont handler)
  723. (($ $kreceive arity kargs)
  724. (let*-values
  725. (((handler-live) (compute-live-in-slots slots handler))
  726. ((proc-slot) (compute-prompt-handler-proc-slot handler-live))
  727. ((slot-map) (compute-slot-map slots (intmap-ref live-in handler)
  728. (- proc-slot frame-size)))
  729. ((result-vars) (match (get-cont kargs)
  730. (($ $kargs names vars) vars)))
  731. ((value-slots) (integers proc-slot (length result-vars)))
  732. ((slots result-live) (allocate* result-vars value-slots
  733. slots handler-live)))
  734. (values slots
  735. (intmap-add! call-allocs label
  736. (make-call-alloc proc-slot slot-map)))))))
  737. (define (allocate-cont label cont slots call-allocs)
  738. (match cont
  739. (($ $kargs names vars term)
  740. (let-values (((slots live) (allocate-defs label vars slots)))
  741. (match term
  742. (($ $continue k src ($ $call proc args))
  743. (allocate-call label k (cons proc args) slots call-allocs live))
  744. (($ $continue k src ($ $callk _ proc args))
  745. (allocate-call label k (if proc (cons proc args) args)
  746. slots call-allocs live))
  747. (($ $continue k src ($ $values args))
  748. (allocate-values label k args slots call-allocs))
  749. (($ $prompt k kh src escape? tag)
  750. (allocate-prompt label k kh slots call-allocs))
  751. (_
  752. (values slots call-allocs)))))
  753. (_
  754. (values slots call-allocs))))
  755. (call-with-values (lambda ()
  756. (let ((slots (allocate-args cps)))
  757. (intmap-fold allocate-cont cps slots empty-intmap)))
  758. (lambda (slots calls)
  759. (let* ((slots (allocate-lazy-vars cps slots calls live-in lazy))
  760. (shuffles (compute-shuffles cps slots calls live-in))
  761. (frame-size (compute-frame-size cps slots calls shuffles)))
  762. (make-allocation slots representations calls shuffles frame-size))))))