ksr1.s 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425
  1. /*
  2. * QuickThreads -- Threads-building toolkit.
  3. * Copyright (c) 1993 by David Keppel
  4. *
  5. * Permission to use, copy, modify and distribute this software and
  6. * its documentation for any purpose and without fee is hereby
  7. * granted, provided that the above copyright notice and this notice
  8. * appear in all copies. This software is provided as a
  9. * proof-of-concept and for demonstration purposes; there is no
  10. * representation about the suitability of this software for any
  11. * purpose.
  12. */
  13. .file "ksr1.s"
  14. .def .debug; .endef
  15. .align 128
  16. .globl qt_blocki
  17. .globl qt_blocki$TXT
  18. .globl qt_block
  19. .globl qt_block$TXT
  20. .globl qt_start$TXT
  21. .globl qt_start
  22. .globl qt_abort$TXT
  23. .globl qt_abort
  24. .globl qt_vstart
  25. .globl qt_vstart$TXT
  26. #
  27. # KSR convention: on procedure calls, load both the procedure address
  28. # and a pointer to a constant block. The address of function `f' is
  29. # `f$TXT', and the constant block address is `f'. The constant block
  30. # has several reserved values:
  31. #
  32. # 8 bytes fpu register save mask
  33. # 4 bytes ipu register save mask
  34. # 4 bytes ceu register save mask
  35. # f: f$TXT
  36. # ... whatever you want ... (not quite...read on)
  37. #
  38. # Note, by the way, that a pointer to a function is passed as a
  39. # pointer to the constant area, and the constant area has the text
  40. # address.
  41. #
  42. #
  43. # Procedures that do not return structures prefix their code with
  44. #
  45. # proc$TXT:
  46. # finop; cxnop
  47. # finop; cxnop
  48. # <proc code>
  49. #
  50. # Calls to those procedures branch to a 16 byte offset (4 instrs) in
  51. # to the procedure to skip those instructions.
  52. #
  53. # Procedures that return structures use a different code prefix:
  54. #
  55. # proc$TXT:
  56. # finop; beq.qt %rc, %rc, 24 # return value entry
  57. # finop; cxnop
  58. # finop; movi8 0, %rc # no return value entry
  59. # <proc code>
  60. #
  61. # Calls that want the returned structure branch directly to the
  62. # procedure address. Callers that don't want (or aren't expecting) a
  63. # return value branch 16 bytes in to the procedure, which will zero
  64. # %rc, telling the called procedure not to return a structure.
  65. #
  66. #
  # qt_blocki / qt_abort -- integer-register context switch.
  #
  # Both names share one constant block and one entry point. The routine
  # pushes a 512-byte frame on the current stack, saves %fp, %cp, the
  # return address (%c14), CEU registers %c15-%c24 and %c26-%c30, and IPU
  # registers %i12-%i30 into it, switches %sp to the stack passed in %i5,
  # and calls the helper with the old %sp as its first argument. When the
  # helper returns, control continues 16 bytes past the address stored at
  # 8(%sp) of the new stack: qt_restore$TXT for a blocked thread, or
  # qt_start$TXT for a manufactured one.
  #
  # Frame layout (byte offsets from the adjusted %sp):
  #   0        return address (%c14)
  #   8        qt_restore$TXT (resume address)
  #   144-256  %i30 down to %i16
  #   264-296  unused (gap kept for st64 alignment)
  #   304-328  %i15 down to %i12
  #   336-368  %c30 down to %c26
  #   376      unused (%c25, the EFP, is not saved)
  #   384-456  %c24 down to %c15
  #   496      caller's %cp
  #   504      caller's %fp
  #
  67. # On entry:
  68. # %i2 -- control block of helper function to run
  69. # (dereference to get helper)
  70. # %i3 -- a1
  71. # %i4 -- a2
  72. # %i5 -- sp of new to run
  # %c10 -- constant block of this routine (offset 8 holds qt_restore$TXT)
  # %c14 -- return address; the return is made with jmp 32(%c14)
  73. #
  74. .data
  # Register save masks preceding the constant block; presumably fpu
  # (8 bytes), ipu, ceu, per the layout described earlier in this file.
  75. .half 0x0, 0x0, 0x7ffff000, 0x7fff8000
  76. qt_blocki:
  77. qt_abort:
  78. .word qt_blocki$TXT
  79. .word qt_restore$TXT
  80. .text
  81. qt_abort$TXT:
  82. qt_blocki$TXT:
  83. finop ; cxnop # entry prefix
  84. finop ; cxnop # entry prefix
  85. add8.ntr 75,%i31,%i31 ; movi8 512,%c5 # ICR; stk adjust
  86. finop ; ssub8.ntr 0,%sp,%c5,%sp
  87. finop ; st8 %fp,504(%sp) # Save caller's fp
  88. finop ; st8 %cp,496(%sp) # Save caller's cp
  # The resume address is loaded into %c4, not %c5: %c5 must still hold
  # the frame size (512) when the frame pointer is computed three pairs
  # below. %c4 is free here; it is reloaded with the helper's address
  # before the call and is not read in between.
  89. finop ; ld8 8(%c10),%c4 # ld qt_restore$TXT
  90. finop ; st8 %c14,0(%sp) # Save special ret addr
  91. finop ; mov8_8 %c10, %cp # Our cp
  92. finop ; sadd8.ntr 0,%sp,%c5,%fp # Our frame ptr (sp + 512)
  93. finop ; st8 %c4,8(%sp) # st qt_restore$TXT
  94. #
  95. # CEU registers %c15-%c24, %c26-%c30 (%c14 we restore later)
  96. #
  97. finop ; st8 %c15,456(%sp)
  98. finop ; st8 %c16,448(%sp)
  99. finop ; st8 %c17,440(%sp)
  100. finop ; st8 %c18,432(%sp)
  101. finop ; st8 %c19,424(%sp)
  102. finop ; st8 %c20,416(%sp)
  103. finop ; st8 %c21,408(%sp)
  104. finop ; st8 %c22,400(%sp)
  105. finop ; st8 %c23,392(%sp)
  106. finop ; st8 %c24,384(%sp)
  107. #
  108. # %c25 is the Enclosing Frame Pointer (EFP) -- since C doesn't
  109. # use nested procedures, we ignore it (leaving a gap, though)
  110. #
  111. finop ; st8 %c26,368(%sp)
  112. finop ; st8 %c27,360(%sp)
  113. finop ; st8 %c28,352(%sp)
  114. finop ; st8 %c29,344(%sp)
  115. finop ; st8 %c30,336(%sp)
  116. #
  117. # IPU registers %i12-%i30
  118. #
  119. finop ; st8 %i12,328(%sp)
  120. finop ; st8 %i13,320(%sp)
  121. finop ; st8 %i14,312(%sp)
  122. finop ; st8 %i15,304(%sp)
  123. # (gap to get alignment for st64)
  124. # -- Doesn't work on version 1.1.3 of the OS
  125. # finop ; st64 %i16,256(%sp)
  126. finop ; st8 %i16,256(%sp)
  127. finop ; st8 %i17,248(%sp)
  128. finop ; st8 %i18,240(%sp)
  129. finop ; st8 %i19,232(%sp)
  130. finop ; st8 %i20,224(%sp)
  131. finop ; st8 %i21,216(%sp)
  132. finop ; st8 %i22,208(%sp)
  133. finop ; st8 %i23,200(%sp)
  134. finop ; st8 %i24,192(%sp)
  135. finop ; st8 %i25,184(%sp)
  136. finop ; st8 %i26,176(%sp)
  137. finop ; st8 %i27,168(%sp)
  138. finop ; st8 %i28,160(%sp)
  139. finop ; st8 %i29,152(%sp)
  140. finop ; st8 %i30,144(%sp)
  141. #
  142. # FPU already saved, or saving not necessary
  143. #
  144. #
  145. # Switch to the stack passed in as fourth argument to the block
  146. # routine (%i5) and call the helper routine passed in as the first
  147. # argument (%i2). Note that the address of the helper's constant
  148. # block is passed in, so we must dereference it to get the helper's text
  149. # address.
  150. #
  151. finop ; movb8_8 %i2,%c10 # helper's ConstBlock
  152. finop ; cxnop # Delay slot, fill w/
  153. finop ; cxnop # .. 2 st8 from above
  154. finop ; ld8 0(%c10),%c4 # load addr of helper
  155. finop ; movb8_8 %sp, %i2 # 1st arg to helper
  156. # is this stack; other
  157. # args remain in regs
  158. finop ; movb8_8 %i5,%sp # switch stacks
  159. finop ; jsr %c14,16(%c4) # call helper
  # NOTE(review): the other call sites in this file place the nargs pair
  # third after the jsr; here it comes first. Confirm this is intended.
  160. movi8 3, %i0 ; movi8 0,%c8 # nargs brain dmg
  161. finop ; cxnop
  162. finop ; cxnop
  163. #
  164. # Here is where behavior differs for threads being restored and threads
  165. # being started. Blocked threads have a pointer to qt_restore$TXT on
  166. # the top of their stacks; manufactured stacks have a pointer to qt_start$TXT
  167. # on the top of their stacks. With this setup, starting threads
  168. # skip the (unnecessary) restore operations.
  169. #
  170. # We jump to an offset of 16 to either (1) skip past the two noop pairs
  171. # at the start of qt_start$TXT, or (2) skip past the two noop pairs
  172. # after qt_restore$TXT.
  173. #
  174. finop ; ld8 8(%sp),%c4 # resume address from new stack
  175. finop ; cxnop
  176. finop ; cxnop
  177. finop ; jmp 16(%c4)
  178. qt_restore$TXT:
  179. finop ; cxnop
  180. finop ; cxnop
  181. #
  182. # Point of Restore:
  183. #
  184. # The helper function will return here. Any result it has placed in
  185. # a return register (most likely %i0) will not get overwritten below
  186. # and will consequently be the return value of the blocking routine.
  187. #
  188. #
  189. # CEU registers %c15-%c24, %c26-%c30 (%c14 we restore later)
  190. #
  191. finop ; ld8 456(%sp),%c15
  192. finop ; ld8 448(%sp),%c16
  193. finop ; ld8 440(%sp),%c17
  194. finop ; ld8 432(%sp),%c18
  195. finop ; ld8 424(%sp),%c19
  196. finop ; ld8 416(%sp),%c20
  197. finop ; ld8 408(%sp),%c21
  198. finop ; ld8 400(%sp),%c22
  199. finop ; ld8 392(%sp),%c23
  200. finop ; ld8 384(%sp),%c24
  201. #
  202. # %c25 is the Enclosing Frame Pointer (EFP) -- since C doesn't
  203. # use nested procedures, we ignore it (leaving a gap, though)
  204. #
  205. finop ; ld8 368(%sp),%c26
  206. finop ; ld8 360(%sp),%c27
  207. finop ; ld8 352(%sp),%c28
  208. finop ; ld8 344(%sp),%c29
  209. finop ; ld8 336(%sp),%c30
  210. #
  211. # IPU registers %i12-%i30
  212. #
  213. finop ; ld8 328(%sp),%i12
  214. finop ; ld8 320(%sp),%i13
  215. finop ; ld8 312(%sp),%i14
  216. finop ; ld8 304(%sp),%i15
  217. # (gap to get alignment for ld64)
  218. # -- Doesn't work on version 1.1.3 of the OS
  219. # finop ; ld64 256(%sp),%i16
  220. finop ; ld8 256(%sp),%i16
  221. finop ; ld8 248(%sp),%i17
  222. finop ; ld8 240(%sp),%i18
  223. finop ; ld8 232(%sp),%i19
  224. finop ; ld8 224(%sp),%i20
  225. finop ; ld8 216(%sp),%i21
  226. finop ; ld8 208(%sp),%i22
  227. finop ; ld8 200(%sp),%i23
  228. finop ; ld8 192(%sp),%i24
  229. finop ; ld8 184(%sp),%i25
  230. finop ; ld8 176(%sp),%i26
  231. finop ; ld8 168(%sp),%i27
  232. finop ; ld8 160(%sp),%i28
  233. finop ; ld8 152(%sp),%i29
  234. finop ; ld8 144(%sp),%i30
  235. #
  236. # FPU registers don't need to be loaded, or will be loaded by an
  237. # enclosing scope (e.g., if this is called by qt_block).
  238. #
  239. #
  240. # Load the special registers. We don't load the stack ptr because
  241. # the new stack is passed in as an argument, we don't load the EFP
  242. # because we don't use it, and we load the return address specially
  243. # off the top of the stack.
  244. #
  245. finop ; ld8 0(%sp),%c14 # return addr
  246. finop ; ld8 496(%sp),%cp
  247. finop ; ld8 504(%sp),%fp
  # The two pairs after the jmp pop the 512-byte frame (presumably they
  # execute in the jmp's delay slots).
  248. finop ; jmp 32(%c14) # jump back to thread
  249. finop ; movi8 512,%c5 # stack adjust
  250. finop ; sadd8.ntr 0,%sp,%c5,%sp
  # qt_block -- context switch that also preserves FPU registers.
  #
  # Pushes its own 512-byte frame, saves %fp, %cp and the return address
  # (%c14) in it, stores %f16-%f63 with six st64 instructions, then calls
  # qt_blocki$TXT without touching the incoming argument registers. After
  # qt_blocki returns, the FPU registers and the saved special registers
  # are reloaded and the frame is popped.
  #
  # Frame layout (byte offsets from the adjusted %sp):
  #   64-447   %f56-%f63, %f48-%f55, ... %f16-%f23 (64 bytes per group)
  #   488      return address (%c14)
  #   496      caller's %cp
  #   504      caller's %fp
  #
  # Constant block entries (8 bytes each, as indexed by the ld8 below):
  #   0 qt_block$TXT, 8 qt_error, 16 qt_error$TXT, 24 qt_blocki
  #
  251. .data
  252. .half 0x0, 0x0, 0x7ffff000, 0x7fff8000
  253. qt_block:
  254. .word qt_block$TXT
  255. .word qt_error
  256. .word qt_error$TXT
  257. .word qt_blocki
  258. #
  259. # Handle saving and restoring the FPU regs, relying on qt_blocki
  260. # to save and restore the remaining registers.
  261. #
  262. .text
  263. qt_block$TXT:
  264. finop ; cxnop # entry prefix
  265. finop ; cxnop # entry prefix
  266. add8.ntr 29,%i31,%i31 ; movi8 512,%c5 # ICR; stk adjust
  267. finop ; ssub8.ntr 0,%sp,%c5,%sp
  268. finop ; st8 %fp,504(%sp) # Save caller's fp
  269. finop ; st8 %cp,496(%sp) # Save caller's cp
  270. finop ; st8 %c14,488(%sp) # store ret addr
  271. finop ; sadd8.ntr 0,%sp,%c5,%fp # Our frame ptr (sp + 512)
  272. finop ; mov8_8 %c10, %cp # Our cp
  273. #
  274. # Store 8 registers at once...destination must be a multiple of 64
  275. #
  276. finop ; st64 %f16,384(%sp)
  277. finop ; st64 %f24,320(%sp)
  278. finop ; st64 %f32,256(%sp)
  279. finop ; st64 %f40,192(%sp)
  280. finop ; st64 %f48,128(%sp)
  281. finop ; st64 %f56,64(%sp)
  282. #
  283. # Call the integer blocking routine, passing the arguments passed to us
  284. #
  # %c10 receives the fourth constant-block entry (qt_blocki), i.e. the
  # callee's constant block.
  # NOTE(review): the jsr targets qt_blocki$TXT itself rather than the
  # +16 offset described in the calling-convention notes, so the two
  # entry-prefix pairs are executed; confirm this is intended.
  285. finop ; ld8 24(%cp), %c10
  286. finop ; cxnop
  287. finop ; jsr %c14, qt_blocki$TXT
  288. finop ; cxnop
  289. finop ; cxnop
  290. movi8 4,%i0 ; movi8 0,%c8 # nargs brain dmg
  291. #
  292. # Load 8 registers at once...source must be a multiple of 64
  293. #
  294. finop ; ld64 64(%sp),%f56
  295. finop ; ld64 128(%sp),%f48
  296. finop ; ld64 192(%sp),%f40
  297. finop ; ld64 256(%sp),%f32
  298. finop ; ld64 320(%sp),%f24
  299. finop ; ld64 384(%sp),%f16
  300. finop ; ld8 488(%sp),%c14 # return addr
  301. finop ; ld8 496(%sp),%cp # caller's cp
  302. finop ; ld8 504(%sp),%fp # caller's fp
  # The two pairs after the jmp pop the 512-byte frame (presumably they
  # execute in the jmp's delay slots).
  303. finop ; jmp 32(%c14) # jump back to thread
  304. finop ; movi8 512,%c5 # stack adjust
  305. finop ; sadd8.ntr 0,%sp,%c5,%sp
  # qt_start -- first code executed by a newly created thread.
  #
  # Reached through the `jmp 16(%c4)' at the end of the switch code in
  # qt_blocki, so execution begins just past the two leading noop pairs.
  # Reads the thread's initial data from the stack, calls `only' with
  # three arguments, and jumps to qt_error$TXT if `only' ever returns.
  #
  # Stack slots read (byte offsets from %sp):
  #   16  `u' arg             -> %i2
  #   24  `t' arg             -> %i3
  #   32  `userf' arg         -> %i4
  #   40  `only' const block  -> %c10
  #
  306. .data
  307. .half 0x0, 0x0, 0x7ffff000, 0x7fff8000
  308. qt_start:
  309. .word qt_start$TXT
  310. #
  311. # A new thread is set up to "appear" as if it were executing code at
  312. # the beginning of qt_start and then it called a blocking routine
  313. # (qt_blocki). So when a new thread starts to run, it gets unblocked
  314. # by the code above and "returns" to `qt_start$TXT' in the
  315. # restore step of the switch. Blocked threads jump to 16(qt_restore$TXT),
  316. # and starting threads jump to 16(qt_start$TXT).
  317. #
  318. .text
  319. qt_start$TXT:
  320. finop ; cxnop #
  321. finop ; cxnop #
  322. finop ; ld8 40(%sp),%c10 # `only' constant block
  323. finop ; ld8 32(%sp),%i4 # `userf' arg.
  324. finop ; ld8 24(%sp),%i3 # `t' arg.
  325. finop ; ld8 0(%c10),%c4 # `only' text location
  326. finop ; ld8 16(%sp),%i2 # `u' arg.
  327. finop ; cxnop
  328. finop ; jsr %c14,16(%c4) # call `only'
  329. #
  330. # Pop the frame used to store the thread's initial data
  # (128 bytes; the add sits directly after the jsr, presumably in its
  # delay slot, so it takes effect as part of the call sequence)
  331. #
  332. finop ; sadd8.ntr 0,%sp,128,%sp
  333. finop ; cxnop
  334. movi8 2,%i0 ; movi8 0,%c8 # nargs brain dmg
  335. #
  336. # If we ever return, it's an error.
  337. #
  338. finop ; jmp qt_error$TXT
  339. finop ; cxnop
  340. finop ; cxnop
  341. movi8 0,%i0 ; movi8 0,%c8 # nargs brain dmg
  # qt_vstart -- start-up entry for the varargs flavour of thread
  # creation (presumably; inferred from the name and the `startup' /
  # `only' calls below). The original author marks this routine as
  # broken, so treat every detail here as unverified.
  #
  # Sequence: adjust %sp down by 512, call `startup' with `t' in %i2,
  # then load `only' and its arguments from the stack, call it, and jump
  # to qt_error$TXT if it returns.
  #
  342. #
  343. # This stuff is broken
  344. #
  345. .data
  346. .half 0x0, 0x0, 0x7ffff000, 0x7fff8000
  347. qt_vstart:
  348. .word qt_vstart$TXT
  349. .text
  350. qt_vstart$TXT:
  351. finop ; cxnop # entry prefix
  352. finop ; cxnop # entry prefix
  353. finop ; cxnop
  354. finop ; cxnop
  355. add8.ntr 11,%i31,%i31 ; movi8 512,%c5
  356. finop ; ssub8.ntr 0,%sp,%c5,%sp # fix stack
  357. finop ; ld8 8(%sp),%i2 # load `t' as arg to
  358. finop ; cxnop # `startup'
  359. finop ; cxnop
  360. finop ; ld8 16(%sp),%c10 # `startup' const block
  361. finop ; cxnop
  362. finop ; cxnop
  363. finop ; ld8 0(%c10),%c4 # `startup' text loc.
  364. finop ; cxnop
  365. finop ; cxnop
  366. finop ; jsr %c14,16(%c4) # call `startup'
  367. finop ; cxnop
  368. finop ; cxnop
  369. movi8 1, %i0 ; movi8 0,%c8 # nargs brain dmg
  370. #
  371. # finop ; sadd 0,%sp,128,%sp # alter stack
  372. #
  # NOTE(review): the next four pairs repeat the same load of 8(%sp)
  # into %i2; their purpose is not evident from this file.
  373. finop ; ld8 8(%sp),%i2 # load `t' as arg to
  374. finop ; ld8 8(%sp),%i2 # load `t' as arg to
  375. finop ; ld8 8(%sp),%i2 # load `t' as arg to
  376. finop ; ld8 8(%sp),%i2 # load `t' as arg to
  # NOTE(review): the slot comments below disagree with the ones above
  # (8(%sp) is labelled `t' earlier and `u' here; 16(%sp) is labelled
  # the `startup' const block earlier and `t' here). Verify the intended
  # stack layout against the code that builds it.
  377. finop ; ld8 32(%sp),%c10 # `only' constant block
  378. finop ; ld8 8(%sp),%i2 # `u' arg.
  379. finop ; ld8 16(%sp),%i3 # `t' arg.
  380. finop ; ld8 0(%c10),%c4 # `only' text location
  381. finop ; ld8 24(%sp),%i4 # `userf' arg.
  382. finop ; cxnop
  # NOTE(review): this jsr names %c4 as its first operand, whereas every
  # other call in this file uses %c14 there; possibly part of why the
  # routine is marked broken.
  383. finop ; jsr %c4,16(%c4) # call `only'
  384. finop ; cxnop
  385. finop ; cxnop
  386. #
  387. # If the callee ever calls `nargs', the following instruction (pair)
  388. # will be executed. However, we don't know when we compile this code
  389. # how many args are being passed. So we give our best guess: 0.
  390. #
  391. movi8 0,%i0 ; movi8 0,%c8 # nargs brain dmg
  392. #
  393. # If we ever return, it's an error.
  394. #
  395. finop ; jmp qt_error$TXT
  396. finop ; cxnop
  397. finop ; cxnop
  398. movi8 0,%i0 ; movi8 0,%c8 # nargs brain dmg