/*
 * trampoline.S: Save all processor states
 *
 * Copyright (c) 2007 Fleming Feng <fleming.feng@intel.com>
 * Copyright (c) 2007 Anthony Xu <anthony.xu@intel.com>
 */
  #include <asm/asmmacro.h>
  #include "asm-offsets.h"

  /*
   * CTX(name) expands to VMM_CTX_<name>_OFFSET. The macros in this file add
   * that value to a context_t base address to reach one field of the
   * context. The offset constants are presumably supplied by asm-offsets.h.
   */
  #define CTX(name) VMM_CTX_##name##_OFFSET
  /*
   * SAVE_BRANCH_REGS: store branch registers b0-b5 into the context.
   *
   * In:       r32 = context_t base address
   * Clobbers: r2, r3, r16, r17
   *
   * r2 starts at CTX(B0) and r3 at CTX(B1). Each store post-increments its
   * pointer by 16, so r2 receives b0/b2/b4 and r3 receives b1/b3/b5.
   */
  #define SAVE_BRANCH_REGS \
	add r2 = CTX(B0), r32; \
	add r3 = CTX(B1), r32; \
	mov r16 = b0; \
	mov r17 = b1; \
	;; \
	st8 [r2] = r16, 16; \
	st8 [r3] = r17, 16; \
	;; \
	mov r16 = b2; \
	mov r17 = b3; \
	;; \
	st8 [r2] = r16, 16; \
	st8 [r3] = r17, 16; \
	;; \
	mov r16 = b4; \
	mov r17 = b5; \
	;; \
	st8 [r2] = r16;		/* last pair: pointers are not advanced */ \
	st8 [r3] = r17; \
	;;
  /*
   * RESTORE_BRANCH_REGS: reload branch registers b0-b5 from the context.
   *
   * In:       r33 = context_t base address
   * Clobbers: r2, r3, r16, r17
   *
   * Mirror image of the save side: r2 starts at CTX(B0) and feeds b0/b2/b4,
   * r3 starts at CTX(B1) and feeds b1/b3/b5, both stepping by 16.
   */
  #define RESTORE_BRANCH_REGS \
	add r2 = CTX(B0), r33; \
	add r3 = CTX(B1), r33; \
	;; \
	ld8 r16 = [r2], 16; \
	ld8 r17 = [r3], 16; \
	;; \
	mov b0 = r16; \
	mov b1 = r17; \
	;; \
	ld8 r16 = [r2], 16; \
	ld8 r17 = [r3], 16; \
	;; \
	mov b2 = r16; \
	mov b3 = r17; \
	;; \
	ld8 r16 = [r2];		/* last pair: pointers are not advanced */ \
	ld8 r17 = [r3]; \
	;; \
	mov b4 = r16; \
	mov b5 = r17; \
	;;
  /*
   * SAVE_GENERAL_REGS: spill r4-r7, r12 and r13 into the context.
   *
   * In:       r32 = context_t base address
   * Expects:  bsw == 1 (register bank 1 selected), as stated by the
   *           original header of this macro.
   * Clobbers: r2, r3; st8.spill also records NaT bits in ar.unat.
   *
   * r2 starts at CTX(R4) and r3 at CTX(R5). After r6/r7 the pointers are
   * advanced by 48 instead of 16 before r12/r13 are written; presumably
   * that is the distance to the r12/r13 slots in context_t.
   */
  #define SAVE_GENERAL_REGS \
	add r2 = CTX(R4), r32; \
	add r3 = CTX(R5), r32; \
	;; \
	.mem.offset 0,0; \
	st8.spill [r2] = r4, 16; \
	.mem.offset 8,0; \
	st8.spill [r3] = r5, 16; \
	;; \
	.mem.offset 0,0; \
	st8.spill [r2] = r6, 48; \
	.mem.offset 8,0; \
	st8.spill [r3] = r7, 48; \
	;; \
	.mem.offset 0,0; \
	st8.spill [r2] = r12; \
	.mem.offset 8,0; \
	st8.spill [r3] = r13; \
	;;
  /*
   * RESTORE_GENERAL_REGS: fill r4-r7, r12 and r13 from the context.
   *
   * In:       r33 = context_t base address
   * Expects:  bsw == 1 (register bank 1 selected), as stated by the
   *           original header of this macro.
   * Clobbers: r2, r3
   *
   * Uses the same pointer walk as the save side (step 16, then 48).
   * ld8.fill takes NaT bits from ar.unat, so ar.unat must already hold the
   * value that was current when the registers were spilled.
   */
  #define RESTORE_GENERAL_REGS \
	add r2 = CTX(R4), r33; \
	add r3 = CTX(R5), r33; \
	;; \
	ld8.fill r4 = [r2], 16; \
	ld8.fill r5 = [r3], 16; \
	;; \
	ld8.fill r6 = [r2], 48; \
	ld8.fill r7 = [r3], 48; \
	;; \
	ld8.fill r12 = [r2]; \
	ld8.fill r13 = [r3]; \
	;;
  /*
   * SAVE_KERNEL_REGS: store kernel registers ar.k0-ar.k7 into the context.
   *
   * In:       r32 = context_t base address
   * Clobbers: r2, r3, r16, r17
   *
   * r2 starts at CTX(KR0) and takes the even-numbered registers, r3 starts
   * at CTX(KR1) and takes the odd-numbered ones; both step by 16.
   */
  #define SAVE_KERNEL_REGS \
	add r2 = CTX(KR0), r32; \
	add r3 = CTX(KR1), r32; \
	mov r16 = ar.k0; \
	mov r17 = ar.k1; \
	;; \
	st8 [r2] = r16, 16; \
	st8 [r3] = r17, 16; \
	;; \
	mov r16 = ar.k2; \
	mov r17 = ar.k3; \
	;; \
	st8 [r2] = r16, 16; \
	st8 [r3] = r17, 16; \
	;; \
	mov r16 = ar.k4; \
	mov r17 = ar.k5; \
	;; \
	st8 [r2] = r16, 16; \
	st8 [r3] = r17, 16; \
	;; \
	mov r16 = ar.k6; \
	mov r17 = ar.k7; \
	;; \
	st8 [r2] = r16;		/* last pair: pointers are not advanced */ \
	st8 [r3] = r17; \
	;;
  /*
   * RESTORE_KERNEL_REGS: reload kernel registers ar.k0-ar.k7 from the
   * context.
   *
   * In:       r33 = context_t base address
   * Clobbers: r2, r3, r16, r17
   *
   * r2 starts at CTX(KR0) and feeds the even-numbered registers, r3 starts
   * at CTX(KR1) and feeds the odd-numbered ones. Every load, including the
   * last pair, post-increments its pointer by 16.
   */
  #define RESTORE_KERNEL_REGS \
	add r2 = CTX(KR0), r33; \
	add r3 = CTX(KR1), r33; \
	;; \
	ld8 r16 = [r2], 16; \
	ld8 r17 = [r3], 16; \
	;; \
	mov ar.k0 = r16; \
	mov ar.k1 = r17; \
	;; \
	ld8 r16 = [r2], 16; \
	ld8 r17 = [r3], 16; \
	;; \
	mov ar.k2 = r16; \
	mov ar.k3 = r17; \
	;; \
	ld8 r16 = [r2], 16; \
	ld8 r17 = [r3], 16; \
	;; \
	mov ar.k4 = r16; \
	mov ar.k5 = r17; \
	;; \
	ld8 r16 = [r2], 16; \
	ld8 r17 = [r3], 16; \
	;; \
	mov ar.k6 = r16; \
	mov ar.k7 = r17; \
	;;
  /*
   * SAVE_APP_REGS: store application registers into the context, in this
   * order: bspstore, rnat, fcr, eflag, cflg, fsr, fir, fdr, unat, fpsr,
   * pfs, lc.
   *
   * In:       r32 = context_t base address
   * Clobbers: r2, r16
   *
   * A single pointer (r2) is used. Each store post-increments it by the
   * distance CTX(next) - CTX(current), so the walk does not depend on the
   * fields being adjacent. Within each instruction group the store of the
   * previous value is paired with the read of the next register.
   */
  #define SAVE_APP_REGS \
	add r2 = CTX(BSPSTORE), r32; \
	mov r16 = ar.bspstore; \
	;; \
	st8 [r2] = r16, CTX(RNAT)-CTX(BSPSTORE); \
	mov r16 = ar.rnat; \
	;; \
	st8 [r2] = r16, CTX(FCR)-CTX(RNAT); \
	mov r16 = ar.fcr; \
	;; \
	st8 [r2] = r16, CTX(EFLAG)-CTX(FCR); \
	mov r16 = ar.eflag; \
	;; \
	st8 [r2] = r16, CTX(CFLG)-CTX(EFLAG); \
	mov r16 = ar.cflg; \
	;; \
	st8 [r2] = r16, CTX(FSR)-CTX(CFLG); \
	mov r16 = ar.fsr; \
	;; \
	st8 [r2] = r16, CTX(FIR)-CTX(FSR); \
	mov r16 = ar.fir; \
	;; \
	st8 [r2] = r16, CTX(FDR)-CTX(FIR); \
	mov r16 = ar.fdr; \
	;; \
	st8 [r2] = r16, CTX(UNAT)-CTX(FDR); \
	mov r16 = ar.unat; \
	;; \
	st8 [r2] = r16, CTX(FPSR)-CTX(UNAT); \
	mov r16 = ar.fpsr; \
	;; \
	st8 [r2] = r16, CTX(PFS)-CTX(FPSR); \
	mov r16 = ar.pfs; \
	;; \
	st8 [r2] = r16, CTX(LC)-CTX(PFS); \
	mov r16 = ar.lc; \
	;; \
	st8 [r2] = r16; \
	;;
  /*
   * RESTORE_APP_REGS: reload application registers from the context, in
   * this order: bspstore, rnat, fcr, eflag, cflg, fsr, fir, fdr, unat,
   * fpsr, pfs, lc.
   *
   * In:       r33 = context_t base address
   * Clobbers: r2, r16
   *
   * A single pointer (r2) is used. Each load post-increments it by
   * CTX(next) - CTX(current). Within each instruction group the write of
   * the value loaded in the previous group is paired with the next load.
   */
  #define RESTORE_APP_REGS \
	add r2 = CTX(BSPSTORE), r33; \
	;; \
	ld8 r16 = [r2], CTX(RNAT)-CTX(BSPSTORE); \
	;; \
	mov ar.bspstore = r16; \
	ld8 r16 = [r2], CTX(FCR)-CTX(RNAT); \
	;; \
	mov ar.rnat = r16; \
	ld8 r16 = [r2], CTX(EFLAG)-CTX(FCR); \
	;; \
	mov ar.fcr = r16; \
	ld8 r16 = [r2], CTX(CFLG)-CTX(EFLAG); \
	;; \
	mov ar.eflag = r16; \
	ld8 r16 = [r2], CTX(FSR)-CTX(CFLG); \
	;; \
	mov ar.cflg = r16; \
	ld8 r16 = [r2], CTX(FIR)-CTX(FSR); \
	;; \
	mov ar.fsr = r16; \
	ld8 r16 = [r2], CTX(FDR)-CTX(FIR); \
	;; \
	mov ar.fir = r16; \
	ld8 r16 = [r2], CTX(UNAT)-CTX(FDR); \
	;; \
	mov ar.fdr = r16; \
	ld8 r16 = [r2], CTX(FPSR)-CTX(UNAT); \
	;; \
	mov ar.unat = r16; \
	ld8 r16 = [r2], CTX(PFS)-CTX(FPSR); \
	;; \
	mov ar.fpsr = r16; \
	ld8 r16 = [r2], CTX(LC)-CTX(PFS); \
	;; \
	mov ar.pfs = r16; \
	ld8 r16 = [r2]; \
	;; \
	mov ar.lc = r16; \
	;;
  /*
   * SAVE_CTL_REGS: store control registers cr.dcr, cr.iva and cr.pta into
   * the context.
   *
   * In:       r32 = context_t base address
   * Clobbers: r2, r16
   *
   * r2 starts at CTX(DCR) and is advanced by the distance to the next
   * field after each store.
   */
  #define SAVE_CTL_REGS \
	add r2 = CTX(DCR), r32; \
	mov r16 = cr.dcr; \
	;; \
	st8 [r2] = r16, CTX(IVA)-CTX(DCR); \
	;; \
	mov r16 = cr.iva; \
	;; \
	st8 [r2] = r16, CTX(PTA)-CTX(IVA); \
	;; \
	mov r16 = cr.pta; \
	;; \
	st8 [r2] = r16; \
	;;
  /*
   * RESTORE_CTL_REGS: reload control registers cr.dcr, cr.iva and cr.pta
   * from the context.
   *
   * In:       r33 = context_t base address
   * Clobbers: r2, r16
   *
   * Each control-register write is followed by dv_serialize_data, which is
   * defined outside this file (presumably in asm/asmmacro.h).
   */
  #define RESTORE_CTL_REGS \
	add r2 = CTX(DCR), r33; \
	;; \
	ld8 r16 = [r2], CTX(IVA)-CTX(DCR); \
	;; \
	mov cr.dcr = r16; \
	dv_serialize_data; \
	;; \
	ld8 r16 = [r2], CTX(PTA)-CTX(IVA); \
	;; \
	mov cr.iva = r16; \
	dv_serialize_data; \
	;; \
	ld8 r16 = [r2]; \
	;; \
	mov cr.pta = r16; \
	dv_serialize_data; \
	;;
  /*
   * SAVE_REGION_REGS: store region registers rr0-rr5 and rr7 into the
   * context. rr6 is not read.
   *
   * In:       r32 = context_t base address
   * Clobbers: r2, r16, r17, r18
   *
   * r18 holds the region-register index: "dep.z r18 = n, 61, 3" places n
   * in bits 61-63 of an otherwise zero value. r2 starts at CTX(RR0) and
   * steps by 8. After the rr5 store it steps by 16, which skips one 8-byte
   * slot (presumably the slot reserved for rr6).
   */
  #define SAVE_REGION_REGS \
	add r2 = CTX(RR0), r32; \
	mov r16 = rr[r0]; \
	dep.z r18 = 1, 61, 3; \
	;; \
	st8 [r2] = r16, 8;	/* rr0 */ \
	mov r17 = rr[r18]; \
	dep.z r18 = 2, 61, 3; \
	;; \
	st8 [r2] = r17, 8;	/* rr1 */ \
	mov r16 = rr[r18]; \
	dep.z r18 = 3, 61, 3; \
	;; \
	st8 [r2] = r16, 8;	/* rr2 */ \
	mov r17 = rr[r18]; \
	dep.z r18 = 4, 61, 3; \
	;; \
	st8 [r2] = r17, 8;	/* rr3 */ \
	mov r16 = rr[r18]; \
	dep.z r18 = 5, 61, 3; \
	;; \
	st8 [r2] = r16, 8;	/* rr4 */ \
	mov r17 = rr[r18]; \
	dep.z r18 = 7, 61, 3; \
	;; \
	st8 [r2] = r17, 16;	/* rr5, then step over one slot */ \
	mov r16 = rr[r18]; \
	;; \
	st8 [r2] = r16, 8;	/* rr7 */ \
	;;
  /*
   * RESTORE_REGION_REGS: reload region registers rr0-rr5 and rr7 from the
   * context. rr6 is not written.
   *
   * In:       r33 = context_t base address
   * Clobbers: r2, r18, r20-r25, r27
   *
   * All seven values are first loaded into r20-r25 and r27, with r2
   * stepping by 8 and by 16 after the rr5 slot. They are then written to
   * the region registers one per instruction group. r18 carries the
   * index, built with "dep.z r18 = n, 61, 3". A final srlz.i serializes
   * the updates.
   */
  #define RESTORE_REGION_REGS \
	add r2 = CTX(RR0), r33; \
	mov r18 = r0; \
	;; \
	ld8 r20 = [r2], 8;	/* rr0 */ \
	;; \
	ld8 r21 = [r2], 8;	/* rr1 */ \
	;; \
	ld8 r22 = [r2], 8;	/* rr2 */ \
	;; \
	ld8 r23 = [r2], 8;	/* rr3 */ \
	;; \
	ld8 r24 = [r2], 8;	/* rr4 */ \
	;; \
	ld8 r25 = [r2], 16;	/* rr5, then step over one slot */ \
	;; \
	ld8 r27 = [r2];		/* rr7 */ \
	;; \
	mov rr[r18] = r20;	/* rr0 */ \
	dep.z r18 = 1, 61, 3; \
	;; \
	mov rr[r18] = r21;	/* rr1 */ \
	dep.z r18 = 2, 61, 3; \
	;; \
	mov rr[r18] = r22;	/* rr2 */ \
	dep.z r18 = 3, 61, 3; \
	;; \
	mov rr[r18] = r23;	/* rr3 */ \
	dep.z r18 = 4, 61, 3; \
	;; \
	mov rr[r18] = r24;	/* rr4 */ \
	dep.z r18 = 5, 61, 3; \
	;; \
	mov rr[r18] = r25;	/* rr5 */ \
	dep.z r18 = 7, 61, 3; \
	;; \
	mov rr[r18] = r27;	/* rr7 */ \
	;; \
	srlz.i; \
	;;
  /*
   * SAVE_DEBUG_REGS: store ibr[0]-ibr[7] and dbr[0]-dbr[7] into the
   * context.
   *
   * In:       r32 = context_t base address
   * Clobbers: r2, r3, r16, r17, r18
   *
   * r2 starts at CTX(IBR0) and r3 at CTX(DBR0); both step by 8 after every
   * store. r18 holds the register index for the next read.
   *
   * Exactly eight pairs are written, indices 0 through 7 in order, which
   * is what RESTORE_DEBUG_REGS reads back. An earlier version of this
   * macro repeated the index-2 stanza, so it stored nine pairs, shifted
   * every later slot by one and wrote one entry past the eighth slot.
   */
  #define SAVE_DEBUG_REGS \
	add r2 = CTX(IBR0), r32; \
	add r3 = CTX(DBR0), r32; \
	mov r16 = ibr[r0]; \
	mov r17 = dbr[r0]; \
	;; \
	st8 [r2] = r16, 8;	/* index 0 */ \
	st8 [r3] = r17, 8; \
	add r18 = 1, r0; \
	;; \
	mov r16 = ibr[r18]; \
	mov r17 = dbr[r18]; \
	;; \
	st8 [r2] = r16, 8;	/* index 1 */ \
	st8 [r3] = r17, 8; \
	add r18 = 2, r0; \
	;; \
	mov r16 = ibr[r18]; \
	mov r17 = dbr[r18]; \
	;; \
	st8 [r2] = r16, 8;	/* index 2 */ \
	st8 [r3] = r17, 8; \
	add r18 = 3, r0; \
	;; \
	mov r16 = ibr[r18]; \
	mov r17 = dbr[r18]; \
	;; \
	st8 [r2] = r16, 8;	/* index 3 */ \
	st8 [r3] = r17, 8; \
	add r18 = 4, r0; \
	;; \
	mov r16 = ibr[r18]; \
	mov r17 = dbr[r18]; \
	;; \
	st8 [r2] = r16, 8;	/* index 4 */ \
	st8 [r3] = r17, 8; \
	add r18 = 5, r0; \
	;; \
	mov r16 = ibr[r18]; \
	mov r17 = dbr[r18]; \
	;; \
	st8 [r2] = r16, 8;	/* index 5 */ \
	st8 [r3] = r17, 8; \
	add r18 = 6, r0; \
	;; \
	mov r16 = ibr[r18]; \
	mov r17 = dbr[r18]; \
	;; \
	st8 [r2] = r16, 8;	/* index 6 */ \
	st8 [r3] = r17, 8; \
	add r18 = 7, r0; \
	;; \
	mov r16 = ibr[r18]; \
	mov r17 = dbr[r18]; \
	;; \
	st8 [r2] = r16, 8;	/* index 7 */ \
	st8 [r3] = r17, 8; \
	;;
  /*
   * RESTORE_DEBUG_REGS: reload ibr[0]-ibr[7] and dbr[0]-dbr[7] from the
   * context.
   *
   * In:       r33 = pointer to the context_t structure
   * Clobbers: r2, r3, r16-r19 and ar.lc (used as the loop counter)
   *
   * ar.lc is set to 7, so the br.cloop body runs eight times. r17 is the
   * register index; r2 and r3 walk the IBR and DBR slots in steps of 8.
   * Each pair of writes is followed by srlz.i.
   */
  #define RESTORE_DEBUG_REGS \
	add r2 = CTX(IBR0), r33; \
	add r3 = CTX(DBR0), r33; \
	mov r16 = 7; \
	mov r17 = r0; \
	;; \
	mov ar.lc = r16; \
	;; \
  1: \
	ld8 r18 = [r2], 8; \
	ld8 r19 = [r3], 8; \
	;; \
	mov ibr[r17] = r18; \
	mov dbr[r17] = r19; \
	;; \
	srlz.i; \
	;; \
	add r17 = 1, r17; \
	br.cloop.sptk 1b; \
	;;
  /*
   * VMM_STF_PAIR(fa, fb): spill fa through r2 and fb through r3, advance
   * both pointers by 32, then close the instruction group.
   */
  #ifndef VMM_STF_PAIR
  #define VMM_STF_PAIR(fa, fb) \
	stf.spill.nta [r2]=fa,32; \
	stf.spill.nta [r3]=fb,32; \
	;;
  #endif

  /*
   * SAVE_FPU_LOW: spill floating-point registers f2-f31 into the context.
   *
   * In:       r32 = context_t base address
   * Clobbers: r2, r3
   *
   * r2 starts at CTX(F2) and takes the even-numbered registers, r3 starts
   * at CTX(F3) and takes the odd-numbered ones. The final pair (f30/f31)
   * is stored without advancing the pointers.
   */
  #define SAVE_FPU_LOW \
	add r2=CTX(F2),r32; \
	add r3=CTX(F3),r32; \
	;; \
	VMM_STF_PAIR(f2, f3) \
	VMM_STF_PAIR(f4, f5) \
	VMM_STF_PAIR(f6, f7) \
	VMM_STF_PAIR(f8, f9) \
	VMM_STF_PAIR(f10, f11) \
	VMM_STF_PAIR(f12, f13) \
	VMM_STF_PAIR(f14, f15) \
	VMM_STF_PAIR(f16, f17) \
	VMM_STF_PAIR(f18, f19) \
	VMM_STF_PAIR(f20, f21) \
	VMM_STF_PAIR(f22, f23) \
	VMM_STF_PAIR(f24, f25) \
	VMM_STF_PAIR(f26, f27) \
	VMM_STF_PAIR(f28, f29) \
	stf.spill.nta [r2]=f30; \
	stf.spill.nta [r3]=f31; \
	;;
  /*
   * VMM_STF_PAIR(fa, fb): spill fa through r2 and fb through r3, advance
   * both pointers by 32, then close the instruction group.
   */
  #ifndef VMM_STF_PAIR
  #define VMM_STF_PAIR(fa, fb) \
	stf.spill.nta [r2]=fa,32; \
	stf.spill.nta [r3]=fb,32; \
	;;
  #endif

  /*
   * SAVE_FPU_HIGH: spill floating-point registers f32-f127 into the
   * context.
   *
   * In:       r32 = context_t base address
   * Clobbers: r2, r3
   *
   * r2 starts at CTX(F32) and takes the even-numbered registers, r3 starts
   * at CTX(F33) and takes the odd-numbered ones. The final pair
   * (f126/f127) is stored without advancing the pointers.
   */
  #define SAVE_FPU_HIGH \
	add r2=CTX(F32),r32; \
	add r3=CTX(F33),r32; \
	;; \
	VMM_STF_PAIR(f32, f33) \
	VMM_STF_PAIR(f34, f35) \
	VMM_STF_PAIR(f36, f37) \
	VMM_STF_PAIR(f38, f39) \
	VMM_STF_PAIR(f40, f41) \
	VMM_STF_PAIR(f42, f43) \
	VMM_STF_PAIR(f44, f45) \
	VMM_STF_PAIR(f46, f47) \
	VMM_STF_PAIR(f48, f49) \
	VMM_STF_PAIR(f50, f51) \
	VMM_STF_PAIR(f52, f53) \
	VMM_STF_PAIR(f54, f55) \
	VMM_STF_PAIR(f56, f57) \
	VMM_STF_PAIR(f58, f59) \
	VMM_STF_PAIR(f60, f61) \
	VMM_STF_PAIR(f62, f63) \
	VMM_STF_PAIR(f64, f65) \
	VMM_STF_PAIR(f66, f67) \
	VMM_STF_PAIR(f68, f69) \
	VMM_STF_PAIR(f70, f71) \
	VMM_STF_PAIR(f72, f73) \
	VMM_STF_PAIR(f74, f75) \
	VMM_STF_PAIR(f76, f77) \
	VMM_STF_PAIR(f78, f79) \
	VMM_STF_PAIR(f80, f81) \
	VMM_STF_PAIR(f82, f83) \
	VMM_STF_PAIR(f84, f85) \
	VMM_STF_PAIR(f86, f87) \
	VMM_STF_PAIR(f88, f89) \
	VMM_STF_PAIR(f90, f91) \
	VMM_STF_PAIR(f92, f93) \
	VMM_STF_PAIR(f94, f95) \
	VMM_STF_PAIR(f96, f97) \
	VMM_STF_PAIR(f98, f99) \
	VMM_STF_PAIR(f100, f101) \
	VMM_STF_PAIR(f102, f103) \
	VMM_STF_PAIR(f104, f105) \
	VMM_STF_PAIR(f106, f107) \
	VMM_STF_PAIR(f108, f109) \
	VMM_STF_PAIR(f110, f111) \
	VMM_STF_PAIR(f112, f113) \
	VMM_STF_PAIR(f114, f115) \
	VMM_STF_PAIR(f116, f117) \
	VMM_STF_PAIR(f118, f119) \
	VMM_STF_PAIR(f120, f121) \
	VMM_STF_PAIR(f122, f123) \
	VMM_STF_PAIR(f124, f125) \
	stf.spill.nta [r2]=f126; \
	stf.spill.nta [r3]=f127; \
	;;
  /*
   * VMM_LDF_PAIR(fa, fb): fill fa through r2 and fb through r3, advance
   * both pointers by 32, then close the instruction group.
   */
  #ifndef VMM_LDF_PAIR
  #define VMM_LDF_PAIR(fa, fb) \
	ldf.fill.nta fa = [r2], 32; \
	ldf.fill.nta fb = [r3], 32; \
	;;
  #endif

  /*
   * RESTORE_FPU_LOW: fill floating-point registers f2-f31 from the
   * context.
   *
   * In:       r33 = pointer to the context_t structure
   * Clobbers: r2, r3
   *
   * r2 starts at CTX(F2) and feeds the even-numbered registers, r3 starts
   * at CTX(F3) and feeds the odd-numbered ones. Every load, including the
   * last pair, advances its pointer by 32.
   */
  #define RESTORE_FPU_LOW \
	add r2 = CTX(F2), r33; \
	add r3 = CTX(F3), r33; \
	;; \
	VMM_LDF_PAIR(f2, f3) \
	VMM_LDF_PAIR(f4, f5) \
	VMM_LDF_PAIR(f6, f7) \
	VMM_LDF_PAIR(f8, f9) \
	VMM_LDF_PAIR(f10, f11) \
	VMM_LDF_PAIR(f12, f13) \
	VMM_LDF_PAIR(f14, f15) \
	VMM_LDF_PAIR(f16, f17) \
	VMM_LDF_PAIR(f18, f19) \
	VMM_LDF_PAIR(f20, f21) \
	VMM_LDF_PAIR(f22, f23) \
	VMM_LDF_PAIR(f24, f25) \
	VMM_LDF_PAIR(f26, f27) \
	VMM_LDF_PAIR(f28, f29) \
	VMM_LDF_PAIR(f30, f31)
  /*
   * VMM_LDF_PAIR(fa, fb): fill fa through r2 and fb through r3, advance
   * both pointers by 32, then close the instruction group.
   */
  #ifndef VMM_LDF_PAIR
  #define VMM_LDF_PAIR(fa, fb) \
	ldf.fill.nta fa = [r2], 32; \
	ldf.fill.nta fb = [r3], 32; \
	;;
  #endif

  /*
   * RESTORE_FPU_HIGH: fill floating-point registers f32-f127 from the
   * context.
   *
   * In:       r33 = pointer to the context_t structure
   * Clobbers: r2, r3
   *
   * r2 starts at CTX(F32) and feeds the even-numbered registers, r3 starts
   * at CTX(F33) and feeds the odd-numbered ones. Every load, including the
   * last pair, advances its pointer by 32.
   */
  #define RESTORE_FPU_HIGH \
	add r2 = CTX(F32), r33; \
	add r3 = CTX(F33), r33; \
	;; \
	VMM_LDF_PAIR(f32, f33) \
	VMM_LDF_PAIR(f34, f35) \
	VMM_LDF_PAIR(f36, f37) \
	VMM_LDF_PAIR(f38, f39) \
	VMM_LDF_PAIR(f40, f41) \
	VMM_LDF_PAIR(f42, f43) \
	VMM_LDF_PAIR(f44, f45) \
	VMM_LDF_PAIR(f46, f47) \
	VMM_LDF_PAIR(f48, f49) \
	VMM_LDF_PAIR(f50, f51) \
	VMM_LDF_PAIR(f52, f53) \
	VMM_LDF_PAIR(f54, f55) \
	VMM_LDF_PAIR(f56, f57) \
	VMM_LDF_PAIR(f58, f59) \
	VMM_LDF_PAIR(f60, f61) \
	VMM_LDF_PAIR(f62, f63) \
	VMM_LDF_PAIR(f64, f65) \
	VMM_LDF_PAIR(f66, f67) \
	VMM_LDF_PAIR(f68, f69) \
	VMM_LDF_PAIR(f70, f71) \
	VMM_LDF_PAIR(f72, f73) \
	VMM_LDF_PAIR(f74, f75) \
	VMM_LDF_PAIR(f76, f77) \
	VMM_LDF_PAIR(f78, f79) \
	VMM_LDF_PAIR(f80, f81) \
	VMM_LDF_PAIR(f82, f83) \
	VMM_LDF_PAIR(f84, f85) \
	VMM_LDF_PAIR(f86, f87) \
	VMM_LDF_PAIR(f88, f89) \
	VMM_LDF_PAIR(f90, f91) \
	VMM_LDF_PAIR(f92, f93) \
	VMM_LDF_PAIR(f94, f95) \
	VMM_LDF_PAIR(f96, f97) \
	VMM_LDF_PAIR(f98, f99) \
	VMM_LDF_PAIR(f100, f101) \
	VMM_LDF_PAIR(f102, f103) \
	VMM_LDF_PAIR(f104, f105) \
	VMM_LDF_PAIR(f106, f107) \
	VMM_LDF_PAIR(f108, f109) \
	VMM_LDF_PAIR(f110, f111) \
	VMM_LDF_PAIR(f112, f113) \
	VMM_LDF_PAIR(f114, f115) \
	VMM_LDF_PAIR(f116, f117) \
	VMM_LDF_PAIR(f118, f119) \
	VMM_LDF_PAIR(f120, f121) \
	VMM_LDF_PAIR(f122, f123) \
	VMM_LDF_PAIR(f124, f125) \
	VMM_LDF_PAIR(f126, f127)
  /*
   * SAVE_PTK_REGS: store protection key registers pkr[0]-pkr[7] into the
   * context.
   *
   * In:       r32 = context_t base address
   * Clobbers: r2, r16, r17, r18 and ar.lc (used as the loop counter)
   *
   * ar.lc is set to 7, so the br.cloop body runs eight times. r17 is the
   * pkr index and r2 walks the slots from CTX(PKR0) in steps of 8.
   */
  #define SAVE_PTK_REGS \
	add r2 = CTX(PKR0), r32; \
	mov r16 = 7; \
	;; \
	mov ar.lc = r16; \
	mov r17 = r0; \
	;; \
  1: \
	mov r18 = pkr[r17]; \
	;; \
	srlz.i; \
	;; \
	st8 [r2] = r18, 8; \
	;; \
	add r17 = 1, r17; \
	;; \
	br.cloop.sptk 1b; \
	;;
  /*
   * RESTORE_PTK_REGS: reload protection key registers pkr[0]-pkr[7] from
   * the context.
   *
   * In:       r33 = pointer to the context_t structure
   * Clobbers: r2, r16, r17, r18 and ar.lc (used as the loop counter)
   *
   * ar.lc is set to 7, so the br.cloop body runs eight times. r17 is the
   * pkr index and r2 walks the slots from CTX(PKR0) in steps of 8. Each
   * write is followed by srlz.i.
   */
  #define RESTORE_PTK_REGS \
	add r2 = CTX(PKR0), r33; \
	mov r16 = 7; \
	;; \
	mov ar.lc = r16; \
	mov r17 = r0; \
	;; \
  1: \
	ld8 r18 = [r2], 8; \
	;; \
	mov pkr[r17] = r18; \
	;; \
	srlz.i; \
	;; \
	add r17 = 1, r17; \
	;; \
	br.cloop.sptk 1b; \
	;;
  /*
   * void vmm_trampoline(context_t *from, context_t *to)
   *
   * In:  r32 = from, the context that receives the current processor state
   *      r33 = to, the context whose saved state is loaded
   *
   * The caller must have interrupts disabled.
   *
   * Save phase: psr, pr and ar.unat go to three consecutive 8-byte slots
   * starting at CTX(PSR), ar.rsc goes to CTX(RSC), and the register stack
   * is flushed with ar.rsc set to 0. The SAVE_* macros then store each
   * register class. Debug registers are currently not switched (both
   * debug macros are commented out).
   *
   * Restore phase: the RESTORE_* macros run in the reverse order, followed
   * by psr.l, pr, ar.unat and ar.rsc. The function returns through the b0
   * that was just loaded from the "to" context.
   */
  GLOBAL_ENTRY(vmm_trampoline)
	mov r16 = psr
	adds r2 = CTX(PSR), r32
	;;
	st8 [r2] = r16, 8		// psr
	mov r17 = pr
	;;
	st8 [r2] = r17, 8		// pr
	mov r18 = ar.unat
	;;
	st8 [r2] = r18			// unat, read before any st8.spill below
	mov r17 = ar.rsc
	;;
	adds r2 = CTX(RSC), r32
	;;
	st8 [r2] = r17			// rsc
	mov ar.rsc = 0
	flushrs
	;;
	SAVE_GENERAL_REGS
	;;
	SAVE_KERNEL_REGS
	;;
	SAVE_APP_REGS
	;;
	SAVE_BRANCH_REGS
	;;
	SAVE_CTL_REGS
	;;
	SAVE_REGION_REGS
	;;
	//SAVE_DEBUG_REGS
	;;
	rsm psr.dfl			// clear psr.dfl before touching f2-f31
	;;
	srlz.d
	;;
	SAVE_FPU_LOW
	;;
	rsm psr.dfh			// clear psr.dfh before touching f32-f127
	;;
	srlz.d
	;;
	SAVE_FPU_HIGH
	;;
	SAVE_PTK_REGS
	;;
	RESTORE_PTK_REGS
	;;
	RESTORE_FPU_HIGH
	;;
	RESTORE_FPU_LOW
	;;
	//RESTORE_DEBUG_REGS
	;;
	RESTORE_REGION_REGS
	;;
	RESTORE_CTL_REGS
	;;
	RESTORE_BRANCH_REGS
	;;
	RESTORE_APP_REGS
	;;
	RESTORE_KERNEL_REGS
	;;
	RESTORE_GENERAL_REGS
	;;
	adds r2 = CTX(PSR), r33
	;;
	ld8 r16 = [r2], 8		// psr
	;;
	mov psr.l = r16
	;;
	srlz.d
	;;
	ld8 r16 = [r2], 8		// pr
	;;
	mov pr = r16, -1
	ld8 r16 = [r2]			// unat
	;;
	mov ar.unat = r16
	;;
	adds r2 = CTX(RSC), r33
	;;
	ld8 r16 = [r2]			// rsc
	;;
	mov ar.rsc = r16
	;;
	br.ret.sptk.few b0
  END(vmm_trampoline)