nvc0_grgpc.fuc 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540
  1. /* fuc microcode for nvc0 PGRAPH/GPC
  2. *
  3. * Copyright 2011 Red Hat Inc.
  4. *
  5. * Permission is hereby granted, free of charge, to any person obtaining a
  6. * copy of this software and associated documentation files (the "Software"),
  7. * to deal in the Software without restriction, including without limitation
  8. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  9. * and/or sell copies of the Software, and to permit persons to whom the
  10. * Software is furnished to do so, subject to the following conditions:
  11. *
  12. * The above copyright notice and this permission notice shall be included in
  13. * all copies or substantial portions of the Software.
  14. *
  15. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  18. * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  19. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  20. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  21. * OTHER DEALINGS IN THE SOFTWARE.
  22. *
  23. * Authors: Ben Skeggs
  24. */
  25. /* To build:
  26. * m4 nvc0_grgpc.fuc | envyas -a -w -m fuc -V nva3 -o nvc0_grgpc.fuc.h
  27. */
  28. /* TODO
  29. * - bracket certain functions with scratch writes, useful for debugging
  30. * - watchdog timer around ctx operations
  31. */
  32. .section #nvc0_grgpc_data // data section: per-GPC state, the chipset table and the mmio lists are emitted here
  // first inclusion of the shared file; the code-selection macro is only defined further down, in the code section
  33. include(`nvc0_graph.fuc')
  34. gpc_id: .b32 0 // value of the MYINDEX register, stored by init; read by hub_barrier_done and ctx_xfer
  35. gpc_mmio_list_head: .b32 0 // start address of the GPC mmio list for the detected chipset (16-bit store by init, read back as 32 bits)
  36. gpc_mmio_list_tail: .b32 0 // end address of that GPC mmio list
  37. tpc_count: .b32 0 // low 5 bits of the UNITS register, stored by init
  38. tpc_mask: .b32 0 // (1 << tpc_count) - 1, stored by init; passed to mmctx_xfer in $r15 by ctx_xfer
  39. tpc_mmio_list_head: .b32 0 // start address of the per-TPC mmio list for the detected chipset
  40. tpc_mmio_list_tail: .b32 0 // end address of that per-TPC mmio list
  // command queue shared between the interrupt handler (queue_put) and the main loop (queue_get)
  41. cmd_queue: queue_init
  42. // chipset descriptions
  // Table searched by init. Each record is 12 bytes:
  //   +0  chipset id, four bytes (init reads them as one 32-bit word and
  //       compares against CC_SCRATCH[0])
  //   +4  16-bit address of the GPC mmio list head
  //   +6  16-bit address of the GPC mmio list tail
  //   +8  16-bit address of the per-TPC mmio list head
  //   +10 16-bit address of the per-TPC mmio list tail
  // A record whose id word is zero terminates the table.
  43. chipsets:
  44. .b8 0xc0 0 0 0
  45. .b16 #nvc0_gpc_mmio_head
  46. .b16 #nvc0_gpc_mmio_tail
  47. .b16 #nvc0_tpc_mmio_head
  48. .b16 #nvc0_tpc_mmio_tail
  49. .b8 0xc1 0 0 0
  50. .b16 #nvc0_gpc_mmio_head
  51. .b16 #nvc1_gpc_mmio_tail
  52. .b16 #nvc0_tpc_mmio_head
  53. .b16 #nvc1_tpc_mmio_tail
  54. .b8 0xc3 0 0 0
  55. .b16 #nvc0_gpc_mmio_head
  56. .b16 #nvc0_gpc_mmio_tail
  57. .b16 #nvc0_tpc_mmio_head
  58. .b16 #nvc3_tpc_mmio_tail
  59. .b8 0xc4 0 0 0
  60. .b16 #nvc0_gpc_mmio_head
  61. .b16 #nvc0_gpc_mmio_tail
  62. .b16 #nvc0_tpc_mmio_head
  63. .b16 #nvc3_tpc_mmio_tail
  64. .b8 0xc8 0 0 0
  65. .b16 #nvc0_gpc_mmio_head
  66. .b16 #nvc0_gpc_mmio_tail
  67. .b16 #nvc0_tpc_mmio_head
  68. .b16 #nvc0_tpc_mmio_tail
  69. .b8 0xce 0 0 0
  70. .b16 #nvc0_gpc_mmio_head
  71. .b16 #nvc0_gpc_mmio_tail
  72. .b16 #nvc0_tpc_mmio_head
  73. .b16 #nvc3_tpc_mmio_tail
  74. .b8 0xcf 0 0 0
  75. .b16 #nvc0_gpc_mmio_head
  76. .b16 #nvc0_gpc_mmio_tail
  77. .b16 #nvc0_tpc_mmio_head
  78. .b16 #nvcf_tpc_mmio_tail
  79. .b8 0xd9 0 0 0
  80. .b16 #nvd9_gpc_mmio_head
  81. .b16 #nvd9_gpc_mmio_tail
  82. .b16 #nvd9_tpc_mmio_head
  83. .b16 #nvd9_tpc_mmio_tail
  84. .b8 0 0 0 0 // terminator: a zero id stops the search loop in init
  85. // GPC mmio lists
  // List shared by every chipset record except 0xd9. Records in the chipsets
  // table select how much of it applies by pairing the head label with one of
  // the two tail labels below: the 0xc1 record uses the later tail and so also
  // covers the final 0x000c6c entry, all other records stop before it.
  // NOTE(review): each entry presumably encodes (register offset relative to
  // the GPC base, number of consecutive 32-bit registers); the entry macro is
  // defined in nvc0_graph.fuc - confirm there.
  86. nvc0_gpc_mmio_head:
  87. mmctx_data(0x000380, 1)
  88. mmctx_data(0x000400, 6)
  89. mmctx_data(0x000450, 9)
  90. mmctx_data(0x000600, 1)
  91. mmctx_data(0x000684, 1)
  92. mmctx_data(0x000700, 5)
  93. mmctx_data(0x000800, 1)
  94. mmctx_data(0x000808, 3)
  95. mmctx_data(0x000828, 1)
  96. mmctx_data(0x000830, 1)
  97. mmctx_data(0x0008d8, 1)
  98. mmctx_data(0x0008e0, 1)
  99. mmctx_data(0x0008e8, 6)
  100. mmctx_data(0x00091c, 1)
  101. mmctx_data(0x000924, 3)
  102. mmctx_data(0x000b00, 1)
  103. mmctx_data(0x000b08, 6)
  104. mmctx_data(0x000bb8, 1)
  105. mmctx_data(0x000c08, 1)
  106. mmctx_data(0x000c10, 8)
  107. mmctx_data(0x000c80, 1)
  108. mmctx_data(0x000c8c, 1)
  109. mmctx_data(0x001000, 3)
  110. mmctx_data(0x001014, 1)
  111. nvc0_gpc_mmio_tail:
  112. mmctx_data(0x000c6c, 1)
  113. nvc1_gpc_mmio_tail:
  // Self-contained GPC mmio list referenced only by the 0xd9 record of the
  // chipsets table; it has a single head and a single tail label.
  114. nvd9_gpc_mmio_head:
  115. mmctx_data(0x000380, 1)
  116. mmctx_data(0x000400, 2)
  117. mmctx_data(0x00040c, 3)
  118. mmctx_data(0x000450, 9)
  119. mmctx_data(0x000600, 1)
  120. mmctx_data(0x000684, 1)
  121. mmctx_data(0x000700, 5)
  122. mmctx_data(0x000800, 1)
  123. mmctx_data(0x000808, 3)
  124. mmctx_data(0x000828, 1)
  125. mmctx_data(0x000830, 1)
  126. mmctx_data(0x0008d8, 1)
  127. mmctx_data(0x0008e0, 1)
  128. mmctx_data(0x0008e8, 6)
  129. mmctx_data(0x00091c, 1)
  130. mmctx_data(0x000924, 3)
  131. mmctx_data(0x000b00, 1)
  132. mmctx_data(0x000b08, 6)
  133. mmctx_data(0x000bb8, 1)
  134. mmctx_data(0x000c08, 1)
  135. mmctx_data(0x000c10, 8)
  136. mmctx_data(0x000c6c, 1)
  137. mmctx_data(0x000c80, 1)
  138. mmctx_data(0x000c8c, 1)
  139. mmctx_data(0x001000, 3)
  140. mmctx_data(0x001014, 1)
  141. nvd9_gpc_mmio_tail:
  142. // TPC mmio lists
  // Per-TPC list shared by every chipset record except 0xd9. The four tail
  // labels below are nested: a record that names a later tail covers all
  // entries up to that label, so chipsets differ only in how many of the
  // trailing entries they include.
  143. nvc0_tpc_mmio_head:
  144. mmctx_data(0x000018, 1)
  145. mmctx_data(0x00003c, 1)
  146. mmctx_data(0x000048, 1)
  147. mmctx_data(0x000064, 1)
  148. mmctx_data(0x000088, 1)
  149. mmctx_data(0x000200, 6)
  150. mmctx_data(0x00021c, 2)
  151. mmctx_data(0x000300, 6)
  152. mmctx_data(0x0003d0, 1)
  153. mmctx_data(0x0003e0, 2)
  154. mmctx_data(0x000400, 3)
  155. mmctx_data(0x000420, 1)
  156. mmctx_data(0x0004b0, 1)
  157. mmctx_data(0x0004e8, 1)
  158. mmctx_data(0x0004f4, 1)
  159. mmctx_data(0x000520, 2)
  160. mmctx_data(0x000604, 4)
  161. mmctx_data(0x000644, 20)
  162. mmctx_data(0x000698, 1)
  163. mmctx_data(0x000750, 2)
  164. nvc0_tpc_mmio_tail: // end of list for the 0xc0 and 0xc8 records
  165. mmctx_data(0x000758, 1)
  166. mmctx_data(0x0002c4, 1)
  167. mmctx_data(0x0006e0, 1)
  168. nvcf_tpc_mmio_tail: // end of list for the 0xcf record
  169. mmctx_data(0x0004bc, 1)
  170. nvc3_tpc_mmio_tail: // end of list for the 0xc3, 0xc4 and 0xce records
  171. mmctx_data(0x000544, 1)
  172. nvc1_tpc_mmio_tail: // end of list for the 0xc1 record
  // Self-contained per-TPC mmio list referenced only by the 0xd9 record of
  // the chipsets table.
  173. nvd9_tpc_mmio_head:
  174. mmctx_data(0x000018, 1)
  175. mmctx_data(0x00003c, 1)
  176. mmctx_data(0x000048, 1)
  177. mmctx_data(0x000064, 1)
  178. mmctx_data(0x000088, 1)
  179. mmctx_data(0x000200, 6)
  180. mmctx_data(0x00021c, 2)
  181. mmctx_data(0x0002c4, 1)
  182. mmctx_data(0x000300, 6)
  183. mmctx_data(0x0003d0, 1)
  184. mmctx_data(0x0003e0, 2)
  185. mmctx_data(0x000400, 3)
  186. mmctx_data(0x000420, 3)
  187. mmctx_data(0x0004b0, 1)
  188. mmctx_data(0x0004e8, 1)
  189. mmctx_data(0x0004f4, 1)
  190. mmctx_data(0x000520, 2)
  191. mmctx_data(0x000544, 1)
  192. mmctx_data(0x000604, 4)
  193. mmctx_data(0x000644, 20)
  194. mmctx_data(0x000698, 1)
  195. mmctx_data(0x0006e0, 1)
  196. mmctx_data(0x000750, 3)
  197. nvd9_tpc_mmio_tail:
  198. .section #nvc0_grgpc_code // code section: everything below is emitted here
  199. bra #init // first instruction of the section: branch past the included helpers to init
  // The shared file is pulled in a second time, now with the macro below
  // defined. NOTE(review): presumably this selects the code half of
  // nvc0_graph.fuc (nv_wr32, the queue and mmctx/strand helpers called
  // below) - confirm against that file.
  200. define(`include_code')
  201. include(`nvc0_graph.fuc')
  202. // reports an exception to the host
  203. //
  204. // In: $r15 error code (see nvc0_graph.fuc)
  //
  // Out: $r14 is preserved (saved and restored on the stack); $r15 is
  // overwritten with 1. Performs two register writes through nv_wr32.
  // NOTE(review): the second write reuses $r14 after the first call, so this
  // relies on nv_wr32 leaving $r14 intact - confirm in nvc0_graph.fuc.
  205. //
  206. error:
  207. push $r14 // keep the caller's $r14
  208. mov $r14 -0x67ec // 0x9814
  209. sethi $r14 0x400000 // $r14 = 0x409814
  210. call #nv_wr32 // HUB_CTXCTL_CC_SCRATCH[5] = error code
  211. add b32 $r14 0x41c // $r14 = 0x409c30
  212. mov $r15 1
  213. call #nv_wr32 // HUB_CTXCTL_INTR_UP_SET
  214. pop $r14
  215. ret
  216. // GPC fuc initialisation, executed by triggering ucode start, will
  217. // fall through to main loop after completion.
  218. //
  219. // Input:
  220. // CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
  221. // CC_SCRATCH[1]: context base
  222. //
  223. // Output:
  224. // CC_SCRATCH[0]:
  225. // 31:31: set to signal completion
  226. // CC_SCRATCH[1]:
  227. // 31:0: GPC context size
  //
  // Also fills in gpc_id, tpc_count, tpc_mask and the four mmio list
  // pointers in the data section, installs ih as interrupt vector 0 and
  // programs the MMCTX save/load base registers.
  228. //
  229. init:
  230. clear b32 $r0 // $r0 is used as a constant zero from here on
  231. mov $sp $r0 // stack pointer = 0
  232. // enable fifo access
  233. mov $r1 0x1200
  234. mov $r2 2
  235. iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE
  236. // setup i0 handler, and route all interrupts to it
  237. mov $r1 #ih
  238. mov $iv0 $r1
  239. mov $r1 0x400
  240. iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH
  241. // enable fifo interrupt
  242. mov $r2 4 // same bit that ih tests in INTR
  243. iowr I[$r1 + 0x000] $r2 // INTR_EN_SET
  244. // enable interrupts
  245. bset $flags ie0
  246. // figure out which GPC we are, and how many TPCs we have
  247. mov $r1 0x608
  248. shl b32 $r1 6
  249. iord $r2 I[$r1 + 0x000] // UNITS
  250. mov $r3 1
  251. and $r2 0x1f // TPC count = low 5 bits of UNITS
  252. shl b32 $r3 $r2
  253. sub b32 $r3 1 // $r3 = (1 << count) - 1, one bit per TPC
  254. st b32 D[$r0 + #tpc_count] $r2
  255. st b32 D[$r0 + #tpc_mask] $r3
  256. add b32 $r1 0x400
  257. iord $r2 I[$r1 + 0x000] // MYINDEX
  258. st b32 D[$r0 + #gpc_id] $r2
  259. // find context data for this chipset
  260. mov $r2 0x800
  261. shl b32 $r2 6
  262. iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0]
  263. mov $r1 #chipsets - 12 // one record before the table; the loop pre-increments
  264. init_find_chipset:
  265. add b32 $r1 12 // advance to the next 12-byte record
  266. ld b32 $r3 D[$r1 + 0x00] // record id word
  267. cmpu b32 $r3 $r2
  268. bra e #init_context // match: $r1 points at the record
  269. cmpu b32 $r3 0
  270. bra ne #init_find_chipset // id 0 is the table terminator
  271. // unknown chipset
  // NOTE(review): $sp was set to 0 above and init is entered by a branch,
  // not a call, so it is unclear what this ret returns to - confirm the
  // intended behaviour for unsupported chipsets.
  272. ret
  273. // initialise context base, and size tracking
  274. init_context:
  275. mov $r2 0x800
  276. shl b32 $r2 6
  277. iord $r2 I[$r2 + 0x100] // CC_SCRATCH[1], initial base
  278. clear b32 $r3 // track GPC context size here
  279. // set mmctx base addresses now so we don't have to do it later,
  280. // they don't currently ever change
  281. mov $r4 0x700
  282. shl b32 $r4 6
  283. shr b32 $r5 $r2 8 // registers take the base in 256-byte units
  284. iowr I[$r4 + 0x000] $r5 // MMCTX_SAVE_SWBASE
  285. iowr I[$r4 + 0x100] $r5 // MMCTX_LOAD_SWBASE
  286. // calculate GPC mmio context size, store the chipset-specific
  287. // mmio list pointers somewhere we can get at them later without
  288. // re-parsing the chipset list
  289. clear b32 $r14
  290. clear b32 $r15
  291. ld b16 $r14 D[$r1 + 4] // GPC list head from the matched record
  292. ld b16 $r15 D[$r1 + 6] // GPC list tail
  293. st b16 D[$r0 + #gpc_mmio_list_head] $r14
  294. st b16 D[$r0 + #gpc_mmio_list_tail] $r15
  295. call #mmctx_size // as used here: head/tail in $r14/$r15, size returned in $r15
  296. add b32 $r2 $r15 // advance running base
  297. add b32 $r3 $r15 // accumulate total size
  298. // calculate per-TPC mmio context size, store the list pointers
  299. ld b16 $r14 D[$r1 + 8] // TPC list head
  300. ld b16 $r15 D[$r1 + 10] // TPC list tail
  301. st b16 D[$r0 + #tpc_mmio_list_head] $r14
  302. st b16 D[$r0 + #tpc_mmio_list_tail] $r15
  303. call #mmctx_size
  304. ld b32 $r14 D[$r0 + #tpc_count]
  305. mulu $r14 $r15 // per-TPC size times number of TPCs
  306. add b32 $r2 $r14
  307. add b32 $r3 $r14
  308. // round up base/size to 256 byte boundary (for strand SWBASE)
  // Note: the sequence below computes ((x >> 8) + 1) << 8, so a value that
  // is already a multiple of 256 is still advanced by a full 256 bytes.
  309. add b32 $r4 0x1300
  310. shr b32 $r3 2 // byte count -> 32-bit word count
  311. iowr I[$r4 + 0x000] $r3 // MMCTX_LOAD_COUNT, wtf for?!?
  312. shr b32 $r2 8
  313. shr b32 $r3 6 // together with the shift by 2 above: size >> 8
  314. add b32 $r2 1
  315. add b32 $r3 1
  316. shl b32 $r2 8
  317. shl b32 $r3 8
  318. // calculate size of strand context data
  319. mov b32 $r15 $r2 // rounded base is the argument
  320. call #strand_ctx_init // as used here: returns the strand data size in $r15
  321. add b32 $r3 $r15
  322. // save context size, and tell HUB we're done
  323. mov $r1 0x800
  324. shl b32 $r1 6
  325. iowr I[$r1 + 0x100] $r3 // CC_SCRATCH[1] = context size
  326. add b32 $r1 0x800 // NOTE(review): presumably a set-bits alias of CC_SCRATCH[0] - confirm
  327. clear b32 $r2
  328. bset $r2 31
  329. iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000
  330. // Main program loop, very simple, sleeps until woken up by the interrupt
  331. // handler, pulls a command from the queue and executes its handler
  //
  // Entered by falling through from init; never returns. Commands 0 to 3
  // are context transfers whose low two bits become $p1/$p2 for ctx_xfer;
  // any other command is reported through error.
  // NOTE(review): from its use here, queue_get presumably returns the
  // command in $r14, its data word in $r15, and sets $p1 when the queue is
  // empty - confirm in nvc0_graph.fuc.
  332. //
  333. main:
  334. bset $flags $p0 // arm the sleep condition; ih clears $p0 to wake us
  335. sleep $p0
  336. mov $r13 #cmd_queue
  337. call #queue_get
  338. bra $p1 #main // nothing fetched, go back to sleep
  339. // 0x0000-0x0003 are all context transfers
  340. cmpu b32 $r14 0x04
  341. bra nc #main_not_ctx_xfer // command >= 4
  342. // fetch $flags and mask off $p1/$p2
  343. mov $r1 $flags
  344. mov $r2 0x0006 // bits 1 and 2 of $flags
  345. not b32 $r2
  346. and $r1 $r2
  347. // set $p1/$p2 according to transfer type
  348. shl b32 $r14 1 // command bit 0 -> $p1, bit 1 -> $p2
  349. or $r1 $r14
  350. mov $flags $r1
  351. // transfer context data
  352. call #ctx_xfer // $r15 is still the data word from the queue
  353. bra #main
  354. main_not_ctx_xfer:
  355. shl b32 $r15 $r14 16 // offending command goes in the upper half of the error code
  356. or $r15 E_BAD_COMMAND
  357. call #error
  358. bra #main
  359. // interrupt handler
  //
  // Installed as vector 0 by init. If the fifo bit (0x4) is pending it
  // reads one command/data pair from the fifo, appends it to cmd_queue and
  // acknowledges the fifo. It then acknowledges every pending interrupt bit
  // it read and clears $p0 so the sleep in main ends. $r8-$r11, $r13-$r15
  // and $flags are saved and restored around the body.
  360. ih:
  361. push $r8
  362. mov $r8 $flags
  363. push $r8 // saved $flags
  364. push $r9
  365. push $r10
  366. push $r11
  367. push $r13
  368. push $r14
  369. push $r15
  370. // incoming fifo command?
  371. iord $r10 I[$r0 + 0x200] // INTR
  372. and $r11 $r10 0x00000004
  373. bra e #ih_no_fifo
  374. // queue incoming fifo command for later processing
  375. mov $r11 0x1900
  376. mov $r13 #cmd_queue
  377. iord $r14 I[$r11 + 0x100] // FIFO_CMD
  378. iord $r15 I[$r11 + 0x000] // FIFO_DATA
  379. call #queue_put
  380. add b32 $r11 0x400
  381. mov $r14 1
  382. iowr I[$r11 + 0x000] $r14 // FIFO_ACK
  383. // ack, and wake up main()
  384. ih_no_fifo:
  385. iowr I[$r0 + 0x100] $r10 // INTR_ACK
  386. pop $r15
  387. pop $r14
  388. pop $r13
  389. pop $r11
  390. pop $r10
  391. pop $r9
  392. pop $r8 // saved $flags
  393. mov $flags $r8
  394. pop $r8
  395. bclr $flags $p0 // done after restoring $flags so the wake-up is not undone
  396. iret
  397. // Set this GPC's bit in HUB_BAR, used to signal completion of various
  398. // activities to the HUB fuc
  //
  // Writes (1 << gpc_id) to 0x409418 through nv_wr32.
  // Clobbers: $r14 and $r15.
  399. //
  400. hub_barrier_done:
  401. mov $r15 1
  402. ld b32 $r14 D[$r0 + #gpc_id]
  403. shl b32 $r15 $r14 // $r15 = 1 << gpc_id
  404. mov $r14 -0x6be8 // 0x409418 - HUB_BAR_SET
  405. sethi $r14 0x400000
  406. call #nv_wr32
  407. ret
  408. // Disables various things, waits a bit, and re-enables them..
  409. //
  410. // Not sure how exactly this helps, perhaps "ENABLE" is not such a
  411. // good description for the bits we turn off? Anyways, without this,
  412. // funny things happen.
  //
  // Writes 0x020 to GPC_RED_SWITCH, spins for 8 loop iterations, then
  // writes 0xa20. Called by ctx_xfer before a load.
  // Clobbers: $r14 and $r15.
  413. //
  414. ctx_redswitch:
  415. mov $r14 0x614
  416. shl b32 $r14 6
  417. mov $r15 0x020
  418. iowr I[$r14] $r15 // GPC_RED_SWITCH = POWER
  419. mov $r15 8 // delay loop counter
  420. ctx_redswitch_delay:
  421. sub b32 $r15 1
  422. bra ne #ctx_redswitch_delay
  423. mov $r15 0xa20
  424. iowr I[$r14] $r15 // GPC_RED_SWITCH = UNK11, ENABLE, POWER
  425. ret
  426. // Transfer GPC context data between GPU and storage area
  427. //
  428. // In: $r15 context base address
  429. // $p1 clear on save, set on load
  430. // $p2 set if opposite direction done/will be done, so:
  431. // on save it means: "a load will follow this save"
  432. // on load it means: "a save preceded this load"
  //
  // Sequence: program MEM_BASE, run ctx_redswitch on load, start the strand
  // save/load, transfer the GPC mmio list, transfer the per-TPC mmio list
  // for every TPC in tpc_mask, wait for the strands, optionally issue
  // strand command 0x0d, then signal the HUB through hub_barrier_done.
  // Uses $r1, $r2 and $r10-$r15 as scratch.
  433. //
  434. ctx_xfer:
  435. // set context base address
  436. mov $r1 0xa04
  437. shl b32 $r1 6
  438. iowr I[$r1 + 0x000] $r15// MEM_BASE
  439. bra not $p1 #ctx_xfer_not_load
  440. call #ctx_redswitch // load only
  441. ctx_xfer_not_load:
  442. // strands
  443. mov $r1 0x4afc
  444. sethi $r1 0x20000
  445. mov $r2 0xc
  446. iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c
  447. call #strand_wait
  448. mov $r2 0x47fc
  449. sethi $r2 0x20000
  450. iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00
  451. xbit $r2 $flags $p1 // 0 on save, 1 on load
  452. add b32 $r2 3
  453. iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)
  454. // mmio context
  455. xbit $r10 $flags $p1 // direction
  456. or $r10 2 // first
  457. mov $r11 0x0000
  458. sethi $r11 0x500000
  459. ld b32 $r12 D[$r0 + #gpc_id]
  460. shl b32 $r12 15 // gpc_id * 0x8000
  461. add b32 $r11 $r12 // base = NV_PGRAPH_GPCn
  462. ld b32 $r12 D[$r0 + #gpc_mmio_list_head]
  463. ld b32 $r13 D[$r0 + #gpc_mmio_list_tail]
  464. mov $r14 0 // not multi
  465. call #mmctx_xfer
  466. // per-TPC mmio context
  467. xbit $r10 $flags $p1 // direction
  468. or $r10 4 // last
  469. mov $r11 0x4000
  470. sethi $r11 0x500000 // base = NV_PGRAPH_GPC0_TPC0
  471. ld b32 $r12 D[$r0 + #gpc_id]
  472. shl b32 $r12 15
  473. add b32 $r11 $r12 // base = NV_PGRAPH_GPCn_TPC0
  474. ld b32 $r12 D[$r0 + #tpc_mmio_list_head]
  475. ld b32 $r13 D[$r0 + #tpc_mmio_list_tail]
  476. ld b32 $r15 D[$r0 + #tpc_mask] // one bit per TPC to transfer
  477. mov $r14 0x800 // stride = 0x800
  478. call #mmctx_xfer
  479. // wait for strands to finish
  480. call #strand_wait
  481. // if load, or a save without a load following, do some
  482. // unknown stuff that's done after finishing a block of
  483. // strand commands
  // NOTE(review): the branches below run the post step on a load, or on a
  // save with $p2 set; a save with $p2 clear skips it. Given the $p2
  // definition in the header, that is a save WITH a load following, which
  // contradicts the comment above - confirm which one is intended.
  484. bra $p1 #ctx_xfer_post
  485. bra not $p2 #ctx_xfer_done
  486. ctx_xfer_post:
  487. mov $r1 0x4afc
  488. sethi $r1 0x20000
  489. mov $r2 0xd
  490. iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0d
  491. call #strand_wait
  492. // mark completion in HUB's barrier
  493. ctx_xfer_done:
  494. call #hub_barrier_done
  495. ret
  496. .align 256 // pad the end of the code section to a 256-byte boundary