mxgpu_ai.c 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360
  1. /*
  2. * Copyright 2014 Advanced Micro Devices, Inc.
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  17. * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20. * OTHER DEALINGS IN THE SOFTWARE.
  21. *
  22. */
  23. #include "amdgpu.h"
  24. #include "vega10/soc15ip.h"
  25. #include "vega10/NBIO/nbio_6_1_offset.h"
  26. #include "vega10/NBIO/nbio_6_1_sh_mask.h"
  27. #include "vega10/GC/gc_9_0_offset.h"
  28. #include "vega10/GC/gc_9_0_sh_mask.h"
  29. #include "soc15.h"
  30. #include "vega10_ih.h"
  31. #include "soc15_common.h"
  32. #include "mxgpu_ai.h"
  33. static void xgpu_ai_mailbox_send_ack(struct amdgpu_device *adev)
  34. {
  35. u32 reg;
  36. int timeout = AI_MAILBOX_TIMEDOUT;
  37. u32 mask = REG_FIELD_MASK(BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_VALID);
  38. reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
  39. mmBIF_BX_PF0_MAILBOX_CONTROL));
  40. reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_ACK, 1);
  41. WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
  42. mmBIF_BX_PF0_MAILBOX_CONTROL), reg);
  43. /*Wait for RCV_MSG_VALID to be 0*/
  44. reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
  45. mmBIF_BX_PF0_MAILBOX_CONTROL));
  46. while (reg & mask) {
  47. if (timeout <= 0) {
  48. pr_err("RCV_MSG_VALID is not cleared\n");
  49. break;
  50. }
  51. mdelay(1);
  52. timeout -=1;
  53. reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
  54. mmBIF_BX_PF0_MAILBOX_CONTROL));
  55. }
  56. }
  57. static void xgpu_ai_mailbox_set_valid(struct amdgpu_device *adev, bool val)
  58. {
  59. u32 reg;
  60. reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
  61. mmBIF_BX_PF0_MAILBOX_CONTROL));
  62. reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_CONTROL,
  63. TRN_MSG_VALID, val ? 1 : 0);
  64. WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL),
  65. reg);
  66. }
  67. static int xgpu_ai_mailbox_rcv_msg(struct amdgpu_device *adev,
  68. enum idh_event event)
  69. {
  70. u32 reg;
  71. u32 mask = REG_FIELD_MASK(BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_VALID);
  72. if (event != IDH_FLR_NOTIFICATION_CMPL) {
  73. reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
  74. mmBIF_BX_PF0_MAILBOX_CONTROL));
  75. if (!(reg & mask))
  76. return -ENOENT;
  77. }
  78. reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
  79. mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW0));
  80. if (reg != event)
  81. return -ENOENT;
  82. xgpu_ai_mailbox_send_ack(adev);
  83. return 0;
  84. }
  85. static int xgpu_ai_poll_ack(struct amdgpu_device *adev)
  86. {
  87. int r = 0, timeout = AI_MAILBOX_TIMEDOUT;
  88. u32 mask = REG_FIELD_MASK(BIF_BX_PF0_MAILBOX_CONTROL, TRN_MSG_ACK);
  89. u32 reg;
  90. reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
  91. mmBIF_BX_PF0_MAILBOX_CONTROL));
  92. while (!(reg & mask)) {
  93. if (timeout <= 0) {
  94. pr_err("Doesn't get ack from pf.\n");
  95. r = -ETIME;
  96. break;
  97. }
  98. mdelay(5);
  99. timeout -= 5;
  100. reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
  101. mmBIF_BX_PF0_MAILBOX_CONTROL));
  102. }
  103. return r;
  104. }
  105. static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event)
  106. {
  107. int r = 0, timeout = AI_MAILBOX_TIMEDOUT;
  108. r = xgpu_ai_mailbox_rcv_msg(adev, event);
  109. while (r) {
  110. if (timeout <= 0) {
  111. pr_err("Doesn't get msg:%d from pf.\n", event);
  112. r = -ETIME;
  113. break;
  114. }
  115. mdelay(5);
  116. timeout -= 5;
  117. r = xgpu_ai_mailbox_rcv_msg(adev, event);
  118. }
  119. return r;
  120. }
  121. static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev,
  122. enum idh_request req, u32 data1, u32 data2, u32 data3) {
  123. u32 reg;
  124. int r;
  125. reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
  126. mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0));
  127. reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0,
  128. MSGBUF_DATA, req);
  129. WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0),
  130. reg);
  131. WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW1),
  132. data1);
  133. WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW2),
  134. data2);
  135. WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW3),
  136. data3);
  137. xgpu_ai_mailbox_set_valid(adev, true);
  138. /* start to poll ack */
  139. r = xgpu_ai_poll_ack(adev);
  140. if (r)
  141. pr_err("Doesn't get ack from pf, continue\n");
  142. xgpu_ai_mailbox_set_valid(adev, false);
  143. }
  144. static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
  145. enum idh_request req)
  146. {
  147. int r;
  148. xgpu_ai_mailbox_trans_msg(adev, req, 0, 0, 0);
  149. /* start to check msg if request is idh_req_gpu_init_access */
  150. if (req == IDH_REQ_GPU_INIT_ACCESS ||
  151. req == IDH_REQ_GPU_FINI_ACCESS ||
  152. req == IDH_REQ_GPU_RESET_ACCESS) {
  153. r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
  154. if (r) {
  155. pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n");
  156. return r;
  157. }
  158. }
  159. return 0;
  160. }
  161. static int xgpu_ai_request_reset(struct amdgpu_device *adev)
  162. {
  163. return xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
  164. }
  165. static int xgpu_ai_request_full_gpu_access(struct amdgpu_device *adev,
  166. bool init)
  167. {
  168. enum idh_request req;
  169. req = init ? IDH_REQ_GPU_INIT_ACCESS : IDH_REQ_GPU_FINI_ACCESS;
  170. return xgpu_ai_send_access_requests(adev, req);
  171. }
  172. static int xgpu_ai_release_full_gpu_access(struct amdgpu_device *adev,
  173. bool init)
  174. {
  175. enum idh_request req;
  176. int r = 0;
  177. req = init ? IDH_REL_GPU_INIT_ACCESS : IDH_REL_GPU_FINI_ACCESS;
  178. r = xgpu_ai_send_access_requests(adev, req);
  179. return r;
  180. }
  181. static int xgpu_ai_mailbox_ack_irq(struct amdgpu_device *adev,
  182. struct amdgpu_irq_src *source,
  183. struct amdgpu_iv_entry *entry)
  184. {
  185. DRM_DEBUG("get ack intr and do nothing.\n");
  186. return 0;
  187. }
  188. static int xgpu_ai_set_mailbox_ack_irq(struct amdgpu_device *adev,
  189. struct amdgpu_irq_src *source,
  190. unsigned type,
  191. enum amdgpu_interrupt_state state)
  192. {
  193. u32 tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_INT_CNTL));
  194. tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_MAILBOX_INT_CNTL, ACK_INT_EN,
  195. (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0);
  196. WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_INT_CNTL), tmp);
  197. return 0;
  198. }
  199. static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
  200. {
  201. struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
  202. struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
  203. /* wait until RCV_MSG become 3 */
  204. if (xgpu_ai_poll_msg(adev, IDH_FLR_NOTIFICATION_CMPL)) {
  205. pr_err("failed to recieve FLR_CMPL\n");
  206. return;
  207. }
  208. /* Trigger recovery due to world switch failure */
  209. amdgpu_sriov_gpu_reset(adev, NULL);
  210. }
  211. static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
  212. struct amdgpu_irq_src *src,
  213. unsigned type,
  214. enum amdgpu_interrupt_state state)
  215. {
  216. u32 tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_INT_CNTL));
  217. tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_MAILBOX_INT_CNTL, VALID_INT_EN,
  218. (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0);
  219. WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_INT_CNTL), tmp);
  220. return 0;
  221. }
  222. static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev,
  223. struct amdgpu_irq_src *source,
  224. struct amdgpu_iv_entry *entry)
  225. {
  226. int r;
  227. /* trigger gpu-reset by hypervisor only if TDR disbaled */
  228. if (amdgpu_lockup_timeout == 0) {
  229. /* see what event we get */
  230. r = xgpu_ai_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION);
  231. /* sometimes the interrupt is delayed to inject to VM, so under such case
  232. * the IDH_FLR_NOTIFICATION is overwritten by VF FLR from GIM side, thus
  233. * above recieve message could be failed, we should schedule the flr_work
  234. * anyway
  235. */
  236. if (r) {
  237. DRM_ERROR("FLR_NOTIFICATION is missed\n");
  238. xgpu_ai_mailbox_send_ack(adev);
  239. }
  240. schedule_work(&adev->virt.flr_work);
  241. }
  242. return 0;
  243. }
  244. static const struct amdgpu_irq_src_funcs xgpu_ai_mailbox_ack_irq_funcs = {
  245. .set = xgpu_ai_set_mailbox_ack_irq,
  246. .process = xgpu_ai_mailbox_ack_irq,
  247. };
  248. static const struct amdgpu_irq_src_funcs xgpu_ai_mailbox_rcv_irq_funcs = {
  249. .set = xgpu_ai_set_mailbox_rcv_irq,
  250. .process = xgpu_ai_mailbox_rcv_irq,
  251. };
  252. void xgpu_ai_mailbox_set_irq_funcs(struct amdgpu_device *adev)
  253. {
  254. adev->virt.ack_irq.num_types = 1;
  255. adev->virt.ack_irq.funcs = &xgpu_ai_mailbox_ack_irq_funcs;
  256. adev->virt.rcv_irq.num_types = 1;
  257. adev->virt.rcv_irq.funcs = &xgpu_ai_mailbox_rcv_irq_funcs;
  258. }
  259. int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev)
  260. {
  261. int r;
  262. r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 135, &adev->virt.rcv_irq);
  263. if (r)
  264. return r;
  265. r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 138, &adev->virt.ack_irq);
  266. if (r) {
  267. amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
  268. return r;
  269. }
  270. return 0;
  271. }
  272. int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev)
  273. {
  274. int r;
  275. r = amdgpu_irq_get(adev, &adev->virt.rcv_irq, 0);
  276. if (r)
  277. return r;
  278. r = amdgpu_irq_get(adev, &adev->virt.ack_irq, 0);
  279. if (r) {
  280. amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
  281. return r;
  282. }
  283. INIT_WORK(&adev->virt.flr_work, xgpu_ai_mailbox_flr_work);
  284. return 0;
  285. }
  286. void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev)
  287. {
  288. amdgpu_irq_put(adev, &adev->virt.ack_irq, 0);
  289. amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
  290. }
  291. const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
  292. .req_full_gpu = xgpu_ai_request_full_gpu_access,
  293. .rel_full_gpu = xgpu_ai_release_full_gpu_access,
  294. .reset_gpu = xgpu_ai_request_reset,
  295. .trans_msg = xgpu_ai_mailbox_trans_msg,
  296. };