adreno_snapshot.c

/* Copyright (c) 2012-2014, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */

#include "kgsl.h"
#include "kgsl_sharedmem.h"
#include "kgsl_snapshot.h"

#include "adreno.h"
#include "adreno_pm4types.h"
#include "a2xx_reg.h"
#include "a3xx_reg.h"

/* Number of dwords of ringbuffer history to record */
#define NUM_DWORDS_OF_RINGBUFFER_HISTORY 100

/* Maintain a list of the objects we see during parsing */
#define SNAPSHOT_OBJ_BUFSIZE 64

#define SNAPSHOT_OBJ_TYPE_IB 0

/* Keep track of how many bytes are frozen after a snapshot and tell the user */
static int snapshot_frozen_objsize;

static struct kgsl_snapshot_obj {
	int type;
	uint32_t gpuaddr;
	phys_addr_t ptbase;
	void *ptr;
	int dwords;
	struct kgsl_mem_entry *entry;
} objbuf[SNAPSHOT_OBJ_BUFSIZE];

/* Pointer to the next open entry in the object list */
static int objbufptr;

/* Push a new buffer object onto the list */
static void push_object(struct kgsl_device *device, int type,
	phys_addr_t ptbase,
	uint32_t gpuaddr, int dwords)
{
	int index;
	void *ptr;
	struct kgsl_mem_entry *entry = NULL;

	/*
	 * Sometimes IBs can be reused in the same dump. Because we parse from
	 * oldest to newest, if we come across an IB that has already been used,
	 * assume that it has been reused and update the list with the newest
	 * size.
	 */
	for (index = 0; index < objbufptr; index++) {
		if (objbuf[index].gpuaddr == gpuaddr &&
			objbuf[index].ptbase == ptbase) {
			objbuf[index].dwords = dwords;
			return;
		}
	}

	if (objbufptr == SNAPSHOT_OBJ_BUFSIZE) {
		KGSL_DRV_ERR(device, "snapshot: too many snapshot objects\n");
		return;
	}

	/*
	 * adreno_convertaddr verifies that the IB size is valid - at least in
	 * the context of it being smaller than the allocated memory space
	 */
	ptr = adreno_convertaddr(device, ptbase, gpuaddr, dwords << 2, &entry);

	if (ptr == NULL) {
		KGSL_DRV_ERR(device,
			"snapshot: Can't find GPU address for %x\n", gpuaddr);
		return;
	}

	/* Put it on the list of things to parse */
	objbuf[objbufptr].type = type;
	objbuf[objbufptr].gpuaddr = gpuaddr;
	objbuf[objbufptr].ptbase = ptbase;
	objbuf[objbufptr].dwords = dwords;
	objbuf[objbufptr].entry = entry;
	objbuf[objbufptr++].ptr = ptr;
}

/*
 * Return 1 if the specified object is already on the list of buffers
 * to be dumped
 */
static int find_object(int type, unsigned int gpuaddr, phys_addr_t ptbase)
{
	int index;

	for (index = 0; index < objbufptr; index++) {
		if (objbuf[index].gpuaddr == gpuaddr &&
			objbuf[index].ptbase == ptbase &&
			objbuf[index].type == type)
			return 1;
	}

	return 0;
}

/*
 * This structure keeps track of type0 writes to VSC_PIPE_DATA_ADDRESS_x and
 * VSC_PIPE_DATA_LENGTH_x. When a draw initiator is issued these registers
 * point to buffers that we need to freeze for a snapshot
 */
static struct {
	unsigned int base;
	unsigned int size;
} vsc_pipe[8];

/*
 * This is the cached value of type0 writes to VSC_SIZE_ADDRESS, which
 * contains the buffer address of the visibility stream size buffer during a
 * binning pass
 */
static unsigned int vsc_size_address;

/*
 * This struct keeps track of type0 writes to the VFD_FETCH_INSTR_0_X and
 * VFD_FETCH_INSTR_1_X registers. When a draw initiator is issued, the
 * addresses and sizes in these registers point to VBOs that we need to
 * freeze for a snapshot
 */
static struct {
	unsigned int base;
	unsigned int stride;
} vbo[16];

/*
 * This is the cached value of type0 writes to VFD_INDEX_MAX. This will be used
 * to calculate the size of the VBOs when the draw initiator is issued
 */
static unsigned int vfd_index_max;

/*
 * This is the cached value of type0 writes to VFD_CONTROL_0, which tells us
 * how many VBOs are active when the draw initiator is issued
 */
static unsigned int vfd_control_0;

/*
 * Cached value of type0 writes to SP_VS_PVT_MEM_ADDR and SP_FS_PVT_MEM_ADDR.
 * This is a buffer that contains private stack information for the shader
 */
static unsigned int sp_vs_pvt_mem_addr;
static unsigned int sp_fs_pvt_mem_addr;

/*
 * Cached values of SP_VS_OBJ_START_REG and SP_FS_OBJ_START_REG.
 */
static unsigned int sp_vs_obj_start_reg;
static unsigned int sp_fs_obj_start_reg;

/*
 * Each load state block has two possible types. Each type has a different
 * number of dwords per unit. Use this handy lookup table to make sure
 * we dump the right amount of data from the indirect buffer
 */
static int load_state_unit_sizes[7][2] = {
	{ 2, 4 },
	{ 0, 1 },
	{ 2, 4 },
	{ 0, 1 },
	{ 8, 2 },
	{ 8, 2 },
	{ 8, 2 },
};

static int ib_parse_load_state(struct kgsl_device *device, unsigned int *pkt,
	phys_addr_t ptbase)
{
	unsigned int block, source, type;
	int ret = 0;

	/*
	 * The object here is to find indirect shaders, i.e. shaders loaded
	 * from GPU memory instead of directly in the command stream. These
	 * should be added to the list of memory objects to dump. So look at
	 * the load state and check whether the block is indirect (source = 4).
	 * If so, add the memory address to the list. The size of the object
	 * differs depending on the type per the load_state_unit_sizes array
	 * above.
	 */

	if (type3_pkt_size(pkt[0]) < 2)
		return 0;

	/*
	 * pkt[1] 18:16 - source
	 * pkt[1] 21:19 - state block
	 * pkt[1] 31:22 - size in units
	 * pkt[2] 1:0 - type
	 * pkt[2] 31:2 - GPU memory address
	 */
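	/*
	 * A worked example with hypothetical values: pkt[1] = 0x04240000
	 * decodes to source = 4 (indirect), block = 4 and a size of 16 units.
	 * If pkt[2] & 0x03 == 0, the unit size from the table is
	 * load_state_unit_sizes[4][0] = 8 dwords, so the code below freezes
	 * (16 * 8) << 2 = 512 bytes starting at pkt[2] & 0xFFFFFFFC.
	 */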
	block = (pkt[1] >> 19) & 0x07;
	source = (pkt[1] >> 16) & 0x07;
	type = pkt[2] & 0x03;

	if (source == 4) {
		int unitsize;

		if (type == 0)
			unitsize = load_state_unit_sizes[block][0];
		else
			unitsize = load_state_unit_sizes[block][1];

		/* Freeze the GPU buffer containing the shader */
		ret = kgsl_snapshot_get_object(device, ptbase,
				pkt[2] & 0xFFFFFFFC,
				(((pkt[1] >> 22) & 0x03FF) * unitsize) << 2,
				SNAPSHOT_GPU_OBJECT_SHADER);

		if (ret < 0)
			return -EINVAL;

		snapshot_frozen_objsize += ret;
	}

	return ret;
}

/*
 * This opcode sets the base addresses for the visibility stream buffer and
 * the visibility stream size buffer.
 */
static int ib_parse_set_bin_data(struct kgsl_device *device, unsigned int *pkt,
	phys_addr_t ptbase)
{
	int ret;

	if (type3_pkt_size(pkt[0]) < 2)
		return 0;

	/* Visibility stream buffer */
	ret = kgsl_snapshot_get_object(device, ptbase, pkt[1], 0,
		SNAPSHOT_GPU_OBJECT_GENERIC);

	if (ret < 0)
		return -EINVAL;

	snapshot_frozen_objsize += ret;

	/* Visibility stream size buffer (fixed size 8 dwords = 32 bytes) */
	ret = kgsl_snapshot_get_object(device, ptbase, pkt[2], 32,
		SNAPSHOT_GPU_OBJECT_GENERIC);

	if (ret >= 0)
		snapshot_frozen_objsize += ret;

	return ret;
}

/*
 * This opcode writes to GPU memory - if the buffer is written to, there is a
 * good chance that it would be valuable to capture in the snapshot, so mark
 * all buffers that are written to as frozen
 */
static int ib_parse_mem_write(struct kgsl_device *device, unsigned int *pkt,
	phys_addr_t ptbase)
{
	int ret;

	if (type3_pkt_size(pkt[0]) < 1)
		return 0;

	/*
	 * The address is where the data in the rest of this packet is written
	 * to, but since that might be an offset into the larger buffer we need
	 * to get the whole thing. Pass a size of 0 to kgsl_snapshot_get_object
	 * to capture the entire buffer.
	 */
	ret = kgsl_snapshot_get_object(device, ptbase, pkt[1] & 0xFFFFFFFC, 0,
		SNAPSHOT_GPU_OBJECT_GENERIC);

	if (ret >= 0)
		snapshot_frozen_objsize += ret;

	return ret;
}

/*
 * The DRAW_INDX opcode sends a draw initiator, which starts a draw operation
 * in the GPU, so this is the point where all the registers and buffers become
 * "valid". The DRAW_INDX may also have an index buffer pointer that should be
 * frozen with the others
 */
static int ib_parse_draw_indx(struct kgsl_device *device, unsigned int *pkt,
	phys_addr_t ptbase)
{
	int ret = 0, i;

	if (type3_pkt_size(pkt[0]) < 3)
		return 0;

	/* DRAW_INDX may have an index buffer pointer */
	if (type3_pkt_size(pkt[0]) > 3) {
		ret = kgsl_snapshot_get_object(device, ptbase, pkt[4], pkt[5],
			SNAPSHOT_GPU_OBJECT_GENERIC);

		if (ret < 0)
			return -EINVAL;

		snapshot_frozen_objsize += ret;
	}

	/*
	 * All of the type0 writes are valid at a draw initiator, so freeze
	 * the various buffers that we are tracking
	 */

	/* First up, the visibility stream buffers */
	for (i = 0; i < ARRAY_SIZE(vsc_pipe); i++) {
		if (vsc_pipe[i].base != 0 && vsc_pipe[i].size != 0) {
			ret = kgsl_snapshot_get_object(device, ptbase,
				vsc_pipe[i].base, vsc_pipe[i].size,
				SNAPSHOT_GPU_OBJECT_GENERIC);
			if (ret < 0)
				return -EINVAL;

			snapshot_frozen_objsize += ret;
		}
	}

	/* Next, the visibility stream size buffer */
	if (vsc_size_address) {
		ret = kgsl_snapshot_get_object(device, ptbase,
				vsc_size_address, 32,
				SNAPSHOT_GPU_OBJECT_GENERIC);
		if (ret < 0)
			return -EINVAL;

		snapshot_frozen_objsize += ret;
	}

	/* Next, private shader buffer memory */
	if (sp_vs_pvt_mem_addr) {
		ret = kgsl_snapshot_get_object(device, ptbase,
				sp_vs_pvt_mem_addr, 8192,
				SNAPSHOT_GPU_OBJECT_GENERIC);
		if (ret < 0)
			return -EINVAL;

		snapshot_frozen_objsize += ret;
		sp_vs_pvt_mem_addr = 0;
	}

	if (sp_fs_pvt_mem_addr) {
		ret = kgsl_snapshot_get_object(device, ptbase,
				sp_fs_pvt_mem_addr, 8192,
				SNAPSHOT_GPU_OBJECT_GENERIC);
		if (ret < 0)
			return -EINVAL;

		snapshot_frozen_objsize += ret;
		sp_fs_pvt_mem_addr = 0;
	}

	if (sp_vs_obj_start_reg) {
		ret = kgsl_snapshot_get_object(device, ptbase,
			sp_vs_obj_start_reg & 0xFFFFFFE0, 0,
			SNAPSHOT_GPU_OBJECT_GENERIC);
		if (ret < 0)
			return -EINVAL;

		snapshot_frozen_objsize += ret;
		sp_vs_obj_start_reg = 0;
	}

	if (sp_fs_obj_start_reg) {
		ret = kgsl_snapshot_get_object(device, ptbase,
			sp_fs_obj_start_reg & 0xFFFFFFE0, 0,
			SNAPSHOT_GPU_OBJECT_GENERIC);
		if (ret < 0)
			return -EINVAL;

		snapshot_frozen_objsize += ret;
		sp_fs_obj_start_reg = 0;
	}

	/* Finally: VBOs */

	/* The number of active VBOs is stored in VFD_CONTROL_0[31:27] */
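	/*
	 * For example, a (hypothetical) vfd_control_0 of 0x18000000 has
	 * bits [31:27] = 3, so the loop below would walk three VBOs.
	 */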
	for (i = 0; i < (vfd_control_0) >> 27; i++) {
		/*
		 * The size of the VBO would be the stride stored in
		 * VFD_FETCH_INSTR_0_X.BUFSTRIDE * VFD_INDEX_MAX. The base
		 * is stored in VFD_FETCH_INSTR_1_X. Pass a size of 0 so the
		 * entire backing buffer gets frozen.
		 */
		if (vbo[i].base != 0) {
			ret = kgsl_snapshot_get_object(device, ptbase,
					vbo[i].base,
					0, SNAPSHOT_GPU_OBJECT_GENERIC);
			if (ret < 0)
				return -EINVAL;

			snapshot_frozen_objsize += ret;
		}

		vbo[i].base = 0;
		vbo[i].stride = 0;
	}

	vfd_control_0 = 0;
	vfd_index_max = 0;

	return ret;
}

/*
 * Parse all the type3 opcode packets that may contain important information,
 * such as additional GPU buffers to grab or a draw initiator
 */
static int ib_parse_type3(struct kgsl_device *device, unsigned int *ptr,
	phys_addr_t ptbase)
{
	int opcode = cp_type3_opcode(*ptr);

	if (opcode == CP_LOAD_STATE)
		return ib_parse_load_state(device, ptr, ptbase);
	else if (opcode == CP_SET_BIN_DATA)
		return ib_parse_set_bin_data(device, ptr, ptbase);
	else if (opcode == CP_MEM_WRITE)
		return ib_parse_mem_write(device, ptr, ptbase);
	else if (opcode == CP_DRAW_INDX)
		return ib_parse_draw_indx(device, ptr, ptbase);

	return 0;
}
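/*
 * As a reading aid for the parsers above: in the usual PM4 encoding a type3
 * header carries the packet type in bits [31:30], the payload dword count in
 * bits [29:16] and the opcode in bits [15:8], which is the field that
 * cp_type3_opcode() extracts.
 */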
/*
 * Parse type0 packets found in the stream. Some of the registers that are
 * written are clues for GPU buffers that we need to freeze. Register writes
 * are considered valid when a draw initiator is issued, so just cache the
 * values here and freeze them when a CP_DRAW_INDX is seen. This protects
 * against needlessly caching buffers that won't be used during a draw call
 */
static void ib_parse_type0(struct kgsl_device *device, unsigned int *ptr,
	phys_addr_t ptbase)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	int size = type0_pkt_size(*ptr);
	int offset = type0_pkt_offset(*ptr);
	int i;

	for (i = 0; i < size - 1; i++, offset++) {

		/* Visibility stream buffer */
		if (offset >= adreno_getreg(adreno_dev,
				ADRENO_REG_VSC_PIPE_DATA_ADDRESS_0) &&
			offset <= adreno_getreg(adreno_dev,
				ADRENO_REG_VSC_PIPE_DATA_LENGTH_7)) {

			int index = offset - adreno_getreg(adreno_dev,
					ADRENO_REG_VSC_PIPE_DATA_ADDRESS_0);

			/* Each bank of address and length registers is
			 * interleaved with an empty register:
			 *
			 * address 0
			 * length 0
			 * empty
			 * address 1
			 * length 1
			 * empty
			 * ...
			 */
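			/*
			 * For example, a write at index 3 lands on "address 1":
			 * 3 % 3 == 0 selects the base field and 3 / 3 == 1
			 * selects pipe 1 in the code below.
			 */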
			if ((index % 3) == 0)
				vsc_pipe[index / 3].base = ptr[i + 1];
			else if ((index % 3) == 1)
				vsc_pipe[index / 3].size = ptr[i + 1];
		} else if ((offset >= adreno_getreg(adreno_dev,
				ADRENO_REG_VFD_FETCH_INSTR_0_0)) &&
			(offset <= adreno_getreg(adreno_dev,
				ADRENO_REG_VFD_FETCH_INSTR_1_F))) {

			int index = offset -
				adreno_getreg(adreno_dev,
					ADRENO_REG_VFD_FETCH_INSTR_0_0);

			/*
			 * FETCH_INSTR_0_X and FETCH_INSTR_1_X banks are
			 * interleaved as above but without the empty register
			 * in between
			 */
			if ((index % 2) == 0)
				vbo[index >> 1].stride =
					(ptr[i + 1] >> 7) & 0x1FF;
			else
				vbo[index >> 1].base = ptr[i + 1];
		} else {
			/*
			 * Cache various support registers for calculating
			 * buffer sizes
			 */
			if (offset ==
				adreno_getreg(adreno_dev,
					ADRENO_REG_VFD_CONTROL_0))
				vfd_control_0 = ptr[i + 1];
			else if (offset ==
				adreno_getreg(adreno_dev,
					ADRENO_REG_VFD_INDEX_MAX))
				vfd_index_max = ptr[i + 1];
			else if (offset ==
				adreno_getreg(adreno_dev,
					ADRENO_REG_VSC_SIZE_ADDRESS))
				vsc_size_address = ptr[i + 1];
			else if (offset == adreno_getreg(adreno_dev,
					ADRENO_REG_SP_VS_PVT_MEM_ADDR_REG))
				sp_vs_pvt_mem_addr = ptr[i + 1];
			else if (offset == adreno_getreg(adreno_dev,
					ADRENO_REG_SP_FS_PVT_MEM_ADDR_REG))
				sp_fs_pvt_mem_addr = ptr[i + 1];
			else if (offset == adreno_getreg(adreno_dev,
					ADRENO_REG_SP_VS_OBJ_START_REG))
				sp_vs_obj_start_reg = ptr[i + 1];
			else if (offset == adreno_getreg(adreno_dev,
					ADRENO_REG_SP_FS_OBJ_START_REG))
				sp_fs_obj_start_reg = ptr[i + 1];
		}
	}
}

static inline int parse_ib(struct kgsl_device *device, phys_addr_t ptbase,
	unsigned int gpuaddr, unsigned int dwords);

/* Add an IB as a GPU object, but first parse it to find more goodies within */
static int ib_add_gpu_object(struct kgsl_device *device, phys_addr_t ptbase,
	unsigned int gpuaddr, unsigned int dwords)
{
	int i, ret, rem = dwords;
	unsigned int *src;
	struct kgsl_mem_entry *entry = NULL;

	/*
	 * If the object is already in the list, we don't need to parse it again
	 */
	if (kgsl_snapshot_have_object(device, ptbase, gpuaddr, dwords << 2))
		return 0;

	src = (unsigned int *) adreno_convertaddr(device, ptbase, gpuaddr,
		dwords << 2, &entry);

	if (src == NULL)
		return -EINVAL;

	for (i = 0; rem > 0; rem--, i++) {
		int pktsize;

		/* If the packet isn't a type 0 or a type 3, then don't bother
		 * parsing it - it is likely corrupted */
		if (!pkt_is_type0(src[i]) && !pkt_is_type3(src[i]))
			break;

		pktsize = type3_pkt_size(src[i]);

		if (!pktsize || (pktsize + 1) > rem)
			break;

		if (pkt_is_type3(src[i])) {
			if (adreno_cmd_is_ib(src[i])) {
				unsigned int gpuaddr = src[i + 1];
				unsigned int size = src[i + 2];

				parse_ib(device, ptbase, gpuaddr, size);
			} else {
				ret = ib_parse_type3(device, &src[i], ptbase);
				/*
				 * If the parse function failed (probably
				 * because of a bad decode) then bail out and
				 * just capture the binary IB data
				 */
				if (ret < 0)
					goto done;
			}
		} else if (pkt_is_type0(src[i])) {
			ib_parse_type0(device, &src[i], ptbase);
		}

		i += pktsize;
		rem -= pktsize;
	}

done:
	ret = kgsl_snapshot_get_object(device, ptbase, gpuaddr, dwords << 2,
		SNAPSHOT_GPU_OBJECT_IB);

	if (ret >= 0)
		snapshot_frozen_objsize += ret;

	if (entry) {
		kgsl_memdesc_unmap(&entry->memdesc);
		kgsl_mem_entry_put(entry);
	}

	return ret;
}
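/*
 * A note on the packet walk above: it relies on type0 and type3 headers
 * keeping their payload dword count in the same bit field, which is why
 * type3_pkt_size() is applied to both kinds of packet. For a header at
 * src[i] with a payload of pktsize dwords, the payload occupies
 * src[i + 1] .. src[i + pktsize], so the loop's i++ combined with
 * i += pktsize steps cleanly to the next packet header.
 */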
/*
 * We want to store the last executed IB1 and IB2 in the static region to
 * ensure that we get at least some information out of the snapshot even if we
 * can't access the dynamic data from the sysfs file. Push all other IBs on the
 * dynamic list
 */
static inline int parse_ib(struct kgsl_device *device, phys_addr_t ptbase,
	unsigned int gpuaddr, unsigned int dwords)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	unsigned int ib1base, ib2base;
	int ret = 0;

	/*
	 * Check the IB address - if it is either the last executed IB1 or the
	 * last executed IB2, then push it into the static blob, otherwise put
	 * it in the dynamic list
	 */
	adreno_readreg(adreno_dev, ADRENO_REG_CP_IB1_BASE, &ib1base);
	adreno_readreg(adreno_dev, ADRENO_REG_CP_IB2_BASE, &ib2base);

	if (gpuaddr == ib1base || gpuaddr == ib2base)
		push_object(device, SNAPSHOT_OBJ_TYPE_IB, ptbase,
			gpuaddr, dwords);
	else
		ret = ib_add_gpu_object(device, ptbase, gpuaddr, dwords);

	return ret;
}

/* Snapshot the ringbuffer memory */
static int snapshot_rb(struct kgsl_device *device, void *snapshot,
	int remain, void *priv)
{
	struct kgsl_snapshot_rb *header = snapshot;
	unsigned int *data = snapshot + sizeof(*header);
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
	unsigned int rptr, *rbptr, ibbase;
	phys_addr_t ptbase;
	int index, size, i;
	int parse_ibs = 0, ib_parse_start;

	/* Get the physical address of the MMU pagetable */
	ptbase = kgsl_mmu_get_current_ptbase(&device->mmu);

	/* Get the current read pointer for the RB */
	adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, &rptr);

	/* Address of the last processed IB */
	adreno_readreg(adreno_dev, ADRENO_REG_CP_IB1_BASE, &ibbase);

	/*
	 * Figure out the window of ringbuffer data to dump. First we need to
	 * find where the last processed IB was submitted. Start walking back
	 * from the rptr
	 */
	index = rptr;
	rbptr = rb->buffer_desc.hostptr;

	do {
		index--;

		if (index < 0) {
			index = rb->sizedwords - 3;

			/* We wrapped without finding what we wanted */
			if (index < rb->wptr) {
				index = rb->wptr;
				break;
			}
		}

		if (adreno_cmd_is_ib(rbptr[index]) &&
			rbptr[index + 1] == ibbase)
			break;
	} while (index != rb->wptr);

	/*
	 * index points at the last submitted IB. We can only trust that the
	 * memory between the context switch and the hanging IB is valid, so
	 * the next step is to find the context switch before the submission
	 */
	while (index != rb->wptr) {
		index--;

		if (index < 0) {
			index = rb->sizedwords - 2;

			/*
			 * Wrapped without finding the context switch. This is
			 * harmless - we should still have enough data to dump
			 * a valid state
			 */
			if (index < rb->wptr) {
				index = rb->wptr;
				break;
			}
		}

		/* Break if the current packet is a context switch identifier */
		if ((rbptr[index] == cp_nop_packet(1)) &&
			(rbptr[index + 1] == KGSL_CONTEXT_TO_MEM_IDENTIFIER))
			break;
	}
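	/*
	 * For reference, the marker matched above is a one-dword CP NOP
	 * packet (cp_nop_packet(1)) whose single payload dword is the
	 * KGSL_CONTEXT_TO_MEM_IDENTIFIER magic that the context switch
	 * code writes into the ringbuffer.
	 */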
	/*
	 * index represents the start of the window of interest. We will try
	 * to dump all buffers between here and the rptr
	 */
	ib_parse_start = index;

	/*
	 * Dump the entire ringbuffer - the parser can choose how much of it to
	 * process
	 */
	size = (rb->sizedwords << 2);

	if (remain < size + sizeof(*header)) {
		KGSL_DRV_ERR(device,
			"snapshot: Not enough memory for the rb section");
		return 0;
	}

	/* Write the sub-header for the section */
	header->start = rb->wptr;
	header->end = rb->wptr;
	header->wptr = rb->wptr;
	header->rbsize = rb->sizedwords;
	header->count = rb->sizedwords;

	/*
	 * Loop through the RB, copying the data and looking for indirect
	 * buffers and MMU pagetable changes
	 */
	index = rb->wptr;
	for (i = 0; i < rb->sizedwords; i++) {
		*data = rbptr[index];

		/*
		 * Only parse IBs between the start and the rptr or the next
		 * context switch, whichever comes first
		 */
		if (parse_ibs == 0 && index == ib_parse_start)
			parse_ibs = 1;
		else if (index == rptr || adreno_rb_ctxtswitch(&rbptr[index]))
			parse_ibs = 0;

		if (parse_ibs && adreno_cmd_is_ib(rbptr[index])) {
			unsigned int ibaddr = rbptr[index + 1];
			unsigned int ibsize = rbptr[index + 2];

			/*
			 * This will return non-NULL if the IB happens to be
			 * part of the context memory (i.e. context switch
			 * command buffers)
			 */
			struct kgsl_memdesc *memdesc =
				adreno_find_ctxtmem(device, ptbase, ibaddr,
					ibsize << 2);

			/* IOMMU uses a NOP IB placed in setstate memory */
			if (NULL == memdesc)
				if (kgsl_gpuaddr_in_memdesc(
						&device->mmu.setstate_memory,
						ibaddr, ibsize << 2))
					memdesc = &device->mmu.setstate_memory;

			/*
			 * The IB from CP_IB1_BASE and the IBs for legacy
			 * context switch go into the snapshot; all
			 * others get marked as GPU objects
			 */
			if (memdesc != NULL)
				push_object(device, SNAPSHOT_OBJ_TYPE_IB,
					ptbase, ibaddr, ibsize);
			else
				parse_ib(device, ptbase, ibaddr, ibsize);
		}

		index = index + 1;

		if (index == rb->sizedwords)
			index = 0;

		data++;
	}

	/* Return the size of the section */
	return size + sizeof(*header);
}
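/*
 * Note the contract shared by the section callbacks in this file
 * (snapshot_rb, snapshot_capture_mem_list and snapshot_ib): each verifies
 * that "remain" can hold its sub-header plus payload, fills them in, and
 * returns the number of bytes consumed - or 0, which signals
 * kgsl_snapshot_add_section() to skip the section when space runs out.
 */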
static int snapshot_capture_mem_list(struct kgsl_device *device, void *snapshot,
	int remain, void *priv)
{
	struct kgsl_snapshot_replay_mem_list *header = snapshot;
	struct kgsl_process_private *private = NULL;
	struct kgsl_process_private *tmp_private;
	phys_addr_t ptbase;
	struct rb_node *node;
	struct kgsl_mem_entry *entry = NULL;
	int num_mem;
	unsigned int *data = snapshot + sizeof(*header);

	ptbase = kgsl_mmu_get_current_ptbase(&device->mmu);

	mutex_lock(&kgsl_driver.process_mutex);
	list_for_each_entry(tmp_private, &kgsl_driver.process_list, list) {
		if (kgsl_mmu_pt_equal(&device->mmu, tmp_private->pagetable,
			ptbase)) {
			private = tmp_private;
			break;
		}
	}
	mutex_unlock(&kgsl_driver.process_mutex);

	if (!private) {
		KGSL_DRV_ERR(device,
			"Failed to get pointer to process private structure\n");
		return 0;
	}

	/* We need to know the number of memory objects that the process has */
	spin_lock(&private->mem_lock);
	for (node = rb_first(&private->mem_rb), num_mem = 0; node; ) {
		entry = rb_entry(node, struct kgsl_mem_entry, node);
		node = rb_next(&entry->node);
		num_mem++;
	}

	if (remain < ((num_mem * 3 * sizeof(unsigned int)) +
			sizeof(*header))) {
		KGSL_DRV_ERR(device,
			"snapshot: Not enough memory for the mem list section");
		spin_unlock(&private->mem_lock);
		return 0;
	}

	header->num_entries = num_mem;
	header->ptbase = (__u32)ptbase;

	/*
	 * Walk through the memory list and store the
	 * (gpuaddr, size, memtype) tuples in the snapshot
	 */
	for (node = rb_first(&private->mem_rb); node; ) {
		entry = rb_entry(node, struct kgsl_mem_entry, node);
		node = rb_next(&entry->node);

		*data++ = entry->memdesc.gpuaddr;
		*data++ = entry->memdesc.size;
		*data++ = (entry->memdesc.priv & KGSL_MEMTYPE_MASK) >>
			KGSL_MEMTYPE_SHIFT;
	}
	spin_unlock(&private->mem_lock);

	return sizeof(*header) + (num_mem * 3 * sizeof(unsigned int));
}
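/*
 * The payload written above is num_entries consecutive three-dword records:
 *
 *	dword 0: gpuaddr
 *	dword 1: size (in bytes)
 *	dword 2: memtype
 *
 * presumably consumed by replay tooling, going by the name of
 * struct kgsl_snapshot_replay_mem_list.
 */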
/* Snapshot the memory for an indirect buffer */
static int snapshot_ib(struct kgsl_device *device, void *snapshot,
	int remain, void *priv)
{
	struct kgsl_snapshot_ib *header = snapshot;
	struct kgsl_snapshot_obj *obj = priv;
	unsigned int *src = obj->ptr;
	unsigned int *dst = snapshot + sizeof(*header);
	int i, ret;

	if (remain < (obj->dwords << 2) + sizeof(*header)) {
		KGSL_DRV_ERR(device,
			"snapshot: Not enough memory for the ib section");
		return 0;
	}

	/* Write the sub-header for the section */
	header->gpuaddr = obj->gpuaddr;
	header->ptbase = (__u32)obj->ptbase;
	header->size = obj->dwords;

	/* Make sure memory is mapped */
	if (obj->entry)
		src = (unsigned int *)
			kgsl_gpuaddr_to_vaddr(&obj->entry->memdesc,
				obj->gpuaddr);

	/* Write the contents of the ib */
	for (i = 0; i < obj->dwords; i++, src++, dst++) {
		*dst = *src;

		if (pkt_is_type3(*src)) {
			if ((obj->dwords - i) < type3_pkt_size(*src) + 1)
				continue;

			if (adreno_cmd_is_ib(*src)) {
				parse_ib(device, obj->ptbase, src[1],
					src[2]);
			} else {
				ret = ib_parse_type3(device, src, obj->ptbase);

				/* Stop parsing if the type3 decode fails */
				if (ret < 0)
					break;
			}
		}
	}

	return (obj->dwords << 2) + sizeof(*header);
}

/* Dump another item on the current pending list */
static void *dump_object(struct kgsl_device *device, int obj, void *snapshot,
	int *remain)
{
	switch (objbuf[obj].type) {
	case SNAPSHOT_OBJ_TYPE_IB:
		snapshot = kgsl_snapshot_add_section(device,
			KGSL_SNAPSHOT_SECTION_IB, snapshot, remain,
			snapshot_ib, &objbuf[obj]);
		if (objbuf[obj].entry) {
			kgsl_memdesc_unmap(&(objbuf[obj].entry->memdesc));
			kgsl_mem_entry_put(objbuf[obj].entry);
		}
		break;
	default:
		KGSL_DRV_ERR(device,
			"snapshot: Invalid snapshot object type: %d\n",
			objbuf[obj].type);
		break;
	}

	return snapshot;
}

/* adreno_snapshot - Snapshot the Adreno GPU state
 * @device - KGSL device to snapshot
 * @snapshot - Pointer to the start of memory to write into
 * @remain - A pointer to how many bytes of memory are remaining in the snapshot
 * @hang - set if this snapshot was automatically triggered by a GPU hang
 * This is a hook function called by kgsl_snapshot to snapshot the
 * Adreno specific information for the GPU snapshot. In turn, this function
 * calls the GPU specific snapshot function to get core specific information.
 */
void *adreno_snapshot(struct kgsl_device *device, void *snapshot, int *remain,
		int hang)
{
	int i;
	uint32_t ibbase, ibsize;
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	phys_addr_t ptbase;

	/* Reset the list of objects */
	objbufptr = 0;

	snapshot_frozen_objsize = 0;

	/* Clear the caches for the visibility stream and VBO parsing */
	vfd_control_0 = 0;
	vfd_index_max = 0;
	vsc_size_address = 0;

	memset(vsc_pipe, 0, sizeof(vsc_pipe));
	memset(vbo, 0, sizeof(vbo));

	/* Get the physical address of the MMU pagetable */
	ptbase = kgsl_mmu_get_current_ptbase(&device->mmu);

	/* Dump the ringbuffer */
	snapshot = kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_RB,
		snapshot, remain, snapshot_rb, NULL);

	/*
	 * Add a section that lists (gpuaddr, size, memtype) tuples of the
	 * hanging process
	 */
	snapshot = kgsl_snapshot_add_section(device,
		KGSL_SNAPSHOT_SECTION_MEMLIST, snapshot, remain,
		snapshot_capture_mem_list, NULL);

	/*
	 * Make sure that the last IB1 that was being executed is dumped.
	 * Since this was the last IB1 that was processed, we should have
	 * already added it to the list during the ringbuffer parse but we
	 * want to be double plus sure.
	 */
	adreno_readreg(adreno_dev, ADRENO_REG_CP_IB1_BASE, &ibbase);
	adreno_readreg(adreno_dev, ADRENO_REG_CP_IB1_BUFSZ, &ibsize);

	/*
	 * The problem is that the IB size from the register is the
	 * unprocessed size of the buffer, not the original size, so if we
	 * didn't catch this buffer being directly used in the RB, then we
	 * might not be able to dump the whole thing. Print a warning message
	 * so we can try to figure out how often this really happens.
	 */
	if (!find_object(SNAPSHOT_OBJ_TYPE_IB, ibbase, ptbase) && ibsize) {
		push_object(device, SNAPSHOT_OBJ_TYPE_IB, ptbase,
			ibbase, ibsize);

		KGSL_DRV_ERR(device, "CP_IB1_BASE not found in the ringbuffer. "
			"Dumping %x dwords of the buffer.\n", ibsize);
	}

	adreno_readreg(adreno_dev, ADRENO_REG_CP_IB2_BASE, &ibbase);
	adreno_readreg(adreno_dev, ADRENO_REG_CP_IB2_BUFSZ, &ibsize);

	/*
	 * Add the last parsed IB2 to the list. The IB2 should be found as we
	 * parse the objects below, but we try to add it to the list first, so
	 * it too can be parsed. Don't print an error message in this case - if
	 * the IB2 is found during parsing, the list will be updated with the
	 * correct size.
	 */
	if (!find_object(SNAPSHOT_OBJ_TYPE_IB, ibbase, ptbase) && ibsize) {
		push_object(device, SNAPSHOT_OBJ_TYPE_IB, ptbase,
			ibbase, ibsize);
	}

	/*
	 * Go through the list of found objects and dump each one. As the IBs
	 * are parsed, more objects might be found, and objbufptr will increase
	 */
	for (i = 0; i < objbufptr; i++)
		snapshot = dump_object(device, i, snapshot, remain);

	/* Add GPU specific sections - registers mainly, but other stuff too */
	if (adreno_dev->gpudev->snapshot)
		snapshot = adreno_dev->gpudev->snapshot(adreno_dev, snapshot,
			remain, hang);

	if (snapshot_frozen_objsize)
		KGSL_DRV_ERR(device, "GPU snapshot froze %d KB of GPU buffers\n",
			snapshot_frozen_objsize / 1024);

	return snapshot;
}
  877. }