radeon_uvd.c 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045
  1. /*
  2. * Copyright 2011 Advanced Micro Devices, Inc.
  3. * All Rights Reserved.
  4. *
  5. * Permission is hereby granted, free of charge, to any person obtaining a
  6. * copy of this software and associated documentation files (the
  7. * "Software"), to deal in the Software without restriction, including
  8. * without limitation the rights to use, copy, modify, merge, publish,
  9. * distribute, sub license, and/or sell copies of the Software, and to
  10. * permit persons to whom the Software is furnished to do so, subject to
  11. * the following conditions:
  12. *
  13. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  16. * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  17. * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  18. * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  19. * USE OR OTHER DEALINGS IN THE SOFTWARE.
  20. *
  21. * The above copyright notice and this permission notice (including the
  22. * next paragraph) shall be included in all copies or substantial portions
  23. * of the Software.
  24. *
  25. */
  26. /*
  27. * Authors:
  28. * Christian König <deathsimple@vodafone.de>
  29. */
  30. #include <linux/firmware.h>
  31. #include <linux/module.h>
  32. #include <drm/drmP.h>
  33. #include <drm/drm.h>
  34. #include "radeon.h"
  35. #include "radeon_ucode.h"
  36. #include "r600d.h"
  37. /* Delay, in milliseconds, used when (re)scheduling the UVD idle work: 1 second */
  38. #define UVD_IDLE_TIMEOUT_MS 1000
  39. /* Firmware names, one per firmware generation; radeon_uvd_init() picks one by ASIC family */
  40. #define FIRMWARE_R600 "/*(DEBLOBBED)*/"
  41. #define FIRMWARE_RS780 "/*(DEBLOBBED)*/"
  42. #define FIRMWARE_RV770 "/*(DEBLOBBED)*/"
  43. #define FIRMWARE_RV710 "/*(DEBLOBBED)*/"
  44. #define FIRMWARE_CYPRESS "/*(DEBLOBBED)*/"
  45. #define FIRMWARE_SUMO "/*(DEBLOBBED)*/"
  46. #define FIRMWARE_TAHITI "/*(DEBLOBBED)*/"
  47. #define FIRMWARE_BONAIRE_LEGACY "/*(DEBLOBBED)*/"
  48. #define FIRMWARE_BONAIRE "/*(DEBLOBBED)*/"
  49. /*(DEBLOBBED)*/
  50. static void radeon_uvd_idle_work_handler(struct work_struct *work); /* defined further down; referenced by radeon_uvd_init() */
  51. int radeon_uvd_init(struct radeon_device *rdev)
  52. {
  53. unsigned long bo_size;
  54. const char *fw_name = NULL, *legacy_fw_name = NULL;
  55. int i, r;
  56. INIT_DELAYED_WORK(&rdev->uvd.idle_work, radeon_uvd_idle_work_handler);
  57. switch (rdev->family) {
  58. case CHIP_RV610:
  59. case CHIP_RV630:
  60. case CHIP_RV670:
  61. case CHIP_RV620:
  62. case CHIP_RV635:
  63. legacy_fw_name = FIRMWARE_R600;
  64. break;
  65. case CHIP_RS780:
  66. case CHIP_RS880:
  67. legacy_fw_name = FIRMWARE_RS780;
  68. break;
  69. case CHIP_RV770:
  70. legacy_fw_name = FIRMWARE_RV770;
  71. break;
  72. case CHIP_RV710:
  73. case CHIP_RV730:
  74. case CHIP_RV740:
  75. legacy_fw_name = FIRMWARE_RV710;
  76. break;
  77. case CHIP_CYPRESS:
  78. case CHIP_HEMLOCK:
  79. case CHIP_JUNIPER:
  80. case CHIP_REDWOOD:
  81. case CHIP_CEDAR:
  82. legacy_fw_name = FIRMWARE_CYPRESS;
  83. break;
  84. case CHIP_SUMO:
  85. case CHIP_SUMO2:
  86. case CHIP_PALM:
  87. case CHIP_CAYMAN:
  88. case CHIP_BARTS:
  89. case CHIP_TURKS:
  90. case CHIP_CAICOS:
  91. legacy_fw_name = FIRMWARE_SUMO;
  92. break;
  93. case CHIP_TAHITI:
  94. case CHIP_VERDE:
  95. case CHIP_PITCAIRN:
  96. case CHIP_ARUBA:
  97. case CHIP_OLAND:
  98. legacy_fw_name = FIRMWARE_TAHITI;
  99. break;
  100. case CHIP_BONAIRE:
  101. case CHIP_KABINI:
  102. case CHIP_KAVERI:
  103. case CHIP_HAWAII:
  104. case CHIP_MULLINS:
  105. legacy_fw_name = FIRMWARE_BONAIRE_LEGACY;
  106. fw_name = FIRMWARE_BONAIRE;
  107. break;
  108. default:
  109. return -EINVAL;
  110. }
  111. rdev->uvd.fw_header_present = false;
  112. rdev->uvd.max_handles = RADEON_DEFAULT_UVD_HANDLES;
  113. if (fw_name) {
  114. /* Let's try to load the newer firmware first */
  115. r = reject_firmware(&rdev->uvd_fw, fw_name, rdev->dev);
  116. if (r) {
  117. dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n",
  118. fw_name);
  119. } else {
  120. struct common_firmware_header *hdr = (void *)rdev->uvd_fw->data;
  121. unsigned version_major, version_minor, family_id;
  122. r = radeon_ucode_validate(rdev->uvd_fw);
  123. if (r)
  124. return r;
  125. rdev->uvd.fw_header_present = true;
  126. family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
  127. version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
  128. version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
  129. DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n",
  130. version_major, version_minor, family_id);
  131. /*
  132. * Limit the number of UVD handles depending on
  133. * microcode major and minor versions.
  134. */
  135. if ((version_major >= 0x01) && (version_minor >= 0x37))
  136. rdev->uvd.max_handles = RADEON_MAX_UVD_HANDLES;
  137. }
  138. }
  139. /*
  140. * In case there is only legacy firmware, or we encounter an error
  141. * while loading the new firmware, we fall back to loading the legacy
  142. * firmware now.
  143. */
  144. if (!fw_name || r) {
  145. r = reject_firmware(&rdev->uvd_fw, legacy_fw_name, rdev->dev);
  146. if (r) {
  147. dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n",
  148. legacy_fw_name);
  149. return r;
  150. }
  151. }
  152. bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 8) +
  153. RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE +
  154. RADEON_UVD_SESSION_SIZE * rdev->uvd.max_handles;
  155. r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true,
  156. RADEON_GEM_DOMAIN_VRAM, 0, NULL,
  157. NULL, &rdev->uvd.vcpu_bo);
  158. if (r) {
  159. dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r);
  160. return r;
  161. }
  162. r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
  163. if (r) {
  164. radeon_bo_unref(&rdev->uvd.vcpu_bo);
  165. dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r);
  166. return r;
  167. }
  168. r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM,
  169. &rdev->uvd.gpu_addr);
  170. if (r) {
  171. radeon_bo_unreserve(rdev->uvd.vcpu_bo);
  172. radeon_bo_unref(&rdev->uvd.vcpu_bo);
  173. dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r);
  174. return r;
  175. }
  176. r = radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr);
  177. if (r) {
  178. dev_err(rdev->dev, "(%d) UVD map failed\n", r);
  179. return r;
  180. }
  181. radeon_bo_unreserve(rdev->uvd.vcpu_bo);
  182. for (i = 0; i < rdev->uvd.max_handles; ++i) {
  183. atomic_set(&rdev->uvd.handles[i], 0);
  184. rdev->uvd.filp[i] = NULL;
  185. rdev->uvd.img_size[i] = 0;
  186. }
  187. return 0;
  188. }
  189. void radeon_uvd_fini(struct radeon_device *rdev)
  190. {
  191. int r;
  192. if (rdev->uvd.vcpu_bo == NULL)
  193. return;
  194. r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
  195. if (!r) {
  196. radeon_bo_kunmap(rdev->uvd.vcpu_bo);
  197. radeon_bo_unpin(rdev->uvd.vcpu_bo);
  198. radeon_bo_unreserve(rdev->uvd.vcpu_bo);
  199. }
  200. radeon_bo_unref(&rdev->uvd.vcpu_bo);
  201. radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX]);
  202. release_firmware(rdev->uvd_fw);
  203. }
  204. int radeon_uvd_suspend(struct radeon_device *rdev)
  205. {
  206. int i, r;
  207. if (rdev->uvd.vcpu_bo == NULL)
  208. return 0;
  209. for (i = 0; i < rdev->uvd.max_handles; ++i) {
  210. uint32_t handle = atomic_read(&rdev->uvd.handles[i]);
  211. if (handle != 0) {
  212. struct radeon_fence *fence;
  213. radeon_uvd_note_usage(rdev);
  214. r = radeon_uvd_get_destroy_msg(rdev,
  215. R600_RING_TYPE_UVD_INDEX, handle, &fence);
  216. if (r) {
  217. DRM_ERROR("Error destroying UVD (%d)!\n", r);
  218. continue;
  219. }
  220. radeon_fence_wait(fence, false);
  221. radeon_fence_unref(&fence);
  222. rdev->uvd.filp[i] = NULL;
  223. atomic_set(&rdev->uvd.handles[i], 0);
  224. }
  225. }
  226. return 0;
  227. }
  228. int radeon_uvd_resume(struct radeon_device *rdev)
  229. {
  230. unsigned size;
  231. void *ptr;
  232. if (rdev->uvd.vcpu_bo == NULL)
  233. return -EINVAL;
  234. memcpy(rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size);
  235. size = radeon_bo_size(rdev->uvd.vcpu_bo);
  236. size -= rdev->uvd_fw->size;
  237. ptr = rdev->uvd.cpu_addr;
  238. ptr += rdev->uvd_fw->size;
  239. memset(ptr, 0, size);
  240. return 0;
  241. }
  242. void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo,
  243. uint32_t allowed_domains)
  244. {
  245. int i;
  246. for (i = 0; i < rbo->placement.num_placement; ++i) {
  247. rbo->placements[i].fpfn = 0 >> PAGE_SHIFT;
  248. rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
  249. }
  250. /* If it must be in VRAM it must be in the first segment as well */
  251. if (allowed_domains == RADEON_GEM_DOMAIN_VRAM)
  252. return;
  253. /* abort if we already have more than one placement */
  254. if (rbo->placement.num_placement > 1)
  255. return;
  256. /* add another 256MB segment */
  257. rbo->placements[1] = rbo->placements[0];
  258. rbo->placements[1].fpfn += (256 * 1024 * 1024) >> PAGE_SHIFT;
  259. rbo->placements[1].lpfn += (256 * 1024 * 1024) >> PAGE_SHIFT;
  260. rbo->placement.num_placement++;
  261. rbo->placement.num_busy_placement++;
  262. }
  263. void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp)
  264. {
  265. int i, r;
  266. for (i = 0; i < rdev->uvd.max_handles; ++i) {
  267. uint32_t handle = atomic_read(&rdev->uvd.handles[i]);
  268. if (handle != 0 && rdev->uvd.filp[i] == filp) {
  269. struct radeon_fence *fence;
  270. radeon_uvd_note_usage(rdev);
  271. r = radeon_uvd_get_destroy_msg(rdev,
  272. R600_RING_TYPE_UVD_INDEX, handle, &fence);
  273. if (r) {
  274. DRM_ERROR("Error destroying UVD (%d)!\n", r);
  275. continue;
  276. }
  277. radeon_fence_wait(fence, false);
  278. radeon_fence_unref(&fence);
  279. rdev->uvd.filp[i] = NULL;
  280. atomic_set(&rdev->uvd.handles[i], 0);
  281. }
  282. }
  283. }
  284. static int radeon_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[])
  285. {
  286. unsigned stream_type = msg[4];
  287. unsigned width = msg[6];
  288. unsigned height = msg[7];
  289. unsigned dpb_size = msg[9];
  290. unsigned pitch = msg[28];
  291. unsigned width_in_mb = width / 16;
  292. unsigned height_in_mb = ALIGN(height / 16, 2);
  293. unsigned image_size, tmp, min_dpb_size;
  294. image_size = width * height;
  295. image_size += image_size / 2;
  296. image_size = ALIGN(image_size, 1024);
  297. switch (stream_type) {
  298. case 0: /* H264 */
  299. /* reference picture buffer */
  300. min_dpb_size = image_size * 17;
  301. /* macroblock context buffer */
  302. min_dpb_size += width_in_mb * height_in_mb * 17 * 192;
  303. /* IT surface buffer */
  304. min_dpb_size += width_in_mb * height_in_mb * 32;
  305. break;
  306. case 1: /* VC1 */
  307. /* reference picture buffer */
  308. min_dpb_size = image_size * 3;
  309. /* CONTEXT_BUFFER */
  310. min_dpb_size += width_in_mb * height_in_mb * 128;
  311. /* IT surface buffer */
  312. min_dpb_size += width_in_mb * 64;
  313. /* DB surface buffer */
  314. min_dpb_size += width_in_mb * 128;
  315. /* BP */
  316. tmp = max(width_in_mb, height_in_mb);
  317. min_dpb_size += ALIGN(tmp * 7 * 16, 64);
  318. break;
  319. case 3: /* MPEG2 */
  320. /* reference picture buffer */
  321. min_dpb_size = image_size * 3;
  322. break;
  323. case 4: /* MPEG4 */
  324. /* reference picture buffer */
  325. min_dpb_size = image_size * 3;
  326. /* CM */
  327. min_dpb_size += width_in_mb * height_in_mb * 64;
  328. /* IT surface buffer */
  329. min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64);
  330. break;
  331. default:
  332. DRM_ERROR("UVD codec not handled %d!\n", stream_type);
  333. return -EINVAL;
  334. }
  335. if (width > pitch) {
  336. DRM_ERROR("Invalid UVD decoding target pitch!\n");
  337. return -EINVAL;
  338. }
  339. if (dpb_size < min_dpb_size) {
  340. DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n",
  341. dpb_size, min_dpb_size);
  342. return -EINVAL;
  343. }
  344. buf_sizes[0x1] = dpb_size;
  345. buf_sizes[0x2] = image_size;
  346. return 0;
  347. }
  348. static int radeon_uvd_validate_codec(struct radeon_cs_parser *p,
  349. unsigned stream_type)
  350. {
  351. switch (stream_type) {
  352. case 0: /* H264 */
  353. case 1: /* VC1 */
  354. /* always supported */
  355. return 0;
  356. case 3: /* MPEG2 */
  357. case 4: /* MPEG4 */
  358. /* only since UVD 3 */
  359. if (p->rdev->family >= CHIP_PALM)
  360. return 0;
  361. /* fall through */
  362. default:
  363. DRM_ERROR("UVD codec not supported by hardware %d!\n",
  364. stream_type);
  365. return -EINVAL;
  366. }
  367. }
  368. static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
  369. unsigned offset, unsigned buf_sizes[])
  370. {
  371. int32_t *msg, msg_type, handle;
  372. unsigned img_size = 0;
  373. struct fence *f;
  374. void *ptr;
  375. int i, r;
  376. if (offset & 0x3F) {
  377. DRM_ERROR("UVD messages must be 64 byte aligned!\n");
  378. return -EINVAL;
  379. }
  380. f = reservation_object_get_excl(bo->tbo.resv);
  381. if (f) {
  382. r = radeon_fence_wait((struct radeon_fence *)f, false);
  383. if (r) {
  384. DRM_ERROR("Failed waiting for UVD message (%d)!\n", r);
  385. return r;
  386. }
  387. }
  388. r = radeon_bo_kmap(bo, &ptr);
  389. if (r) {
  390. DRM_ERROR("Failed mapping the UVD message (%d)!\n", r);
  391. return r;
  392. }
  393. msg = ptr + offset;
  394. msg_type = msg[1];
  395. handle = msg[2];
  396. if (handle == 0) {
  397. DRM_ERROR("Invalid UVD handle!\n");
  398. return -EINVAL;
  399. }
  400. switch (msg_type) {
  401. case 0:
  402. /* it's a create msg, calc image size (width * height) */
  403. img_size = msg[7] * msg[8];
  404. r = radeon_uvd_validate_codec(p, msg[4]);
  405. radeon_bo_kunmap(bo);
  406. if (r)
  407. return r;
  408. /* try to alloc a new handle */
  409. for (i = 0; i < p->rdev->uvd.max_handles; ++i) {
  410. if (atomic_read(&p->rdev->uvd.handles[i]) == handle) {
  411. DRM_ERROR("Handle 0x%x already in use!\n", handle);
  412. return -EINVAL;
  413. }
  414. if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) {
  415. p->rdev->uvd.filp[i] = p->filp;
  416. p->rdev->uvd.img_size[i] = img_size;
  417. return 0;
  418. }
  419. }
  420. DRM_ERROR("No more free UVD handles!\n");
  421. return -EINVAL;
  422. case 1:
  423. /* it's a decode msg, validate codec and calc buffer sizes */
  424. r = radeon_uvd_validate_codec(p, msg[4]);
  425. if (!r)
  426. r = radeon_uvd_cs_msg_decode(msg, buf_sizes);
  427. radeon_bo_kunmap(bo);
  428. if (r)
  429. return r;
  430. /* validate the handle */
  431. for (i = 0; i < p->rdev->uvd.max_handles; ++i) {
  432. if (atomic_read(&p->rdev->uvd.handles[i]) == handle) {
  433. if (p->rdev->uvd.filp[i] != p->filp) {
  434. DRM_ERROR("UVD handle collision detected!\n");
  435. return -EINVAL;
  436. }
  437. return 0;
  438. }
  439. }
  440. DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
  441. return -ENOENT;
  442. case 2:
  443. /* it's a destroy msg, free the handle */
  444. for (i = 0; i < p->rdev->uvd.max_handles; ++i)
  445. atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0);
  446. radeon_bo_kunmap(bo);
  447. return 0;
  448. default:
  449. DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
  450. return -EINVAL;
  451. }
  452. BUG();
  453. return -EINVAL;
  454. }
  455. static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p,
  456. int data0, int data1,
  457. unsigned buf_sizes[], bool *has_msg_cmd)
  458. {
  459. struct radeon_cs_chunk *relocs_chunk;
  460. struct radeon_bo_list *reloc;
  461. unsigned idx, cmd, offset;
  462. uint64_t start, end;
  463. int r;
  464. relocs_chunk = p->chunk_relocs;
  465. offset = radeon_get_ib_value(p, data0);
  466. idx = radeon_get_ib_value(p, data1);
  467. if (idx >= relocs_chunk->length_dw) {
  468. DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
  469. idx, relocs_chunk->length_dw);
  470. return -EINVAL;
  471. }
  472. reloc = &p->relocs[(idx / 4)];
  473. start = reloc->gpu_offset;
  474. end = start + radeon_bo_size(reloc->robj);
  475. start += offset;
  476. p->ib.ptr[data0] = start & 0xFFFFFFFF;
  477. p->ib.ptr[data1] = start >> 32;
  478. cmd = radeon_get_ib_value(p, p->idx) >> 1;
  479. if (cmd < 0x4) {
  480. if (end <= start) {
  481. DRM_ERROR("invalid reloc offset %X!\n", offset);
  482. return -EINVAL;
  483. }
  484. if ((end - start) < buf_sizes[cmd]) {
  485. DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd,
  486. (unsigned)(end - start), buf_sizes[cmd]);
  487. return -EINVAL;
  488. }
  489. } else if (cmd != 0x100) {
  490. DRM_ERROR("invalid UVD command %X!\n", cmd);
  491. return -EINVAL;
  492. }
  493. if ((start >> 28) != ((end - 1) >> 28)) {
  494. DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n",
  495. start, end);
  496. return -EINVAL;
  497. }
  498. /* TODO: is this still necessary on NI+ ? */
  499. if ((cmd == 0 || cmd == 0x3) &&
  500. (start >> 28) != (p->rdev->uvd.gpu_addr >> 28)) {
  501. DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n",
  502. start, end);
  503. return -EINVAL;
  504. }
  505. if (cmd == 0) {
  506. if (*has_msg_cmd) {
  507. DRM_ERROR("More than one message in a UVD-IB!\n");
  508. return -EINVAL;
  509. }
  510. *has_msg_cmd = true;
  511. r = radeon_uvd_cs_msg(p, reloc->robj, offset, buf_sizes);
  512. if (r)
  513. return r;
  514. } else if (!*has_msg_cmd) {
  515. DRM_ERROR("Message needed before other commands are send!\n");
  516. return -EINVAL;
  517. }
  518. return 0;
  519. }
  520. static int radeon_uvd_cs_reg(struct radeon_cs_parser *p,
  521. struct radeon_cs_packet *pkt,
  522. int *data0, int *data1,
  523. unsigned buf_sizes[],
  524. bool *has_msg_cmd)
  525. {
  526. int i, r;
  527. p->idx++;
  528. for (i = 0; i <= pkt->count; ++i) {
  529. switch (pkt->reg + i*4) {
  530. case UVD_GPCOM_VCPU_DATA0:
  531. *data0 = p->idx;
  532. break;
  533. case UVD_GPCOM_VCPU_DATA1:
  534. *data1 = p->idx;
  535. break;
  536. case UVD_GPCOM_VCPU_CMD:
  537. r = radeon_uvd_cs_reloc(p, *data0, *data1,
  538. buf_sizes, has_msg_cmd);
  539. if (r)
  540. return r;
  541. break;
  542. case UVD_ENGINE_CNTL:
  543. case UVD_NO_OP:
  544. break;
  545. default:
  546. DRM_ERROR("Invalid reg 0x%X!\n",
  547. pkt->reg + i*4);
  548. return -EINVAL;
  549. }
  550. p->idx++;
  551. }
  552. return 0;
  553. }
  554. int radeon_uvd_cs_parse(struct radeon_cs_parser *p)
  555. {
  556. struct radeon_cs_packet pkt;
  557. int r, data0 = 0, data1 = 0;
  558. /* does the IB has a msg command */
  559. bool has_msg_cmd = false;
  560. /* minimum buffer sizes */
  561. unsigned buf_sizes[] = {
  562. [0x00000000] = 2048,
  563. [0x00000001] = 32 * 1024 * 1024,
  564. [0x00000002] = 2048 * 1152 * 3,
  565. [0x00000003] = 2048,
  566. };
  567. if (p->chunk_ib->length_dw % 16) {
  568. DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
  569. p->chunk_ib->length_dw);
  570. return -EINVAL;
  571. }
  572. if (p->chunk_relocs == NULL) {
  573. DRM_ERROR("No relocation chunk !\n");
  574. return -EINVAL;
  575. }
  576. do {
  577. r = radeon_cs_packet_parse(p, &pkt, p->idx);
  578. if (r)
  579. return r;
  580. switch (pkt.type) {
  581. case RADEON_PACKET_TYPE0:
  582. r = radeon_uvd_cs_reg(p, &pkt, &data0, &data1,
  583. buf_sizes, &has_msg_cmd);
  584. if (r)
  585. return r;
  586. break;
  587. case RADEON_PACKET_TYPE2:
  588. p->idx += pkt.count + 2;
  589. break;
  590. default:
  591. DRM_ERROR("Unknown packet type %d !\n", pkt.type);
  592. return -EINVAL;
  593. }
  594. } while (p->idx < p->chunk_ib->length_dw);
  595. if (!has_msg_cmd) {
  596. DRM_ERROR("UVD-IBs need a msg command!\n");
  597. return -EINVAL;
  598. }
  599. return 0;
  600. }
  601. static int radeon_uvd_send_msg(struct radeon_device *rdev,
  602. int ring, uint64_t addr,
  603. struct radeon_fence **fence)
  604. {
  605. struct radeon_ib ib;
  606. int i, r;
  607. r = radeon_ib_get(rdev, ring, &ib, NULL, 64);
  608. if (r)
  609. return r;
  610. ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0);
  611. ib.ptr[1] = addr;
  612. ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0);
  613. ib.ptr[3] = addr >> 32;
  614. ib.ptr[4] = PACKET0(UVD_GPCOM_VCPU_CMD, 0);
  615. ib.ptr[5] = 0;
  616. for (i = 6; i < 16; i += 2) {
  617. ib.ptr[i] = PACKET0(UVD_NO_OP, 0);
  618. ib.ptr[i+1] = 0;
  619. }
  620. ib.length_dw = 16;
  621. r = radeon_ib_schedule(rdev, &ib, NULL, false);
  622. if (fence)
  623. *fence = radeon_fence_ref(ib.fence);
  624. radeon_ib_free(rdev, &ib);
  625. return r;
  626. }
  627. /*
  628. * multiple fence commands without any stream commands in between can
  629. * crash the vcpu so just try to emmit a dummy create/destroy msg to
  630. * avoid this
  631. */
  632. int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
  633. uint32_t handle, struct radeon_fence **fence)
  634. {
  635. /* we use the last page of the vcpu bo for the UVD message */
  636. uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) -
  637. RADEON_GPU_PAGE_SIZE;
  638. uint32_t *msg = rdev->uvd.cpu_addr + offs;
  639. uint64_t addr = rdev->uvd.gpu_addr + offs;
  640. int r, i;
  641. r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true);
  642. if (r)
  643. return r;
  644. /* stitch together an UVD create msg */
  645. msg[0] = cpu_to_le32(0x00000de4);
  646. msg[1] = cpu_to_le32(0x00000000);
  647. msg[2] = cpu_to_le32(handle);
  648. msg[3] = cpu_to_le32(0x00000000);
  649. msg[4] = cpu_to_le32(0x00000000);
  650. msg[5] = cpu_to_le32(0x00000000);
  651. msg[6] = cpu_to_le32(0x00000000);
  652. msg[7] = cpu_to_le32(0x00000780);
  653. msg[8] = cpu_to_le32(0x00000440);
  654. msg[9] = cpu_to_le32(0x00000000);
  655. msg[10] = cpu_to_le32(0x01b37000);
  656. for (i = 11; i < 1024; ++i)
  657. msg[i] = cpu_to_le32(0x0);
  658. r = radeon_uvd_send_msg(rdev, ring, addr, fence);
  659. radeon_bo_unreserve(rdev->uvd.vcpu_bo);
  660. return r;
  661. }
  662. int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
  663. uint32_t handle, struct radeon_fence **fence)
  664. {
  665. /* we use the last page of the vcpu bo for the UVD message */
  666. uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) -
  667. RADEON_GPU_PAGE_SIZE;
  668. uint32_t *msg = rdev->uvd.cpu_addr + offs;
  669. uint64_t addr = rdev->uvd.gpu_addr + offs;
  670. int r, i;
  671. r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true);
  672. if (r)
  673. return r;
  674. /* stitch together an UVD destroy msg */
  675. msg[0] = cpu_to_le32(0x00000de4);
  676. msg[1] = cpu_to_le32(0x00000002);
  677. msg[2] = cpu_to_le32(handle);
  678. msg[3] = cpu_to_le32(0x00000000);
  679. for (i = 4; i < 1024; ++i)
  680. msg[i] = cpu_to_le32(0x0);
  681. r = radeon_uvd_send_msg(rdev, ring, addr, fence);
  682. radeon_bo_unreserve(rdev->uvd.vcpu_bo);
  683. return r;
  684. }
  685. /**
  686. * radeon_uvd_count_handles - count number of open streams
  687. *
  688. * @rdev: radeon_device pointer
  689. * @sd: number of SD streams
  690. * @hd: number of HD streams
  691. *
  692. * Count the number of open SD/HD streams as a hint for power mangement
  693. */
  694. static void radeon_uvd_count_handles(struct radeon_device *rdev,
  695. unsigned *sd, unsigned *hd)
  696. {
  697. unsigned i;
  698. *sd = 0;
  699. *hd = 0;
  700. for (i = 0; i < rdev->uvd.max_handles; ++i) {
  701. if (!atomic_read(&rdev->uvd.handles[i]))
  702. continue;
  703. if (rdev->uvd.img_size[i] >= 720*576)
  704. ++(*hd);
  705. else
  706. ++(*sd);
  707. }
  708. }
  709. static void radeon_uvd_idle_work_handler(struct work_struct *work)
  710. {
  711. struct radeon_device *rdev =
  712. container_of(work, struct radeon_device, uvd.idle_work.work);
  713. if (radeon_fence_count_emitted(rdev, R600_RING_TYPE_UVD_INDEX) == 0) {
  714. if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
  715. radeon_uvd_count_handles(rdev, &rdev->pm.dpm.sd,
  716. &rdev->pm.dpm.hd);
  717. radeon_dpm_enable_uvd(rdev, false);
  718. } else {
  719. radeon_set_uvd_clocks(rdev, 0, 0);
  720. }
  721. } else {
  722. schedule_delayed_work(&rdev->uvd.idle_work,
  723. msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
  724. }
  725. }
  726. void radeon_uvd_note_usage(struct radeon_device *rdev)
  727. {
  728. bool streams_changed = false;
  729. bool set_clocks = !cancel_delayed_work_sync(&rdev->uvd.idle_work);
  730. set_clocks &= schedule_delayed_work(&rdev->uvd.idle_work,
  731. msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
  732. if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
  733. unsigned hd = 0, sd = 0;
  734. radeon_uvd_count_handles(rdev, &sd, &hd);
  735. if ((rdev->pm.dpm.sd != sd) ||
  736. (rdev->pm.dpm.hd != hd)) {
  737. rdev->pm.dpm.sd = sd;
  738. rdev->pm.dpm.hd = hd;
  739. /* disable this for now */
  740. /*streams_changed = true;*/
  741. }
  742. }
  743. if (set_clocks || streams_changed) {
  744. if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
  745. radeon_dpm_enable_uvd(rdev, true);
  746. } else {
  747. radeon_set_uvd_clocks(rdev, 53300, 40000);
  748. }
  749. }
  750. }
  751. static unsigned radeon_uvd_calc_upll_post_div(unsigned vco_freq,
  752. unsigned target_freq,
  753. unsigned pd_min,
  754. unsigned pd_even)
  755. {
  756. unsigned post_div = vco_freq / target_freq;
  757. /* adjust to post divider minimum value */
  758. if (post_div < pd_min)
  759. post_div = pd_min;
  760. /* we alway need a frequency less than or equal the target */
  761. if ((vco_freq / post_div) > target_freq)
  762. post_div += 1;
  763. /* post dividers above a certain value must be even */
  764. if (post_div > pd_even && post_div % 2)
  765. post_div += 1;
  766. return post_div;
  767. }
  768. /**
  769. * radeon_uvd_calc_upll_dividers - calc UPLL clock dividers
  770. *
  771. * @rdev: radeon_device pointer
  772. * @vclk: wanted VCLK
  773. * @dclk: wanted DCLK
  774. * @vco_min: minimum VCO frequency
  775. * @vco_max: maximum VCO frequency
  776. * @fb_factor: factor to multiply vco freq with
  777. * @fb_mask: limit and bitmask for feedback divider
  778. * @pd_min: post divider minimum
  779. * @pd_max: post divider maximum
  780. * @pd_even: post divider must be even above this value
  781. * @optimal_fb_div: resulting feedback divider
  782. * @optimal_vclk_div: resulting vclk post divider
  783. * @optimal_dclk_div: resulting dclk post divider
  784. *
  785. * Calculate dividers for UVDs UPLL (R6xx-SI, except APUs).
  786. * Returns zero on success -EINVAL on error.
  787. */
  788. int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev,
  789. unsigned vclk, unsigned dclk,
  790. unsigned vco_min, unsigned vco_max,
  791. unsigned fb_factor, unsigned fb_mask,
  792. unsigned pd_min, unsigned pd_max,
  793. unsigned pd_even,
  794. unsigned *optimal_fb_div,
  795. unsigned *optimal_vclk_div,
  796. unsigned *optimal_dclk_div)
  797. {
  798. unsigned vco_freq, ref_freq = rdev->clock.spll.reference_freq;
  799. /* start off with something large */
  800. unsigned optimal_score = ~0;
  801. /* loop through vco from low to high */
  802. vco_min = max(max(vco_min, vclk), dclk);
  803. for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) {
  804. uint64_t fb_div = (uint64_t)vco_freq * fb_factor;
  805. unsigned vclk_div, dclk_div, score;
  806. do_div(fb_div, ref_freq);
  807. /* fb div out of range ? */
  808. if (fb_div > fb_mask)
  809. break; /* it can oly get worse */
  810. fb_div &= fb_mask;
  811. /* calc vclk divider with current vco freq */
  812. vclk_div = radeon_uvd_calc_upll_post_div(vco_freq, vclk,
  813. pd_min, pd_even);
  814. if (vclk_div > pd_max)
  815. break; /* vco is too big, it has to stop */
  816. /* calc dclk divider with current vco freq */
  817. dclk_div = radeon_uvd_calc_upll_post_div(vco_freq, dclk,
  818. pd_min, pd_even);
  819. if (dclk_div > pd_max)
  820. break; /* vco is too big, it has to stop */
  821. /* calc score with current vco freq */
  822. score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div);
  823. /* determine if this vco setting is better than current optimal settings */
  824. if (score < optimal_score) {
  825. *optimal_fb_div = fb_div;
  826. *optimal_vclk_div = vclk_div;
  827. *optimal_dclk_div = dclk_div;
  828. optimal_score = score;
  829. if (optimal_score == 0)
  830. break; /* it can't get better than this */
  831. }
  832. }
  833. /* did we found a valid setup ? */
  834. if (optimal_score == ~0)
  835. return -EINVAL;
  836. return 0;
  837. }
  838. int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev,
  839. unsigned cg_upll_func_cntl)
  840. {
  841. unsigned i;
  842. /* make sure UPLL_CTLREQ is deasserted */
  843. WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);
  844. mdelay(10);
  845. /* assert UPLL_CTLREQ */
  846. WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
  847. /* wait for CTLACK and CTLACK2 to get asserted */
  848. for (i = 0; i < 100; ++i) {
  849. uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
  850. if ((RREG32(cg_upll_func_cntl) & mask) == mask)
  851. break;
  852. mdelay(10);
  853. }
  854. /* deassert UPLL_CTLREQ */
  855. WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);
  856. if (i == 100) {
  857. DRM_ERROR("Timeout setting UVD clocks!\n");
  858. return -ETIMEDOUT;
  859. }
  860. return 0;
  861. }