EfbInterface.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720
  1. // Copyright 2009 Dolphin Emulator Project
  2. // SPDX-License-Identifier: GPL-2.0-or-later
  3. #include "VideoBackends/Software/EfbInterface.h"
  4. #include <algorithm>
  5. #include <array>
  6. #include <cstddef>
  7. #include <cstring>
  8. #include <vector>
  9. #include "Common/CommonTypes.h"
  10. #include "Common/Logging/Log.h"
  11. #include "VideoBackends/Software/CopyRegion.h"
  12. #include "VideoCommon/BPMemory.h"
  13. #include "VideoCommon/LookUpTables.h"
  14. #include "VideoCommon/PerfQueryBase.h"
  15. #include "VideoCommon/VideoCommon.h"
  16. namespace EfbInterface
  17. {
  18. static std::array<u8, EFB_WIDTH * EFB_HEIGHT * 6> efb;
  19. static std::array<u32, PQ_NUM_MEMBERS> perf_values;
  20. static inline u32 GetColorOffset(u16 x, u16 y)
  21. {
  22. return (x + y * EFB_WIDTH) * 3;
  23. }
  24. static inline u32 GetDepthOffset(u16 x, u16 y)
  25. {
  26. constexpr u32 depth_buffer_start = EFB_WIDTH * EFB_HEIGHT * 3;
  27. return (x + y * EFB_WIDTH) * 3 + depth_buffer_start;
  28. }
  29. static void SetPixelAlphaOnly(u32 offset, u8 a)
  30. {
  31. switch (bpmem.zcontrol.pixel_format)
  32. {
  33. case PixelFormat::RGB8_Z24:
  34. case PixelFormat::Z24:
  35. case PixelFormat::RGB565_Z16:
  36. // do nothing
  37. break;
  38. case PixelFormat::RGBA6_Z24:
  39. {
  40. u32 a32 = a;
  41. u32* dst = (u32*)&efb[offset];
  42. u32 val = *dst & 0xffffffc0;
  43. val |= (a32 >> 2) & 0x0000003f;
  44. *dst = val;
  45. }
  46. break;
  47. default:
  48. ERROR_LOG_FMT(VIDEO, "Unsupported pixel format: {}", bpmem.zcontrol.pixel_format);
  49. break;
  50. }
  51. }
  52. static void SetPixelColorOnly(u32 offset, u8* rgb)
  53. {
  54. switch (bpmem.zcontrol.pixel_format)
  55. {
  56. case PixelFormat::RGB8_Z24:
  57. case PixelFormat::Z24:
  58. {
  59. u32 src = *(u32*)rgb;
  60. u32* dst = (u32*)&efb[offset];
  61. u32 val = *dst & 0xff000000;
  62. val |= src >> 8;
  63. *dst = val;
  64. }
  65. break;
  66. case PixelFormat::RGBA6_Z24:
  67. {
  68. u32 src = *(u32*)rgb;
  69. u32* dst = (u32*)&efb[offset];
  70. u32 val = *dst & 0xff00003f;
  71. val |= (src >> 4) & 0x00000fc0; // blue
  72. val |= (src >> 6) & 0x0003f000; // green
  73. val |= (src >> 8) & 0x00fc0000; // red
  74. *dst = val;
  75. }
  76. break;
  77. case PixelFormat::RGB565_Z16:
  78. {
  79. // TODO: RGB565_Z16 is not supported correctly yet
  80. u32 src = *(u32*)rgb;
  81. u32* dst = (u32*)&efb[offset];
  82. u32 val = *dst & 0xff000000;
  83. val |= src >> 8;
  84. *dst = val;
  85. }
  86. break;
  87. default:
  88. ERROR_LOG_FMT(VIDEO, "Unsupported pixel format: {}", bpmem.zcontrol.pixel_format);
  89. break;
  90. }
  91. }
  92. static void SetPixelAlphaColor(u32 offset, u8* color)
  93. {
  94. switch (bpmem.zcontrol.pixel_format)
  95. {
  96. case PixelFormat::RGB8_Z24:
  97. case PixelFormat::Z24:
  98. {
  99. u32 src = *(u32*)color;
  100. u32* dst = (u32*)&efb[offset];
  101. u32 val = *dst & 0xff000000;
  102. val |= src >> 8;
  103. *dst = val;
  104. }
  105. break;
  106. case PixelFormat::RGBA6_Z24:
  107. {
  108. u32 src = *(u32*)color;
  109. u32* dst = (u32*)&efb[offset];
  110. u32 val = *dst & 0xff000000;
  111. val |= (src >> 2) & 0x0000003f; // alpha
  112. val |= (src >> 4) & 0x00000fc0; // blue
  113. val |= (src >> 6) & 0x0003f000; // green
  114. val |= (src >> 8) & 0x00fc0000; // red
  115. *dst = val;
  116. }
  117. break;
  118. case PixelFormat::RGB565_Z16:
  119. {
  120. // TODO: RGB565_Z16 is not supported correctly yet
  121. u32 src = *(u32*)color;
  122. u32* dst = (u32*)&efb[offset];
  123. u32 val = *dst & 0xff000000;
  124. val |= src >> 8;
  125. *dst = val;
  126. }
  127. break;
  128. default:
  129. ERROR_LOG_FMT(VIDEO, "Unsupported pixel format: {}", bpmem.zcontrol.pixel_format);
  130. break;
  131. }
  132. }
  133. static u32 GetPixelColor(u32 offset)
  134. {
  135. u32 src;
  136. std::memcpy(&src, &efb[offset], sizeof(u32));
  137. switch (bpmem.zcontrol.pixel_format)
  138. {
  139. case PixelFormat::RGB8_Z24:
  140. case PixelFormat::Z24:
  141. return 0xff | ((src & 0x00ffffff) << 8);
  142. case PixelFormat::RGBA6_Z24:
  143. return Convert6To8(src & 0x3f) | // Alpha
  144. Convert6To8((src >> 6) & 0x3f) << 8 | // Blue
  145. Convert6To8((src >> 12) & 0x3f) << 16 | // Green
  146. Convert6To8((src >> 18) & 0x3f) << 24; // Red
  147. case PixelFormat::RGB565_Z16:
  148. // TODO: RGB565_Z16 is not supported correctly yet
  149. return 0xff | ((src & 0x00ffffff) << 8);
  150. default:
  151. ERROR_LOG_FMT(VIDEO, "Unsupported pixel format: {}", bpmem.zcontrol.pixel_format);
  152. return 0;
  153. }
  154. }
  155. static void SetPixelDepth(u32 offset, u32 depth)
  156. {
  157. switch (bpmem.zcontrol.pixel_format)
  158. {
  159. case PixelFormat::RGB8_Z24:
  160. case PixelFormat::RGBA6_Z24:
  161. case PixelFormat::Z24:
  162. {
  163. u32* dst = (u32*)&efb[offset];
  164. u32 val = *dst & 0xff000000;
  165. val |= depth & 0x00ffffff;
  166. *dst = val;
  167. }
  168. break;
  169. case PixelFormat::RGB565_Z16:
  170. {
  171. // TODO: RGB565_Z16 is not supported correctly yet
  172. u32* dst = (u32*)&efb[offset];
  173. u32 val = *dst & 0xff000000;
  174. val |= depth & 0x00ffffff;
  175. *dst = val;
  176. }
  177. break;
  178. default:
  179. ERROR_LOG_FMT(VIDEO, "Unsupported pixel format: {}", bpmem.zcontrol.pixel_format);
  180. break;
  181. }
  182. }
  183. static u32 GetPixelDepth(u32 offset)
  184. {
  185. u32 depth = 0;
  186. switch (bpmem.zcontrol.pixel_format)
  187. {
  188. case PixelFormat::RGB8_Z24:
  189. case PixelFormat::RGBA6_Z24:
  190. case PixelFormat::Z24:
  191. {
  192. depth = (*(u32*)&efb[offset]) & 0x00ffffff;
  193. }
  194. break;
  195. case PixelFormat::RGB565_Z16:
  196. {
  197. // TODO: RGB565_Z16 is not supported correctly yet
  198. depth = (*(u32*)&efb[offset]) & 0x00ffffff;
  199. }
  200. break;
  201. default:
  202. ERROR_LOG_FMT(VIDEO, "Unsupported pixel format: {}", bpmem.zcontrol.pixel_format);
  203. break;
  204. }
  205. return depth;
  206. }
  207. static u32 GetSourceFactor(u8* srcClr, u8* dstClr, SrcBlendFactor mode)
  208. {
  209. switch (mode)
  210. {
  211. case SrcBlendFactor::Zero:
  212. return 0;
  213. case SrcBlendFactor::One:
  214. return 0xffffffff;
  215. case SrcBlendFactor::DstClr:
  216. return *(u32*)dstClr;
  217. case SrcBlendFactor::InvDstClr:
  218. return 0xffffffff - *(u32*)dstClr;
  219. case SrcBlendFactor::SrcAlpha:
  220. {
  221. u8 alpha = srcClr[ALP_C];
  222. u32 factor = alpha << 24 | alpha << 16 | alpha << 8 | alpha;
  223. return factor;
  224. }
  225. case SrcBlendFactor::InvSrcAlpha:
  226. {
  227. u8 alpha = 0xff - srcClr[ALP_C];
  228. u32 factor = alpha << 24 | alpha << 16 | alpha << 8 | alpha;
  229. return factor;
  230. }
  231. case SrcBlendFactor::DstAlpha:
  232. {
  233. u8 alpha = dstClr[ALP_C];
  234. u32 factor = alpha << 24 | alpha << 16 | alpha << 8 | alpha;
  235. return factor;
  236. }
  237. case SrcBlendFactor::InvDstAlpha:
  238. {
  239. u8 alpha = 0xff - dstClr[ALP_C];
  240. u32 factor = alpha << 24 | alpha << 16 | alpha << 8 | alpha;
  241. return factor;
  242. }
  243. }
  244. return 0;
  245. }
  246. static u32 GetDestinationFactor(u8* srcClr, u8* dstClr, DstBlendFactor mode)
  247. {
  248. switch (mode)
  249. {
  250. case DstBlendFactor::Zero:
  251. return 0;
  252. case DstBlendFactor::One:
  253. return 0xffffffff;
  254. case DstBlendFactor::SrcClr:
  255. return *(u32*)srcClr;
  256. case DstBlendFactor::InvSrcClr:
  257. return 0xffffffff - *(u32*)srcClr;
  258. case DstBlendFactor::SrcAlpha:
  259. {
  260. u8 alpha = srcClr[ALP_C];
  261. u32 factor = alpha << 24 | alpha << 16 | alpha << 8 | alpha;
  262. return factor;
  263. }
  264. case DstBlendFactor::InvSrcAlpha:
  265. {
  266. u8 alpha = 0xff - srcClr[ALP_C];
  267. u32 factor = alpha << 24 | alpha << 16 | alpha << 8 | alpha;
  268. return factor;
  269. }
  270. case DstBlendFactor::DstAlpha:
  271. {
  272. u8 alpha = dstClr[ALP_C] & 0xff;
  273. u32 factor = alpha << 24 | alpha << 16 | alpha << 8 | alpha;
  274. return factor;
  275. }
  276. case DstBlendFactor::InvDstAlpha:
  277. {
  278. u8 alpha = 0xff - dstClr[ALP_C];
  279. u32 factor = alpha << 24 | alpha << 16 | alpha << 8 | alpha;
  280. return factor;
  281. }
  282. }
  283. return 0;
  284. }
  285. static void BlendColor(u8* srcClr, u8* dstClr)
  286. {
  287. u32 srcFactor = GetSourceFactor(srcClr, dstClr, bpmem.blendmode.srcfactor);
  288. u32 dstFactor = GetDestinationFactor(srcClr, dstClr, bpmem.blendmode.dstfactor);
  289. for (int i = 0; i < 4; i++)
  290. {
  291. // add MSB of factors to make their range 0 -> 256
  292. u32 sf = (srcFactor & 0xff);
  293. sf += sf >> 7;
  294. u32 df = (dstFactor & 0xff);
  295. df += df >> 7;
  296. u32 color = (srcClr[i] * sf + dstClr[i] * df) >> 8;
  297. dstClr[i] = (color > 255) ? 255 : color;
  298. dstFactor >>= 8;
  299. srcFactor >>= 8;
  300. }
  301. }
  302. static void LogicBlend(u32 srcClr, u32* dstClr, LogicOp op)
  303. {
  304. switch (op)
  305. {
  306. case LogicOp::Clear:
  307. *dstClr = 0;
  308. break;
  309. case LogicOp::And:
  310. *dstClr = srcClr & *dstClr;
  311. break;
  312. case LogicOp::AndReverse:
  313. *dstClr = srcClr & (~*dstClr);
  314. break;
  315. case LogicOp::Copy:
  316. *dstClr = srcClr;
  317. break;
  318. case LogicOp::AndInverted:
  319. *dstClr = (~srcClr) & *dstClr;
  320. break;
  321. case LogicOp::NoOp:
  322. // Do nothing
  323. break;
  324. case LogicOp::Xor:
  325. *dstClr = srcClr ^ *dstClr;
  326. break;
  327. case LogicOp::Or:
  328. *dstClr = srcClr | *dstClr;
  329. break;
  330. case LogicOp::Nor:
  331. *dstClr = ~(srcClr | *dstClr);
  332. break;
  333. case LogicOp::Equiv:
  334. *dstClr = ~(srcClr ^ *dstClr);
  335. break;
  336. case LogicOp::Invert:
  337. *dstClr = ~*dstClr;
  338. break;
  339. case LogicOp::OrReverse:
  340. *dstClr = srcClr | (~*dstClr);
  341. break;
  342. case LogicOp::CopyInverted:
  343. *dstClr = ~srcClr;
  344. break;
  345. case LogicOp::OrInverted:
  346. *dstClr = (~srcClr) | *dstClr;
  347. break;
  348. case LogicOp::Nand:
  349. *dstClr = ~(srcClr & *dstClr);
  350. break;
  351. case LogicOp::Set:
  352. *dstClr = 0xffffffff;
  353. break;
  354. }
  355. }
  356. static void SubtractBlend(u8* srcClr, u8* dstClr)
  357. {
  358. for (int i = 0; i < 4; i++)
  359. {
  360. int c = (int)dstClr[i] - (int)srcClr[i];
  361. dstClr[i] = (c < 0) ? 0 : c;
  362. }
  363. }
  364. static void Dither(u16 x, u16 y, u8* color)
  365. {
  366. // No blending for RGB8 mode
  367. if (!bpmem.blendmode.dither || bpmem.zcontrol.pixel_format != PixelFormat::RGBA6_Z24)
  368. return;
  369. // Flipper uses a standard 2x2 Bayer Matrix for 6 bit dithering
  370. static const u8 dither[2][2] = {{0, 2}, {3, 1}};
  371. // Only the color channels are dithered?
  372. for (int i = BLU_C; i <= RED_C; i++)
  373. color[i] = ((color[i] - (color[i] >> 6)) + dither[y & 1][x & 1]) & 0xfc;
  374. }
  375. void BlendTev(u16 x, u16 y, u8* color)
  376. {
  377. const u32 offset = GetColorOffset(x, y);
  378. u32 dstClr = GetPixelColor(offset);
  379. u8* dstClrPtr = (u8*)&dstClr;
  380. if (bpmem.blendmode.blendenable)
  381. {
  382. if (bpmem.blendmode.subtract)
  383. SubtractBlend(color, dstClrPtr);
  384. else
  385. BlendColor(color, dstClrPtr);
  386. }
  387. else if (bpmem.blendmode.logicopenable)
  388. {
  389. LogicBlend(*((u32*)color), &dstClr, bpmem.blendmode.logicmode);
  390. }
  391. else
  392. {
  393. dstClrPtr = color;
  394. }
  395. if (bpmem.dstalpha.enable)
  396. dstClrPtr[ALP_C] = bpmem.dstalpha.alpha;
  397. if (bpmem.blendmode.colorupdate)
  398. {
  399. Dither(x, y, dstClrPtr);
  400. if (bpmem.blendmode.alphaupdate)
  401. SetPixelAlphaColor(offset, dstClrPtr);
  402. else
  403. SetPixelColorOnly(offset, dstClrPtr);
  404. }
  405. else if (bpmem.blendmode.alphaupdate)
  406. {
  407. SetPixelAlphaOnly(offset, dstClrPtr[ALP_C]);
  408. }
  409. }
  410. void SetColor(u16 x, u16 y, u8* color)
  411. {
  412. u32 offset = GetColorOffset(x, y);
  413. if (bpmem.blendmode.colorupdate)
  414. {
  415. if (bpmem.blendmode.alphaupdate)
  416. SetPixelAlphaColor(offset, color);
  417. else
  418. SetPixelColorOnly(offset, color);
  419. }
  420. else if (bpmem.blendmode.alphaupdate)
  421. {
  422. SetPixelAlphaOnly(offset, color[ALP_C]);
  423. }
  424. }
  425. void SetDepth(u16 x, u16 y, u32 depth)
  426. {
  427. if (bpmem.zmode.updateenable)
  428. SetPixelDepth(GetDepthOffset(x, y), depth);
  429. }
  430. u32 GetColor(u16 x, u16 y)
  431. {
  432. u32 offset = GetColorOffset(x, y);
  433. return GetPixelColor(offset);
  434. }
  435. static u32 VerticalFilter(const std::array<u32, 3>& colors,
  436. const std::array<u8, 7>& filterCoefficients)
  437. {
  438. u8 in_colors[3][4];
  439. std::memcpy(&in_colors, colors.data(), sizeof(in_colors));
  440. // Alpha channel is not used
  441. u8 out_color[4];
  442. out_color[ALP_C] = 0;
  443. // All Coefficients should sum to 64, otherwise the total brightness will change, which many games
  444. // do on purpose to implement a brightness filter across the whole copy.
  445. for (int i = BLU_C; i <= RED_C; i++)
  446. {
  447. // TODO: implement support for multisampling.
  448. // In non-multisampling mode:
  449. // * Coefficients 2, 3 and 4 sample from the current pixel.
  450. // * Coefficients 0 and 1 sample from the pixel above this one
  451. // * Coefficients 5 and 6 sample from the pixel below this one
  452. int sum =
  453. in_colors[0][i] * (filterCoefficients[0] + filterCoefficients[1]) +
  454. in_colors[1][i] * (filterCoefficients[2] + filterCoefficients[3] + filterCoefficients[4]) +
  455. in_colors[2][i] * (filterCoefficients[5] + filterCoefficients[6]);
  456. // TODO: this clamping behavior appears to be correct, but isn't confirmed on hardware.
  457. out_color[i] = std::min(255, sum >> 6); // clamp larger values to 255
  458. }
  459. u32 out_color32;
  460. std::memcpy(&out_color32, out_color, sizeof(out_color32));
  461. return out_color32;
  462. }
  463. static u32 GammaCorrection(u32 color, const float gamma_rcp)
  464. {
  465. u8 in_colors[4];
  466. std::memcpy(&in_colors, &color, sizeof(in_colors));
  467. u8 out_color[4];
  468. for (int i = BLU_C; i <= RED_C; i++)
  469. {
  470. out_color[i] = static_cast<u8>(
  471. std::clamp(std::pow(in_colors[i] / 255.0f, gamma_rcp) * 255.0f, 0.0f, 255.0f));
  472. }
  473. u32 out_color32;
  474. std::memcpy(&out_color32, out_color, sizeof(out_color32));
  475. return out_color32;
  476. }
  477. // For internal used only, return a non-normalized value, which saves work later.
  478. static yuv444 ConvertColorToYUV(u32 color)
  479. {
  480. const u8 red = static_cast<u8>(color >> 24);
  481. const u8 green = static_cast<u8>(color >> 16);
  482. const u8 blue = static_cast<u8>(color >> 8);
  483. // GameCube/Wii uses the BT.601 standard algorithm for converting to YCbCr; see
  484. // http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion
  485. // These numbers were determined by hardware testing
  486. const u16 y = +66 * red + 129 * green + +25 * blue;
  487. const s16 u = -38 * red + -74 * green + 112 * blue;
  488. const s16 v = 112 * red + -94 * green + -18 * blue;
  489. const u8 y_round = static_cast<u8>((y >> 8) + ((y >> 7) & 1));
  490. const s8 u_round = static_cast<s8>((u >> 8) + ((u >> 7) & 1));
  491. const s8 v_round = static_cast<s8>((v >> 8) + ((v >> 7) & 1));
  492. return {y_round, u_round, v_round};
  493. }
  494. u32 GetDepth(u16 x, u16 y)
  495. {
  496. u32 offset = GetDepthOffset(x, y);
  497. return GetPixelDepth(offset);
  498. }
  499. u8* GetPixelPointer(u16 x, u16 y, bool depth)
  500. {
  501. if (depth)
  502. return &efb[GetDepthOffset(x, y)];
  503. return &efb[GetColorOffset(x, y)];
  504. }
  505. void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const MathUtil::Rectangle<int>& source_rect,
  506. float y_scale, float gamma)
  507. {
  508. if (!xfb_in_ram)
  509. {
  510. WARN_LOG_FMT(VIDEO, "Tried to copy to invalid XFB address");
  511. return;
  512. }
  513. const int left = source_rect.left;
  514. const int right = source_rect.right;
  515. const bool clamp_top = bpmem.triggerEFBCopy.clamp_top;
  516. const bool clamp_bottom = bpmem.triggerEFBCopy.clamp_bottom;
  517. const float gamma_rcp = 1.0f / gamma;
  518. const auto filter_coefficients = bpmem.copyfilter.GetCoefficients();
  519. // this assumes copies will always start on an even (YU) pixel and the
  520. // copy always has an even width, which might not be true.
  521. if (left & 1 || right & 1)
  522. {
  523. WARN_LOG_FMT(VIDEO, "Trying to copy XFB to from unaligned EFB source");
  524. // this will show up as wrongly encoded
  525. }
  526. // Scanline buffer, leave room for borders
  527. yuv444 scanline[EFB_WIDTH + 2];
  528. static std::vector<yuv422_packed> source;
  529. source.resize(EFB_WIDTH * EFB_HEIGHT);
  530. yuv422_packed* src_ptr = &source[0];
  531. for (int y = source_rect.top; y < source_rect.bottom; y++)
  532. {
  533. // Clamping behavior
  534. // NOTE: when the clamp bits aren't set, the hardware will happily read beyond the EFB,
  535. // which returns random garbage from the empty bus (confirmed by hardware tests).
  536. //
  537. // In our implementation, the garbage just so happens to be the top or bottom row.
  538. // Statistically, that could happen.
  539. const u16 y_prev = static_cast<u16>(std::max(clamp_top ? source_rect.top : 0, y - 1));
  540. const u16 y_next = static_cast<u16>(
  541. std::min<int>((clamp_bottom ? source_rect.bottom : EFB_HEIGHT) - 1, y + 1));
  542. // Get a scanline of YUV pixels in 4:4:4 format
  543. for (int i = 1, x = left; x < right; i++, x++)
  544. {
  545. // Get RGB colors
  546. std::array<u32, 3> colors = {{GetColor(x, y_prev), GetColor(x, y), GetColor(x, y_next)}};
  547. // Vertical Filter (Multisampling resolve, deflicker, brightness)
  548. u32 filtered = VerticalFilter(colors, filter_coefficients);
  549. // Gamma correction happens here.
  550. filtered = GammaCorrection(filtered, gamma_rcp);
  551. scanline[i] = ConvertColorToYUV(filtered);
  552. }
  553. // Flipper clamps the border colors
  554. scanline[0] = scanline[1];
  555. scanline[right + 1] = scanline[right];
  556. // And Downsample them to 4:2:2
  557. for (int i = 1, x = left; x < right; i += 2, x += 2)
  558. {
  559. // YU pixel
  560. src_ptr[x].Y = scanline[i].Y + 16;
  561. // we mix our color differences in 10 bit space so it will round more accurately
  562. // U[i] = 1/4 * U[i-1] + 1/2 * U[i] + 1/4 * U[i+1]
  563. src_ptr[x].UV = 128 + ((scanline[i - 1].U + (scanline[i].U << 1) + scanline[i + 1].U) >> 2);
  564. // YV pixel
  565. src_ptr[x + 1].Y = scanline[i + 1].Y + 16;
  566. // V[i] = 1/4 * V[i-1] + 1/2 * V[i] + 1/4 * V[i+1]
  567. src_ptr[x + 1].UV =
  568. 128 + ((scanline[i - 1].V + (scanline[i].V << 1) + scanline[i + 1].V) >> 2);
  569. }
  570. src_ptr += memory_stride;
  571. }
  572. const int src_width = source_rect.GetWidth();
  573. const int src_height = source_rect.GetHeight();
  574. const int dst_width = src_width;
  575. const int dst_height = src_height * y_scale;
  576. SW::CopyRegion(source.data(), src_width, src_height, reinterpret_cast<yuv422_packed*>(xfb_in_ram),
  577. dst_width, dst_height);
  578. }
  579. bool ZCompare(u16 x, u16 y, u32 z)
  580. {
  581. u32 offset = GetDepthOffset(x, y);
  582. u32 depth = GetPixelDepth(offset);
  583. bool pass;
  584. switch (bpmem.zmode.func)
  585. {
  586. case CompareMode::Never:
  587. pass = false;
  588. break;
  589. case CompareMode::Less:
  590. pass = z < depth;
  591. break;
  592. case CompareMode::Equal:
  593. pass = z == depth;
  594. break;
  595. case CompareMode::LEqual:
  596. pass = z <= depth;
  597. break;
  598. case CompareMode::Greater:
  599. pass = z > depth;
  600. break;
  601. case CompareMode::NEqual:
  602. pass = z != depth;
  603. break;
  604. case CompareMode::GEqual:
  605. pass = z >= depth;
  606. break;
  607. case CompareMode::Always:
  608. pass = true;
  609. break;
  610. default:
  611. pass = false;
  612. ERROR_LOG_FMT(VIDEO, "Bad Z compare mode {}", bpmem.zmode.func);
  613. break;
  614. }
  615. if (pass && bpmem.zmode.updateenable)
  616. {
  617. SetPixelDepth(offset, z);
  618. }
  619. return pass;
  620. }
  621. u32 GetPerfQueryResult(PerfQueryType type)
  622. {
  623. return perf_values[type];
  624. }
  625. void ResetPerfQuery()
  626. {
  627. perf_values = {};
  628. }
  629. void IncPerfCounterQuadCount(PerfQueryType type)
  630. {
  631. // NOTE: hardware doesn't process individual pixels but quads instead.
  632. // Current software renderer architecture works on pixels though, so
  633. // we have this "quad" hack here to only increment the registers on
  634. // every fourth rendered pixel
  635. static u32 quad[PQ_NUM_MEMBERS];
  636. if (++quad[type] != 3)
  637. return;
  638. quad[type] = 0;
  639. ++perf_values[type];
  640. }
  641. } // namespace EfbInterface