// Copyright 2010 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "VideoCommon/VertexManagerBase.h"

#include <array>
#include <cmath>
#include <memory>

#include "Common/ChunkFile.h"
#include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "Common/Logging/Log.h"
#include "Common/MathUtil.h"
#include "Common/SmallVector.h"

#include "Core/DolphinAnalytics.h"
#include "Core/HW/SystemTimers.h"
#include "Core/System.h"

#include "VideoCommon/AbstractGfx.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/BoundingBox.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/FramebufferManager.h"
#include "VideoCommon/GeometryShaderManager.h"
#include "VideoCommon/GraphicsModSystem/Runtime/CustomShaderCache.h"
#include "VideoCommon/GraphicsModSystem/Runtime/GraphicsModActionData.h"
#include "VideoCommon/GraphicsModSystem/Runtime/GraphicsModManager.h"
#include "VideoCommon/IndexGenerator.h"
#include "VideoCommon/NativeVertexFormat.h"
#include "VideoCommon/OpcodeDecoding.h"
#include "VideoCommon/PerfQueryBase.h"
#include "VideoCommon/PixelShaderGen.h"
#include "VideoCommon/PixelShaderManager.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/TextureInfo.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexShaderManager.h"
#include "VideoCommon/VideoBackendBase.h"
#include "VideoCommon/VideoCommon.h"
#include "VideoCommon/VideoConfig.h"
#include "VideoCommon/XFMemory.h"
#include "VideoCommon/XFStateManager.h"

std::unique_ptr<VertexManagerBase> g_vertex_manager;

using OpcodeDecoder::Primitive;

// GX primitive -> RenderState primitive, no primitive restart
constexpr Common::EnumMap<PrimitiveType, Primitive::GX_DRAW_POINTS> primitive_from_gx{
    PrimitiveType::Triangles,  // GX_DRAW_QUADS
    PrimitiveType::Triangles,  // GX_DRAW_QUADS_2
    PrimitiveType::Triangles,  // GX_DRAW_TRIANGLES
    PrimitiveType::Triangles,  // GX_DRAW_TRIANGLE_STRIP
    PrimitiveType::Triangles,  // GX_DRAW_TRIANGLE_FAN
    PrimitiveType::Lines,      // GX_DRAW_LINES
    PrimitiveType::Lines,      // GX_DRAW_LINE_STRIP
    PrimitiveType::Points,     // GX_DRAW_POINTS
};

// GX primitive -> RenderState primitive, using primitive restart
constexpr Common::EnumMap<PrimitiveType, Primitive::GX_DRAW_POINTS> primitive_from_gx_pr{
    PrimitiveType::TriangleStrip,  // GX_DRAW_QUADS
    PrimitiveType::TriangleStrip,  // GX_DRAW_QUADS_2
    PrimitiveType::TriangleStrip,  // GX_DRAW_TRIANGLES
    PrimitiveType::TriangleStrip,  // GX_DRAW_TRIANGLE_STRIP
    PrimitiveType::TriangleStrip,  // GX_DRAW_TRIANGLE_FAN
    PrimitiveType::Lines,          // GX_DRAW_LINES
    PrimitiveType::Lines,          // GX_DRAW_LINE_STRIP
    PrimitiveType::Points,         // GX_DRAW_POINTS
};
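
// Explanatory note: with primitive restart, every GX triangle topology above is emitted as a
// single restarted strip, so consecutive draws of quads, triangles, strips, and fans all share
// one RenderState primitive and can be merged into a batch instead of forcing a flush on each
// primitive-type change.
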
// The BT.601 standard that the GameCube is based on is a compromise between PAL and NTSC,
// so neither standard gets square pixels. They are each off by ~9% in opposite directions.
// Just in case any game decides to take this into account, we do both of these tests with
// a large amount of slop.
static float CalculateProjectionViewportRatio(const Projection::Raw& projection,
                                              const Viewport& viewport)
{
  const float projection_ar = projection[2] / projection[0];
  const float viewport_ar = viewport.wd / viewport.ht;

  return std::abs(projection_ar / viewport_ar);
}

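// Worked example (illustrative, not from the original source): a game that sets up a 16:9
// perspective projection while keeping a 4:3 viewport yields (16/9) / (4/3) ~= 1.33, which is
// the anamorphic signature the heuristics below compare against the configured ratios.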
static bool IsAnamorphicProjection(const Projection::Raw& projection, const Viewport& viewport,
                                   const VideoConfig& config)
{
  // If the ratio between our projection and viewport aspect ratios is similar to 16:9 / 4:3
  // we have an anamorphic projection. This value can be overridden by a GameINI.
  // Game cheats that change the aspect ratio to natively unsupported ones
  // won't be automatically recognized here.
  return std::abs(CalculateProjectionViewportRatio(projection, viewport) -
                  config.widescreen_heuristic_widescreen_ratio) <
         config.widescreen_heuristic_aspect_ratio_slop;
}

static bool IsNormalProjection(const Projection::Raw& projection, const Viewport& viewport,
                               const VideoConfig& config)
{
  return std::abs(CalculateProjectionViewportRatio(projection, viewport) -
                  config.widescreen_heuristic_standard_ratio) <
         config.widescreen_heuristic_aspect_ratio_slop;
}

VertexManagerBase::VertexManagerBase()
    : m_cpu_vertex_buffer(MAXVBUFFERSIZE), m_cpu_index_buffer(MAXIBUFFERSIZE)
{
}

VertexManagerBase::~VertexManagerBase() = default;

bool VertexManagerBase::Initialize()
{
  m_frame_end_event =
      AfterFrameEvent::Register([this](Core::System&) { OnEndFrame(); }, "VertexManagerBase");
  m_after_present_event = AfterPresentEvent::Register(
      [this](const PresentInfo& pi) { m_ticks_elapsed = pi.emulated_timestamp; },
      "VertexManagerBase");
  m_index_generator.Init();
  m_custom_shader_cache = std::make_unique<CustomShaderCache>();
  m_cpu_cull.Init();
  return true;
}

u32 VertexManagerBase::GetRemainingSize() const
{
  return static_cast<u32>(m_end_buffer_pointer - m_cur_buffer_pointer);
}

void VertexManagerBase::AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices)
{
  m_index_generator.AddIndices(primitive, num_vertices);
}

bool VertexManagerBase::AreAllVerticesCulled(VertexLoaderBase* loader,
                                             OpcodeDecoder::Primitive primitive, const u8* src,
                                             u32 count)
{
  return m_cpu_cull.AreAllVerticesCulled(loader, primitive, src, count);
}

DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive primitive,
                                                       u32 count, u32 stride, bool cullall)
{
  // Flush all EFB pokes. Since the buffer is shared, we can't draw pokes+primitives concurrently.
  g_framebuffer_manager->FlushEFBPokes();

  // The SSE vertex loader can write up to 4 bytes past the end
  u32 const needed_vertex_bytes = count * stride + 4;

  // We can't merge different kinds of primitives, so we have to flush here
  PrimitiveType new_primitive_type = g_ActiveConfig.backend_info.bSupportsPrimitiveRestart ?
                                         primitive_from_gx_pr[primitive] :
                                         primitive_from_gx[primitive];
  if (m_current_primitive_type != new_primitive_type) [[unlikely]]
  {
    Flush();

    // Have to update the rasterization state for point/line cull modes.
    m_current_primitive_type = new_primitive_type;
    SetRasterizationStateChanged();
  }

  u32 remaining_indices = GetRemainingIndices(primitive);
  u32 remaining_index_generator_indices = m_index_generator.GetRemainingIndices(primitive);

  // Check for size in buffer, if the buffer gets full, call Flush()
  if (!m_is_flushed && (count > remaining_index_generator_indices || count > remaining_indices ||
                        needed_vertex_bytes > GetRemainingSize())) [[unlikely]]
  {
    Flush();
  }

  m_cull_all = cullall;

  // need to alloc new buffer
  if (m_is_flushed) [[unlikely]]
  {
    if (cullall)
    {
      // This buffer isn't getting sent to the GPU. Just allocate it on the CPU.
      m_cur_buffer_pointer = m_base_buffer_pointer = m_cpu_vertex_buffer.data();
      m_end_buffer_pointer = m_base_buffer_pointer + m_cpu_vertex_buffer.size();
      m_index_generator.Start(m_cpu_index_buffer.data());
    }
    else
    {
      ResetBuffer(stride);
    }

    remaining_index_generator_indices = m_index_generator.GetRemainingIndices(primitive);
    remaining_indices = GetRemainingIndices(primitive);
    m_is_flushed = false;
  }

  // Now that we've reset the buffer, there should be enough space. It's possible that we still
  // won't have enough space in a few rare cases, such as vertex shader line/point expansion with a
  // ton of lines in one draw command, in which case we will either need to add support for
  // splitting a single draw command into multiple draws or using bigger indices.
  ASSERT_MSG(VIDEO, count <= remaining_index_generator_indices,
             "VertexManager: Too few remaining index values ({} > {}). "
             "32-bit indices or primitive breaking needed.",
             count, remaining_index_generator_indices);
  ASSERT_MSG(VIDEO, count <= remaining_indices,
             "VertexManager: Buffer not large enough for all indices! ({} > {}) "
             "Increase MAXIBUFFERSIZE or we need primitive breaking after all.",
             count, remaining_indices);
  ASSERT_MSG(VIDEO, needed_vertex_bytes <= GetRemainingSize(),
             "VertexManager: Buffer not large enough for all vertices! ({} > {}) "
             "Increase MAXVBUFFERSIZE or we need primitive breaking after all.",
             needed_vertex_bytes, GetRemainingSize());

  return DataReader(m_cur_buffer_pointer, m_end_buffer_pointer);
}

DataReader VertexManagerBase::DisableCullAll(u32 stride)
{
  if (m_cull_all)
  {
    m_cull_all = false;
    ResetBuffer(stride);
  }
  return DataReader(m_cur_buffer_pointer, m_end_buffer_pointer);
}

void VertexManagerBase::FlushData(u32 count, u32 stride)
{
  m_cur_buffer_pointer += count * stride;
}

u32 VertexManagerBase::GetRemainingIndices(OpcodeDecoder::Primitive primitive) const
{
  const u32 index_len = MAXIBUFFERSIZE - m_index_generator.GetIndexLen();
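
  // Explanatory note on the divisors below (derived from the index generator's expansion
  // scheme, not a comment from the original source): vertex shader line/point expansion turns
  // each line or point into a 4-vertex quad, which costs 5 indices when drawn as a restarted
  // strip (4 + 1 restart index) or 6 indices when emitted as two triangles.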
  if (primitive >= Primitive::GX_DRAW_LINES)
  {
    if (g_Config.UseVSForLinePointExpand())
    {
      if (g_Config.backend_info.bSupportsPrimitiveRestart)
      {
        switch (primitive)
        {
        case Primitive::GX_DRAW_LINES:
          return index_len / 5 * 2;
        case Primitive::GX_DRAW_LINE_STRIP:
          return index_len / 5 + 1;
        case Primitive::GX_DRAW_POINTS:
          return index_len / 5;
        default:
          return 0;
        }
      }
      else
      {
        switch (primitive)
        {
        case Primitive::GX_DRAW_LINES:
          return index_len / 6 * 2;
        case Primitive::GX_DRAW_LINE_STRIP:
          return index_len / 6 + 1;
        case Primitive::GX_DRAW_POINTS:
          return index_len / 6;
        default:
          return 0;
        }
      }
    }
    else
    {
      switch (primitive)
      {
      case Primitive::GX_DRAW_LINES:
        return index_len;
      case Primitive::GX_DRAW_LINE_STRIP:
        return index_len / 2 + 1;
      case Primitive::GX_DRAW_POINTS:
        return index_len;
      default:
        return 0;
      }
    }
  }
  else if (g_Config.backend_info.bSupportsPrimitiveRestart)
  {
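    // Explanatory note: with primitive restart, each primitive is emitted as its own small
    // strip, so a quad consumes 5 indices (4 vertices + 1 restart) and a triangle consumes 4.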
    switch (primitive)
    {
    case Primitive::GX_DRAW_QUADS:
    case Primitive::GX_DRAW_QUADS_2:
      return index_len / 5 * 4;
    case Primitive::GX_DRAW_TRIANGLES:
      return index_len / 4 * 3;
    case Primitive::GX_DRAW_TRIANGLE_STRIP:
      return index_len / 1 - 1;
    case Primitive::GX_DRAW_TRIANGLE_FAN:
      return index_len / 6 * 4 + 1;
    default:
      return 0;
    }
  }
  else
  {
    switch (primitive)
    {
    case Primitive::GX_DRAW_QUADS:
    case Primitive::GX_DRAW_QUADS_2:
      return index_len / 6 * 4;
    case Primitive::GX_DRAW_TRIANGLES:
      return index_len;
    case Primitive::GX_DRAW_TRIANGLE_STRIP:
      return index_len / 3 + 2;
    case Primitive::GX_DRAW_TRIANGLE_FAN:
      return index_len / 3 + 2;
    default:
      return 0;
    }
  }
}

auto VertexManagerBase::ResetFlushAspectRatioCount() -> FlushStatistics
{
  const auto result = m_flush_statistics;
  m_flush_statistics = {};
  return result;
}

void VertexManagerBase::ResetBuffer(u32 vertex_stride)
{
  m_base_buffer_pointer = m_cpu_vertex_buffer.data();
  m_cur_buffer_pointer = m_cpu_vertex_buffer.data();
  m_end_buffer_pointer = m_base_buffer_pointer + m_cpu_vertex_buffer.size();
  m_index_generator.Start(m_cpu_index_buffer.data());
}

void VertexManagerBase::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices,
                                     u32* out_base_vertex, u32* out_base_index)
{
  *out_base_vertex = 0;
  *out_base_index = 0;
}

void VertexManagerBase::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex)
{
  // If bounding box is enabled, we need to flush any changes first, then invalidate what we have.
  if (g_bounding_box->IsEnabled() && g_ActiveConfig.bBBoxEnable &&
      g_ActiveConfig.backend_info.bSupportsBBox)
  {
    g_bounding_box->Flush();
  }

  g_gfx->DrawIndexed(base_index, num_indices, base_vertex);
}

void VertexManagerBase::UploadUniforms()
{
}

void VertexManagerBase::InvalidateConstants()
{
  auto& system = Core::System::GetInstance();
  auto& vertex_shader_manager = system.GetVertexShaderManager();
  auto& geometry_shader_manager = system.GetGeometryShaderManager();
  auto& pixel_shader_manager = system.GetPixelShaderManager();
  vertex_shader_manager.dirty = true;
  geometry_shader_manager.dirty = true;
  pixel_shader_manager.dirty = true;
}

void VertexManagerBase::UploadUtilityUniforms(const void* uniforms, u32 uniforms_size)
{
}

void VertexManagerBase::UploadUtilityVertices(const void* vertices, u32 vertex_stride,
                                              u32 num_vertices, const u16* indices, u32 num_indices,
                                              u32* out_base_vertex, u32* out_base_index)
{
  // The GX vertex list should be flushed before any utility draws occur.
  ASSERT(m_is_flushed);

  // Copy into the buffers usually used for GX drawing.
  ResetBuffer(std::max(vertex_stride, 1u));
  if (vertices)
  {
    const u32 copy_size = vertex_stride * num_vertices;
    ASSERT((m_cur_buffer_pointer + copy_size) <= m_end_buffer_pointer);
    std::memcpy(m_cur_buffer_pointer, vertices, copy_size);
    m_cur_buffer_pointer += copy_size;
  }
  if (indices)
    m_index_generator.AddExternalIndices(indices, num_indices, num_vertices);

  CommitBuffer(num_vertices, vertex_stride, num_indices, out_base_vertex, out_base_index);
}

u32 VertexManagerBase::GetTexelBufferElementSize(TexelBufferFormat buffer_format)
{
  // R8 - 1, R16 - 2, RGBA8 - 4, R32G32 - 8
  return 1u << static_cast<u32>(buffer_format);
}

bool VertexManagerBase::UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format,
                                          u32* out_offset)
{
  return false;
}

bool VertexManagerBase::UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format,
                                          u32* out_offset, const void* palette_data,
                                          u32 palette_size, TexelBufferFormat palette_format,
                                          u32* palette_offset)
{
  return false;
}

BitSet32 VertexManagerBase::UsedTextures() const
{
  BitSet32 usedtextures;
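
  // Each TwoTevStageOrders register packs the order data for two TEV stages, hence the
  // tevorders[i / 2] indexing with (i & 1) selecting which half of the register to read.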
  for (u32 i = 0; i < bpmem.genMode.numtevstages + 1u; ++i)
    if (bpmem.tevorders[i / 2].getEnable(i & 1))
      usedtextures[bpmem.tevorders[i / 2].getTexMap(i & 1)] = true;

  if (bpmem.genMode.numindstages > 0)
    for (unsigned int i = 0; i < bpmem.genMode.numtevstages + 1u; ++i)
      if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages)
        usedtextures[bpmem.tevindref.getTexMap(bpmem.tevind[i].bt)] = true;

  return usedtextures;
}

void VertexManagerBase::Flush()
{
  if (m_is_flushed)
    return;

  m_is_flushed = true;

  if (m_draw_counter == 0)
  {
    // This is more or less the start of the frame
    BeforeFrameEvent::Trigger();
  }

  if (xfmem.numTexGen.numTexGens != bpmem.genMode.numtexgens ||
      xfmem.numChan.numColorChans != bpmem.genMode.numcolchans)
  {
    ERROR_LOG_FMT(
        VIDEO,
        "Mismatched configuration between XF and BP stages - {}/{} texgens, {}/{} colors. "
        "Skipping draw. Please report on the issue tracker.",
        xfmem.numTexGen.numTexGens, bpmem.genMode.numtexgens.Value(), xfmem.numChan.numColorChans,
        bpmem.genMode.numcolchans.Value());

    // Analytics reporting so we can discover which games have this problem, so that when we
    // eventually simulate the behavior, we have test cases for it.
    if (xfmem.numTexGen.numTexGens != bpmem.genMode.numtexgens)
    {
      DolphinAnalytics::Instance().ReportGameQuirk(
          GameQuirk::MISMATCHED_GPU_TEXGENS_BETWEEN_XF_AND_BP);
    }
    if (xfmem.numChan.numColorChans != bpmem.genMode.numcolchans)
    {
      DolphinAnalytics::Instance().ReportGameQuirk(
          GameQuirk::MISMATCHED_GPU_COLORS_BETWEEN_XF_AND_BP);
    }

    return;
  }

#if defined(_DEBUG) || defined(DEBUGFAST)
  PRIM_LOG("frame{}:\n texgen={}, numchan={}, dualtex={}, ztex={}, cole={}, alpe={}, ze={}",
           g_ActiveConfig.iSaveTargetId, xfmem.numTexGen.numTexGens, xfmem.numChan.numColorChans,
           xfmem.dualTexTrans.enabled, bpmem.ztex2.op.Value(), bpmem.blendmode.colorupdate.Value(),
           bpmem.blendmode.alphaupdate.Value(), bpmem.zmode.updateenable.Value());

  for (u32 i = 0; i < xfmem.numChan.numColorChans; ++i)
  {
    LitChannel* ch = &xfmem.color[i];
    PRIM_LOG("colchan{}: matsrc={}, light={:#x}, ambsrc={}, diffunc={}, attfunc={}", i,
             ch->matsource.Value(), ch->GetFullLightMask(), ch->ambsource.Value(),
             ch->diffusefunc.Value(), ch->attnfunc.Value());

    ch = &xfmem.alpha[i];
    PRIM_LOG("alpchan{}: matsrc={}, light={:#x}, ambsrc={}, diffunc={}, attfunc={}", i,
             ch->matsource.Value(), ch->GetFullLightMask(), ch->ambsource.Value(),
             ch->diffusefunc.Value(), ch->attnfunc.Value());
  }

  for (u32 i = 0; i < xfmem.numTexGen.numTexGens; ++i)
  {
    TexMtxInfo tinfo = xfmem.texMtxInfo[i];
    if (tinfo.texgentype != TexGenType::EmbossMap)
      tinfo.hex &= 0x7ff;
    if (tinfo.texgentype != TexGenType::Regular)
      tinfo.projection = TexSize::ST;

    PRIM_LOG("txgen{}: proj={}, input={}, gentype={}, srcrow={}, embsrc={}, emblght={}, "
             "postmtx={}, postnorm={}",
             i, tinfo.projection.Value(), tinfo.inputform.Value(), tinfo.texgentype.Value(),
             tinfo.sourcerow.Value(), tinfo.embosssourceshift.Value(),
             tinfo.embosslightshift.Value(), xfmem.postMtxInfo[i].index.Value(),
             xfmem.postMtxInfo[i].normalize.Value());
  }

  PRIM_LOG("pixel: tev={}, ind={}, texgen={}, dstalpha={}, alphatest={:#x}",
           bpmem.genMode.numtevstages.Value() + 1, bpmem.genMode.numindstages.Value(),
           bpmem.genMode.numtexgens.Value(), bpmem.dstalpha.enable.Value(),
           (bpmem.alpha_test.hex >> 16) & 0xff);
#endif

  // Track some stats used elsewhere by the anamorphic widescreen heuristic.
  auto& system = Core::System::GetInstance();
  if (!system.IsWii())
  {
    const bool is_perspective = xfmem.projection.type == ProjectionType::Perspective;

    auto& counts =
        is_perspective ? m_flush_statistics.perspective : m_flush_statistics.orthographic;

    const auto& projection = xfmem.projection.rawProjection;

    // TODO: Potentially the viewport size could be used as weight for the flush count average.
    // This way a small minimap would have less effect than a fullscreen projection.
    const auto& viewport = xfmem.viewport;

    // FYI: This average is based on flushes.
    // It doesn't look at vertex counts like the heuristic does.
    counts.average_ratio.Push(CalculateProjectionViewportRatio(projection, viewport));

    if (IsAnamorphicProjection(projection, viewport, g_ActiveConfig))
    {
      ++counts.anamorphic_flush_count;
      counts.anamorphic_vertex_count += m_index_generator.GetIndexLen();
    }
    else if (IsNormalProjection(projection, viewport, g_ActiveConfig))
    {
      ++counts.normal_flush_count;
      counts.normal_vertex_count += m_index_generator.GetIndexLen();
    }
    else
    {
      ++counts.other_flush_count;
      counts.other_vertex_count += m_index_generator.GetIndexLen();
    }
  }

  auto& pixel_shader_manager = system.GetPixelShaderManager();
  auto& geometry_shader_manager = system.GetGeometryShaderManager();
  auto& vertex_shader_manager = system.GetVertexShaderManager();
  auto& xf_state_manager = system.GetXFStateManager();

  if (g_ActiveConfig.bGraphicMods)
  {
    const double seconds_elapsed =
        static_cast<double>(m_ticks_elapsed) / system.GetSystemTimers().GetTicksPerSecond();
    pixel_shader_manager.constants.time_ms = seconds_elapsed * 1000;
  }

  CalculateNormals(VertexLoaderManager::GetCurrentVertexFormat());

  // Calculate ZSlope for zfreeze
  const auto used_textures = UsedTextures();
  std::vector<std::string> texture_names;
  Common::SmallVector<u32, 8> texture_units;
  std::array<SamplerState, 8> samplers;
  if (!m_cull_all)
  {
    if (!g_ActiveConfig.bGraphicMods)
    {
      for (const u32 i : used_textures)
      {
        const auto cache_entry = g_texture_cache->Load(TextureInfo::FromStage(i));
        if (!cache_entry)
          continue;
        const float custom_tex_scale = cache_entry->GetWidth() / float(cache_entry->native_width);
        samplers[i] = TextureCacheBase::GetSamplerState(
            i, custom_tex_scale, cache_entry->is_custom_tex, cache_entry->has_arbitrary_mips);
      }
    }
    else
    {
      for (const u32 i : used_textures)
      {
        const auto cache_entry = g_texture_cache->Load(TextureInfo::FromStage(i));
        if (cache_entry)
        {
          if (std::find(texture_names.begin(), texture_names.end(),
                        cache_entry->texture_info_name) == texture_names.end())
          {
            texture_names.push_back(cache_entry->texture_info_name);
            texture_units.push_back(i);
          }

          const float custom_tex_scale = cache_entry->GetWidth() / float(cache_entry->native_width);
          samplers[i] = TextureCacheBase::GetSamplerState(
              i, custom_tex_scale, cache_entry->is_custom_tex, cache_entry->has_arbitrary_mips);
        }
      }
    }
  }

  vertex_shader_manager.SetConstants(texture_names, xf_state_manager);
  if (!bpmem.genMode.zfreeze)
  {
    // Must be done after VertexShaderManager::SetConstants()
    CalculateZSlope(VertexLoaderManager::GetCurrentVertexFormat());
  }
  else if (m_zslope.dirty && !m_cull_all)  // or apply any dirty ZSlopes
  {
    pixel_shader_manager.SetZSlope(m_zslope.dfdx, m_zslope.dfdy, m_zslope.f0);
    m_zslope.dirty = false;
  }

  if (!m_cull_all)
  {
    CustomPixelShaderContents custom_pixel_shader_contents;
    std::optional<CustomPixelShader> custom_pixel_shader;
    std::vector<std::string> custom_pixel_texture_names;
    std::span<u8> custom_pixel_shader_uniforms;
    bool skip = false;
    for (size_t i = 0; i < texture_names.size(); i++)
    {
      GraphicsModActionData::DrawStarted draw_started{texture_units, &skip, &custom_pixel_shader,
                                                      &custom_pixel_shader_uniforms};
      for (const auto& action : g_graphics_mod_manager->GetDrawStartedActions(texture_names[i]))
      {
        action->OnDrawStarted(&draw_started);
        if (custom_pixel_shader)
        {
          custom_pixel_shader_contents.shaders.push_back(*custom_pixel_shader);
          custom_pixel_texture_names.push_back(texture_names[i]);
        }
        custom_pixel_shader = std::nullopt;
      }
    }

    // Now the vertices can be flushed to the GPU. Everything following the CommitBuffer() call
    // must be careful to not upload any utility vertices, as the binding will be lost otherwise.
    const u32 num_indices = m_index_generator.GetIndexLen();
    if (num_indices == 0)
      return;

    // Texture loading can cause palettes to be applied (-> uniforms -> draws).
    // Palette application does not use vertices, only a full-screen quad, so this is okay.
    // Same with GPU texture decoding, which uses compute shaders.
    g_texture_cache->BindTextures(used_textures, samplers);

    if (PerfQueryBase::ShouldEmulate())
      g_perf_query->EnableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);

    if (!skip)
    {
      UpdatePipelineConfig();
      UpdatePipelineObject();
      if (m_current_pipeline_object)
      {
        const AbstractPipeline* pipeline_object = m_current_pipeline_object;
        if (!custom_pixel_shader_contents.shaders.empty())
        {
          if (const auto custom_pipeline =
                  GetCustomPipeline(custom_pixel_shader_contents, m_current_pipeline_config,
                                    m_current_uber_pipeline_config, m_current_pipeline_object))
          {
            pipeline_object = custom_pipeline;
          }
        }
        RenderDrawCall(pixel_shader_manager, geometry_shader_manager, custom_pixel_shader_contents,
                       custom_pixel_shader_uniforms, m_current_primitive_type, pipeline_object);
      }
    }

    // Track the total emulated state draws
    INCSTAT(g_stats.this_frame.num_draw_calls);

    // Even if we skip the draw, emulated state should still be impacted
    OnDraw();

    if (PerfQueryBase::ShouldEmulate())
      g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);

    // The EFB cache is now potentially stale.
    g_framebuffer_manager->FlagPeekCacheAsOutOfDate();
  }

  if (xfmem.numTexGen.numTexGens != bpmem.genMode.numtexgens)
  {
    ERROR_LOG_FMT(VIDEO,
                  "xf.numtexgens ({}) does not match bp.numtexgens ({}). Error in command stream.",
                  xfmem.numTexGen.numTexGens, bpmem.genMode.numtexgens.Value());
  }
}

void VertexManagerBase::DoState(PointerWrap& p)
{
  if (p.IsReadMode())
  {
    // Flush old vertex data before loading state.
    Flush();
  }

  p.Do(m_zslope);
  p.Do(VertexLoaderManager::normal_cache);
  p.Do(VertexLoaderManager::tangent_cache);
  p.Do(VertexLoaderManager::binormal_cache);
}

void VertexManagerBase::CalculateZSlope(NativeVertexFormat* format)
{
  float out[12];
  float viewOffset[2] = {xfmem.viewport.xOrig - bpmem.scissorOffset.x * 2,
                         xfmem.viewport.yOrig - bpmem.scissorOffset.y * 2};

  if (m_current_primitive_type != PrimitiveType::Triangles &&
      m_current_primitive_type != PrimitiveType::TriangleStrip)
  {
    return;
  }

  // Global matrix ID.
  u32 mtxIdx = g_main_cp_state.matrix_index_a.PosNormalMtxIdx;
  const PortableVertexDeclaration vert_decl = format->GetVertexDeclaration();

  // Make sure the buffer contains at least 3 vertices.
  if ((m_cur_buffer_pointer - m_base_buffer_pointer) < (vert_decl.stride * 3))
    return;

  // Lookup vertices of the last rendered triangle and software-transform them
  // This allows us to determine the depth slope, which will be used if z-freeze
  // is enabled in the following flush.
  auto& system = Core::System::GetInstance();
  auto& vertex_shader_manager = system.GetVertexShaderManager();
  for (unsigned int i = 0; i < 3; ++i)
  {
    // If this vertex format has per-vertex position matrix IDs, look it up.
    if (vert_decl.posmtx.enable)
      mtxIdx = VertexLoaderManager::position_matrix_index_cache[2 - i];

    if (vert_decl.position.components == 2)
      VertexLoaderManager::position_cache[2 - i][2] = 0;

    vertex_shader_manager.TransformToClipSpace(&VertexLoaderManager::position_cache[2 - i][0],
                                               &out[i * 4], mtxIdx);

    // Transform to Screenspace
    float inv_w = 1.0f / out[3 + i * 4];

    out[0 + i * 4] = out[0 + i * 4] * inv_w * xfmem.viewport.wd + viewOffset[0];
    out[1 + i * 4] = out[1 + i * 4] * inv_w * xfmem.viewport.ht + viewOffset[1];
    out[2 + i * 4] = out[2 + i * 4] * inv_w * xfmem.viewport.zRange + xfmem.viewport.farZ;
  }

  float dx31 = out[8] - out[0];
  float dx12 = out[0] - out[4];
  float dy12 = out[1] - out[5];
  float dy31 = out[9] - out[1];

  float DF31 = out[10] - out[2];
  float DF21 = out[6] - out[2];
  float a = DF31 * -dy12 - DF21 * dy31;
  float b = dx31 * DF21 + dx12 * DF31;
  float c = -dx12 * dy31 - dx31 * -dy12;
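
  // Explanatory note: (a, b, c) is the normal of the plane through the three screen-space
  // vertices. For the plane a*x + b*y + c*z = d, the depth slopes are dz/dx = -a/c and
  // dz/dy = -b/c, and f0 below is the plane's z value at the screen-space origin.
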
  // Sometimes we process degenerate triangles. Guard against divides by zero.
  if (c == 0)
    return;

  m_zslope.dfdx = -a / c;
  m_zslope.dfdy = -b / c;
  m_zslope.f0 = out[2] - (out[0] * m_zslope.dfdx + out[1] * m_zslope.dfdy);
  m_zslope.dirty = true;
}

void VertexManagerBase::CalculateNormals(NativeVertexFormat* format)
{
  const PortableVertexDeclaration vert_decl = format->GetVertexDeclaration();

  // Only update the binormal/tangent vertex shader constants if the vertex format lacks binormals
  // (VertexLoaderManager::binormal_cache gets updated by the vertex loader when binormals are
  // present, though)
  if (vert_decl.normals[1].enable)
    return;

  VertexLoaderManager::tangent_cache[3] = 0;
  VertexLoaderManager::binormal_cache[3] = 0;

  auto& system = Core::System::GetInstance();
  auto& vertex_shader_manager = system.GetVertexShaderManager();
  if (vertex_shader_manager.constants.cached_tangent != VertexLoaderManager::tangent_cache)
  {
    vertex_shader_manager.constants.cached_tangent = VertexLoaderManager::tangent_cache;
    vertex_shader_manager.dirty = true;
  }
  if (vertex_shader_manager.constants.cached_binormal != VertexLoaderManager::binormal_cache)
  {
    vertex_shader_manager.constants.cached_binormal = VertexLoaderManager::binormal_cache;
    vertex_shader_manager.dirty = true;
  }

  if (vert_decl.normals[0].enable)
    return;

  VertexLoaderManager::normal_cache[3] = 0;
  if (vertex_shader_manager.constants.cached_normal != VertexLoaderManager::normal_cache)
  {
    vertex_shader_manager.constants.cached_normal = VertexLoaderManager::normal_cache;
    vertex_shader_manager.dirty = true;
  }
}

void VertexManagerBase::UpdatePipelineConfig()
{
  NativeVertexFormat* vertex_format = VertexLoaderManager::GetCurrentVertexFormat();
  if (vertex_format != m_current_pipeline_config.vertex_format)
  {
    m_current_pipeline_config.vertex_format = vertex_format;
    m_current_uber_pipeline_config.vertex_format =
        VertexLoaderManager::GetUberVertexFormat(vertex_format->GetVertexDeclaration());
    m_pipeline_config_changed = true;
  }

  VertexShaderUid vs_uid = GetVertexShaderUid();
  if (vs_uid != m_current_pipeline_config.vs_uid)
  {
    m_current_pipeline_config.vs_uid = vs_uid;
    m_current_uber_pipeline_config.vs_uid = UberShader::GetVertexShaderUid();
    m_pipeline_config_changed = true;
  }

  PixelShaderUid ps_uid = GetPixelShaderUid();
  if (ps_uid != m_current_pipeline_config.ps_uid)
  {
    m_current_pipeline_config.ps_uid = ps_uid;
    m_current_uber_pipeline_config.ps_uid = UberShader::GetPixelShaderUid();
    m_pipeline_config_changed = true;
  }

  GeometryShaderUid gs_uid = GetGeometryShaderUid(GetCurrentPrimitiveType());
  if (gs_uid != m_current_pipeline_config.gs_uid)
  {
    m_current_pipeline_config.gs_uid = gs_uid;
    m_current_uber_pipeline_config.gs_uid = gs_uid;
    m_pipeline_config_changed = true;
  }

  if (m_rasterization_state_changed)
  {
    m_rasterization_state_changed = false;

    RasterizationState new_rs = {};
    new_rs.Generate(bpmem, m_current_primitive_type);
    if (new_rs != m_current_pipeline_config.rasterization_state)
    {
      m_current_pipeline_config.rasterization_state = new_rs;
      m_current_uber_pipeline_config.rasterization_state = new_rs;
      m_pipeline_config_changed = true;
    }
  }

  if (m_depth_state_changed)
  {
    m_depth_state_changed = false;

    DepthState new_ds = {};
    new_ds.Generate(bpmem);
    if (new_ds != m_current_pipeline_config.depth_state)
    {
      m_current_pipeline_config.depth_state = new_ds;
      m_current_uber_pipeline_config.depth_state = new_ds;
      m_pipeline_config_changed = true;
    }
  }

  if (m_blending_state_changed)
  {
    m_blending_state_changed = false;

    BlendingState new_bs = {};
    new_bs.Generate(bpmem);
    if (new_bs != m_current_pipeline_config.blending_state)
    {
      m_current_pipeline_config.blending_state = new_bs;
      m_current_uber_pipeline_config.blending_state = new_bs;
      m_pipeline_config_changed = true;
    }
  }
}

void VertexManagerBase::UpdatePipelineObject()
{
  if (!m_pipeline_config_changed)
    return;

  m_current_pipeline_object = nullptr;
  m_pipeline_config_changed = false;

  switch (g_ActiveConfig.iShaderCompilationMode)
  {
  case ShaderCompilationMode::Synchronous:
  {
    // Ubershaders disabled? Block and compile the specialized shader.
    m_current_pipeline_object = g_shader_cache->GetPipelineForUid(m_current_pipeline_config);
  }
  break;

  case ShaderCompilationMode::SynchronousUberShaders:
  {
    // Exclusive ubershader mode, always use ubershaders.
    m_current_pipeline_object =
        g_shader_cache->GetUberPipelineForUid(m_current_uber_pipeline_config);
  }
  break;

  case ShaderCompilationMode::AsynchronousUberShaders:
  case ShaderCompilationMode::AsynchronousSkipRendering:
  {
    // Can we background compile shaders? If so, get the pipeline asynchronously.
    auto res = g_shader_cache->GetPipelineForUidAsync(m_current_pipeline_config);
    if (res)
    {
      // Specialized shaders are ready, prefer these.
      m_current_pipeline_object = *res;
      return;
    }

    if (g_ActiveConfig.iShaderCompilationMode == ShaderCompilationMode::AsynchronousUberShaders)
    {
      // Specialized shaders not ready, use the ubershaders.
      m_current_pipeline_object =
          g_shader_cache->GetUberPipelineForUid(m_current_uber_pipeline_config);
    }
    else
    {
      // Ensure we try again next draw. Otherwise, if no registers change between frames, the
      // object will never be drawn, even when the shader is ready.
      m_pipeline_config_changed = true;
    }
  }
  break;
  }
}

void VertexManagerBase::OnConfigChange()
{
  // Reload index generator function tables in case VS expand config changed
  m_index_generator.Init();
}

void VertexManagerBase::OnDraw()
{
  m_draw_counter++;

  // If the last efb copy was too close to the one before it, don't forget about it until the next
  // efb copy happens (which might not be for a long time)
  u32 diff = m_draw_counter - m_last_efb_copy_draw_counter;
  if (m_unflushed_efb_copy && diff > MINIMUM_DRAW_CALLS_PER_COMMAND_BUFFER_FOR_READBACK)
  {
    g_gfx->Flush();
    m_unflushed_efb_copy = false;
    m_last_efb_copy_draw_counter = m_draw_counter;
  }

  // If we didn't have any CPU access last frame, do nothing.
  if (m_scheduled_command_buffer_kicks.empty() || !m_allow_background_execution)
    return;

  // Check if this draw is scheduled to kick a command buffer.
  // The draw counters will always be sorted so a binary search is possible here.
  if (std::ranges::binary_search(m_scheduled_command_buffer_kicks, m_draw_counter))
  {
    // Kick a command buffer on the background thread.
    g_gfx->Flush();
    m_unflushed_efb_copy = false;
    m_last_efb_copy_draw_counter = m_draw_counter;
  }
}

void VertexManagerBase::OnCPUEFBAccess()
{
  // Check this isn't another access without any draws in between.
  if (!m_cpu_accesses_this_frame.empty() && m_cpu_accesses_this_frame.back() == m_draw_counter)
    return;

  // Store the current draw counter for scheduling in OnEndFrame.
  m_cpu_accesses_this_frame.emplace_back(m_draw_counter);
}

void VertexManagerBase::OnEFBCopyToRAM()
{
  // If we're not deferring, try to preempt it next frame.
  if (!g_ActiveConfig.bDeferEFBCopies)
  {
    OnCPUEFBAccess();
    return;
  }

  // Otherwise, only execute if we have at least 10 objects between us and the last copy.
  const u32 diff = m_draw_counter - m_last_efb_copy_draw_counter;
  m_last_efb_copy_draw_counter = m_draw_counter;
  if (diff < MINIMUM_DRAW_CALLS_PER_COMMAND_BUFFER_FOR_READBACK)
  {
    m_unflushed_efb_copy = true;
    return;
  }

  m_unflushed_efb_copy = false;
  g_gfx->Flush();
}

void VertexManagerBase::OnEndFrame()
{
  m_draw_counter = 0;
  m_last_efb_copy_draw_counter = 0;
  m_scheduled_command_buffer_kicks.clear();

  // If we have no CPU access at all, leave everything in the one command buffer for maximum
  // parallelism between CPU/GPU, at the cost of slightly higher latency.
  if (m_cpu_accesses_this_frame.empty())
    return;

  // In order to reduce CPU readback latency, we want to kick a command buffer roughly halfway
  // between the draw counters that invoked the readback, or every 250 draws, whichever is
  // smaller.
  if (g_ActiveConfig.iCommandBufferExecuteInterval > 0)
  {
    u32 last_draw_counter = 0;
    u32 interval = static_cast<u32>(g_ActiveConfig.iCommandBufferExecuteInterval);
    for (u32 draw_counter : m_cpu_accesses_this_frame)
    {
      // We don't want to waste executing command buffers for only a few draws, so set a minimum.
      // Leave last_draw_counter as-is, so we get the correct number of draws between submissions.
      u32 draw_count = draw_counter - last_draw_counter;
      if (draw_count < MINIMUM_DRAW_CALLS_PER_COMMAND_BUFFER_FOR_READBACK)
        continue;

      if (draw_count <= interval)
      {
        u32 mid_point = draw_count / 2;
        m_scheduled_command_buffer_kicks.emplace_back(last_draw_counter + mid_point);
      }
      else
      {
        u32 counter = interval;
        while (counter < draw_count)
        {
          m_scheduled_command_buffer_kicks.emplace_back(last_draw_counter + counter);
          counter += interval;
        }
      }

      last_draw_counter = draw_counter;
    }
  }
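
  // Illustrative trace (hypothetical numbers, not from the original source): with an interval
  // of 250 and a readback gap that clears MINIMUM_DRAW_CALLS_PER_COMMAND_BUFFER_FOR_READBACK,
  // a lone EFB access at draw 400 schedules a kick at draw 250, while an access at draw 100
  // schedules a single kick at the midpoint, draw 50.
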
  m_cpu_accesses_this_frame.clear();

  // We invalidate the pipeline object at the start of the frame.
  // This is for the rare case where only a single pipeline configuration is used,
  // and hybrid ubershaders have compiled the specialized shader, but without any
  // state changes the specialized shader will not take over.
  InvalidatePipelineObject();
}

void VertexManagerBase::NotifyCustomShaderCacheOfHostChange(const ShaderHostConfig& host_config)
{
  m_custom_shader_cache->SetHostConfig(host_config);
  m_custom_shader_cache->Reload();
}

void VertexManagerBase::RenderDrawCall(
    PixelShaderManager& pixel_shader_manager, GeometryShaderManager& geometry_shader_manager,
    const CustomPixelShaderContents& custom_pixel_shader_contents,
    std::span<u8> custom_pixel_shader_uniforms, PrimitiveType primitive_type,
    const AbstractPipeline* current_pipeline)
{
  // Now we can upload uniforms, as nothing else will override them.
  geometry_shader_manager.SetConstants(primitive_type);
  pixel_shader_manager.SetConstants();
  if (!custom_pixel_shader_uniforms.empty() &&
      pixel_shader_manager.custom_constants.data() != custom_pixel_shader_uniforms.data())
  {
    pixel_shader_manager.custom_constants_dirty = true;
  }
  pixel_shader_manager.custom_constants = custom_pixel_shader_uniforms;
  UploadUniforms();

  g_gfx->SetPipeline(current_pipeline);

  u32 base_vertex, base_index;
  CommitBuffer(m_index_generator.GetNumVerts(),
               VertexLoaderManager::GetCurrentVertexFormat()->GetVertexStride(),
               m_index_generator.GetIndexLen(), &base_vertex, &base_index);

  if (g_ActiveConfig.backend_info.api_type != APIType::D3D &&
      g_ActiveConfig.UseVSForLinePointExpand() &&
      (primitive_type == PrimitiveType::Points || primitive_type == PrimitiveType::Lines))
  {
    // VS point/line expansion puts the vertex id at gl_VertexID << 2
    // That means the base vertex has to be adjusted to match
    // (The shader adds this after shifting right on D3D, so no need to do this)
    base_vertex <<= 2;
  }

  DrawCurrentBatch(base_index, m_index_generator.GetIndexLen(), base_vertex);
}

const AbstractPipeline* VertexManagerBase::GetCustomPipeline(
    const CustomPixelShaderContents& custom_pixel_shader_contents,
    const VideoCommon::GXPipelineUid& current_pipeline_config,
    const VideoCommon::GXUberPipelineUid& current_uber_pipeline_config,
    const AbstractPipeline* current_pipeline) const
{
  if (current_pipeline)
  {
    if (!custom_pixel_shader_contents.shaders.empty())
    {
      CustomShaderInstance custom_shaders;
      custom_shaders.pixel_contents = custom_pixel_shader_contents;
      switch (g_ActiveConfig.iShaderCompilationMode)
      {
      case ShaderCompilationMode::Synchronous:
      case ShaderCompilationMode::AsynchronousSkipRendering:
      {
        if (auto pipeline = m_custom_shader_cache->GetPipelineAsync(
                current_pipeline_config, custom_shaders, current_pipeline->m_config))
        {
          return *pipeline;
        }
      }
      break;
      case ShaderCompilationMode::SynchronousUberShaders:
      {
        // D3D has issues compiling large custom ubershaders
        // use specialized shaders instead
        if (g_ActiveConfig.backend_info.api_type == APIType::D3D)
        {
          if (auto pipeline = m_custom_shader_cache->GetPipelineAsync(
                  current_pipeline_config, custom_shaders, current_pipeline->m_config))
          {
            return *pipeline;
          }
        }
        else
        {
          if (auto pipeline = m_custom_shader_cache->GetPipelineAsync(
                  current_uber_pipeline_config, custom_shaders, current_pipeline->m_config))
          {
            return *pipeline;
          }
        }
      }
      break;
      case ShaderCompilationMode::AsynchronousUberShaders:
      {
        if (auto pipeline = m_custom_shader_cache->GetPipelineAsync(
                current_pipeline_config, custom_shaders, current_pipeline->m_config))
        {
          return *pipeline;
        }
        else if (auto uber_pipeline = m_custom_shader_cache->GetPipelineAsync(
                     current_uber_pipeline_config, custom_shaders, current_pipeline->m_config))
        {
          return *uber_pipeline;
        }
      }
      break;
      }
    }
  }

  return nullptr;
}