12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172 |
- // Copyright 2010 Dolphin Emulator Project
- // SPDX-License-Identifier: GPL-2.0-or-later
- #include "VideoCommon/VertexManagerBase.h"
- #include <array>
- #include <cmath>
- #include <memory>
- #include "Common/ChunkFile.h"
- #include "Common/CommonTypes.h"
- #include "Common/EnumMap.h"
- #include "Common/Logging/Log.h"
- #include "Common/MathUtil.h"
- #include "Common/SmallVector.h"
- #include "Core/DolphinAnalytics.h"
- #include "Core/HW/SystemTimers.h"
- #include "Core/System.h"
- #include "VideoCommon/AbstractGfx.h"
- #include "VideoCommon/BPMemory.h"
- #include "VideoCommon/BoundingBox.h"
- #include "VideoCommon/DataReader.h"
- #include "VideoCommon/FramebufferManager.h"
- #include "VideoCommon/GeometryShaderManager.h"
- #include "VideoCommon/GraphicsModSystem/Runtime/CustomShaderCache.h"
- #include "VideoCommon/GraphicsModSystem/Runtime/GraphicsModActionData.h"
- #include "VideoCommon/GraphicsModSystem/Runtime/GraphicsModManager.h"
- #include "VideoCommon/IndexGenerator.h"
- #include "VideoCommon/NativeVertexFormat.h"
- #include "VideoCommon/OpcodeDecoding.h"
- #include "VideoCommon/PerfQueryBase.h"
- #include "VideoCommon/PixelShaderGen.h"
- #include "VideoCommon/PixelShaderManager.h"
- #include "VideoCommon/Statistics.h"
- #include "VideoCommon/TextureCacheBase.h"
- #include "VideoCommon/TextureInfo.h"
- #include "VideoCommon/VertexLoaderManager.h"
- #include "VideoCommon/VertexShaderManager.h"
- #include "VideoCommon/VideoBackendBase.h"
- #include "VideoCommon/VideoCommon.h"
- #include "VideoCommon/VideoConfig.h"
- #include "VideoCommon/XFMemory.h"
- #include "VideoCommon/XFStateManager.h"
- std::unique_ptr<VertexManagerBase> g_vertex_manager;
- using OpcodeDecoder::Primitive;
- // GX primitive -> RenderState primitive, no primitive restart
- constexpr Common::EnumMap<PrimitiveType, Primitive::GX_DRAW_POINTS> primitive_from_gx{
- PrimitiveType::Triangles, // GX_DRAW_QUADS
- PrimitiveType::Triangles, // GX_DRAW_QUADS_2
- PrimitiveType::Triangles, // GX_DRAW_TRIANGLES
- PrimitiveType::Triangles, // GX_DRAW_TRIANGLE_STRIP
- PrimitiveType::Triangles, // GX_DRAW_TRIANGLE_FAN
- PrimitiveType::Lines, // GX_DRAW_LINES
- PrimitiveType::Lines, // GX_DRAW_LINE_STRIP
- PrimitiveType::Points, // GX_DRAW_POINTS
- };
- // GX primitive -> RenderState primitive, using primitive restart
- constexpr Common::EnumMap<PrimitiveType, Primitive::GX_DRAW_POINTS> primitive_from_gx_pr{
- PrimitiveType::TriangleStrip, // GX_DRAW_QUADS
- PrimitiveType::TriangleStrip, // GX_DRAW_QUADS_2
- PrimitiveType::TriangleStrip, // GX_DRAW_TRIANGLES
- PrimitiveType::TriangleStrip, // GX_DRAW_TRIANGLE_STRIP
- PrimitiveType::TriangleStrip, // GX_DRAW_TRIANGLE_FAN
- PrimitiveType::Lines, // GX_DRAW_LINES
- PrimitiveType::Lines, // GX_DRAW_LINE_STRIP
- PrimitiveType::Points, // GX_DRAW_POINTS
- };
- // Due to the BT.601 standard which the GameCube is based on being a compromise
- // between PAL and NTSC, neither standard gets square pixels. They are each off
- // by ~9% in opposite directions.
- // Just in case any game decides to take this into account, we do both these
- // tests with a large amount of slop.
- static float CalculateProjectionViewportRatio(const Projection::Raw& projection,
- const Viewport& viewport)
- {
- const float projection_ar = projection[2] / projection[0];
- const float viewport_ar = viewport.wd / viewport.ht;
- return std::abs(projection_ar / viewport_ar);
- }
- static bool IsAnamorphicProjection(const Projection::Raw& projection, const Viewport& viewport,
- const VideoConfig& config)
- {
- // If ratio between our projection and viewport aspect ratios is similar to 16:9 / 4:3
- // we have an anamorphic projection. This value can be overridden by a GameINI.
- // Game cheats that change the aspect ratio to natively unsupported ones
- // won't be automatically recognized here.
- return std::abs(CalculateProjectionViewportRatio(projection, viewport) -
- config.widescreen_heuristic_widescreen_ratio) <
- config.widescreen_heuristic_aspect_ratio_slop;
- }
- static bool IsNormalProjection(const Projection::Raw& projection, const Viewport& viewport,
- const VideoConfig& config)
- {
- return std::abs(CalculateProjectionViewportRatio(projection, viewport) -
- config.widescreen_heuristic_standard_ratio) <
- config.widescreen_heuristic_aspect_ratio_slop;
- }
- VertexManagerBase::VertexManagerBase()
- : m_cpu_vertex_buffer(MAXVBUFFERSIZE), m_cpu_index_buffer(MAXIBUFFERSIZE)
- {
- }
- VertexManagerBase::~VertexManagerBase() = default;
- bool VertexManagerBase::Initialize()
- {
- m_frame_end_event =
- AfterFrameEvent::Register([this](Core::System&) { OnEndFrame(); }, "VertexManagerBase");
- m_after_present_event = AfterPresentEvent::Register(
- [this](const PresentInfo& pi) { m_ticks_elapsed = pi.emulated_timestamp; },
- "VertexManagerBase");
- m_index_generator.Init();
- m_custom_shader_cache = std::make_unique<CustomShaderCache>();
- m_cpu_cull.Init();
- return true;
- }
- u32 VertexManagerBase::GetRemainingSize() const
- {
- return static_cast<u32>(m_end_buffer_pointer - m_cur_buffer_pointer);
- }
- void VertexManagerBase::AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices)
- {
- m_index_generator.AddIndices(primitive, num_vertices);
- }
- bool VertexManagerBase::AreAllVerticesCulled(VertexLoaderBase* loader,
- OpcodeDecoder::Primitive primitive, const u8* src,
- u32 count)
- {
- return m_cpu_cull.AreAllVerticesCulled(loader, primitive, src, count);
- }
- DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive primitive,
- u32 count, u32 stride, bool cullall)
- {
- // Flush all EFB pokes. Since the buffer is shared, we can't draw pokes+primitives concurrently.
- g_framebuffer_manager->FlushEFBPokes();
- // The SSE vertex loader can write up to 4 bytes past the end
- u32 const needed_vertex_bytes = count * stride + 4;
- // We can't merge different kinds of primitives, so we have to flush here
- PrimitiveType new_primitive_type = g_ActiveConfig.backend_info.bSupportsPrimitiveRestart ?
- primitive_from_gx_pr[primitive] :
- primitive_from_gx[primitive];
- if (m_current_primitive_type != new_primitive_type) [[unlikely]]
- {
- Flush();
- // Have to update the rasterization state for point/line cull modes.
- m_current_primitive_type = new_primitive_type;
- SetRasterizationStateChanged();
- }
- u32 remaining_indices = GetRemainingIndices(primitive);
- u32 remaining_index_generator_indices = m_index_generator.GetRemainingIndices(primitive);
- // Check for size in buffer, if the buffer gets full, call Flush()
- if (!m_is_flushed && (count > remaining_index_generator_indices || count > remaining_indices ||
- needed_vertex_bytes > GetRemainingSize())) [[unlikely]]
- {
- Flush();
- }
- m_cull_all = cullall;
- // need to alloc new buffer
- if (m_is_flushed) [[unlikely]]
- {
- if (cullall)
- {
- // This buffer isn't getting sent to the GPU. Just allocate it on the cpu.
- m_cur_buffer_pointer = m_base_buffer_pointer = m_cpu_vertex_buffer.data();
- m_end_buffer_pointer = m_base_buffer_pointer + m_cpu_vertex_buffer.size();
- m_index_generator.Start(m_cpu_index_buffer.data());
- }
- else
- {
- ResetBuffer(stride);
- }
- remaining_index_generator_indices = m_index_generator.GetRemainingIndices(primitive);
- remaining_indices = GetRemainingIndices(primitive);
- m_is_flushed = false;
- }
- // Now that we've reset the buffer, there should be enough space. It's possible that we still
- // won't have enough space in a few rare cases, such as vertex shader line/point expansion with a
- // ton of lines in one draw command, in which case we will either need to add support for
- // splitting a single draw command into multiple draws or using bigger indices.
- ASSERT_MSG(VIDEO, count <= remaining_index_generator_indices,
- "VertexManager: Too few remaining index values ({} > {}). "
- "32-bit indices or primitive breaking needed.",
- count, remaining_index_generator_indices);
- ASSERT_MSG(VIDEO, count <= remaining_indices,
- "VertexManager: Buffer not large enough for all indices! ({} > {}) "
- "Increase MAXIBUFFERSIZE or we need primitive breaking after all.",
- count, remaining_indices);
- ASSERT_MSG(VIDEO, needed_vertex_bytes <= GetRemainingSize(),
- "VertexManager: Buffer not large enough for all vertices! ({} > {}) "
- "Increase MAXVBUFFERSIZE or we need primitive breaking after all.",
- needed_vertex_bytes, GetRemainingSize());
- return DataReader(m_cur_buffer_pointer, m_end_buffer_pointer);
- }
- DataReader VertexManagerBase::DisableCullAll(u32 stride)
- {
- if (m_cull_all)
- {
- m_cull_all = false;
- ResetBuffer(stride);
- }
- return DataReader(m_cur_buffer_pointer, m_end_buffer_pointer);
- }
- void VertexManagerBase::FlushData(u32 count, u32 stride)
- {
- m_cur_buffer_pointer += count * stride;
- }
- u32 VertexManagerBase::GetRemainingIndices(OpcodeDecoder::Primitive primitive) const
- {
- const u32 index_len = MAXIBUFFERSIZE - m_index_generator.GetIndexLen();
- if (primitive >= Primitive::GX_DRAW_LINES)
- {
- if (g_Config.UseVSForLinePointExpand())
- {
- if (g_Config.backend_info.bSupportsPrimitiveRestart)
- {
- switch (primitive)
- {
- case Primitive::GX_DRAW_LINES:
- return index_len / 5 * 2;
- case Primitive::GX_DRAW_LINE_STRIP:
- return index_len / 5 + 1;
- case Primitive::GX_DRAW_POINTS:
- return index_len / 5;
- default:
- return 0;
- }
- }
- else
- {
- switch (primitive)
- {
- case Primitive::GX_DRAW_LINES:
- return index_len / 6 * 2;
- case Primitive::GX_DRAW_LINE_STRIP:
- return index_len / 6 + 1;
- case Primitive::GX_DRAW_POINTS:
- return index_len / 6;
- default:
- return 0;
- }
- }
- }
- else
- {
- switch (primitive)
- {
- case Primitive::GX_DRAW_LINES:
- return index_len;
- case Primitive::GX_DRAW_LINE_STRIP:
- return index_len / 2 + 1;
- case Primitive::GX_DRAW_POINTS:
- return index_len;
- default:
- return 0;
- }
- }
- }
- else if (g_Config.backend_info.bSupportsPrimitiveRestart)
- {
- switch (primitive)
- {
- case Primitive::GX_DRAW_QUADS:
- case Primitive::GX_DRAW_QUADS_2:
- return index_len / 5 * 4;
- case Primitive::GX_DRAW_TRIANGLES:
- return index_len / 4 * 3;
- case Primitive::GX_DRAW_TRIANGLE_STRIP:
- return index_len / 1 - 1;
- case Primitive::GX_DRAW_TRIANGLE_FAN:
- return index_len / 6 * 4 + 1;
- default:
- return 0;
- }
- }
- else
- {
- switch (primitive)
- {
- case Primitive::GX_DRAW_QUADS:
- case Primitive::GX_DRAW_QUADS_2:
- return index_len / 6 * 4;
- case Primitive::GX_DRAW_TRIANGLES:
- return index_len;
- case Primitive::GX_DRAW_TRIANGLE_STRIP:
- return index_len / 3 + 2;
- case Primitive::GX_DRAW_TRIANGLE_FAN:
- return index_len / 3 + 2;
- default:
- return 0;
- }
- }
- }
- auto VertexManagerBase::ResetFlushAspectRatioCount() -> FlushStatistics
- {
- const auto result = m_flush_statistics;
- m_flush_statistics = {};
- return result;
- }
- void VertexManagerBase::ResetBuffer(u32 vertex_stride)
- {
- m_base_buffer_pointer = m_cpu_vertex_buffer.data();
- m_cur_buffer_pointer = m_cpu_vertex_buffer.data();
- m_end_buffer_pointer = m_base_buffer_pointer + m_cpu_vertex_buffer.size();
- m_index_generator.Start(m_cpu_index_buffer.data());
- }
- void VertexManagerBase::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices,
- u32* out_base_vertex, u32* out_base_index)
- {
- *out_base_vertex = 0;
- *out_base_index = 0;
- }
- void VertexManagerBase::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex)
- {
- // If bounding box is enabled, we need to flush any changes first, then invalidate what we have.
- if (g_bounding_box->IsEnabled() && g_ActiveConfig.bBBoxEnable &&
- g_ActiveConfig.backend_info.bSupportsBBox)
- {
- g_bounding_box->Flush();
- }
- g_gfx->DrawIndexed(base_index, num_indices, base_vertex);
- }
- void VertexManagerBase::UploadUniforms()
- {
- }
- void VertexManagerBase::InvalidateConstants()
- {
- auto& system = Core::System::GetInstance();
- auto& vertex_shader_manager = system.GetVertexShaderManager();
- auto& geometry_shader_manager = system.GetGeometryShaderManager();
- auto& pixel_shader_manager = system.GetPixelShaderManager();
- vertex_shader_manager.dirty = true;
- geometry_shader_manager.dirty = true;
- pixel_shader_manager.dirty = true;
- }
- void VertexManagerBase::UploadUtilityUniforms(const void* uniforms, u32 uniforms_size)
- {
- }
- void VertexManagerBase::UploadUtilityVertices(const void* vertices, u32 vertex_stride,
- u32 num_vertices, const u16* indices, u32 num_indices,
- u32* out_base_vertex, u32* out_base_index)
- {
- // The GX vertex list should be flushed before any utility draws occur.
- ASSERT(m_is_flushed);
- // Copy into the buffers usually used for GX drawing.
- ResetBuffer(std::max(vertex_stride, 1u));
- if (vertices)
- {
- const u32 copy_size = vertex_stride * num_vertices;
- ASSERT((m_cur_buffer_pointer + copy_size) <= m_end_buffer_pointer);
- std::memcpy(m_cur_buffer_pointer, vertices, copy_size);
- m_cur_buffer_pointer += copy_size;
- }
- if (indices)
- m_index_generator.AddExternalIndices(indices, num_indices, num_vertices);
- CommitBuffer(num_vertices, vertex_stride, num_indices, out_base_vertex, out_base_index);
- }
- u32 VertexManagerBase::GetTexelBufferElementSize(TexelBufferFormat buffer_format)
- {
- // R8 - 1, R16 - 2, RGBA8 - 4, R32G32 - 8
- return 1u << static_cast<u32>(buffer_format);
- }
- bool VertexManagerBase::UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format,
- u32* out_offset)
- {
- return false;
- }
- bool VertexManagerBase::UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format,
- u32* out_offset, const void* palette_data,
- u32 palette_size, TexelBufferFormat palette_format,
- u32* palette_offset)
- {
- return false;
- }
- BitSet32 VertexManagerBase::UsedTextures() const
- {
- BitSet32 usedtextures;
- for (u32 i = 0; i < bpmem.genMode.numtevstages + 1u; ++i)
- if (bpmem.tevorders[i / 2].getEnable(i & 1))
- usedtextures[bpmem.tevorders[i / 2].getTexMap(i & 1)] = true;
- if (bpmem.genMode.numindstages > 0)
- for (unsigned int i = 0; i < bpmem.genMode.numtevstages + 1u; ++i)
- if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages)
- usedtextures[bpmem.tevindref.getTexMap(bpmem.tevind[i].bt)] = true;
- return usedtextures;
- }
- void VertexManagerBase::Flush()
- {
- if (m_is_flushed)
- return;
- m_is_flushed = true;
- if (m_draw_counter == 0)
- {
- // This is more or less the start of the Frame
- BeforeFrameEvent::Trigger();
- }
- if (xfmem.numTexGen.numTexGens != bpmem.genMode.numtexgens ||
- xfmem.numChan.numColorChans != bpmem.genMode.numcolchans)
- {
- ERROR_LOG_FMT(
- VIDEO,
- "Mismatched configuration between XF and BP stages - {}/{} texgens, {}/{} colors. "
- "Skipping draw. Please report on the issue tracker.",
- xfmem.numTexGen.numTexGens, bpmem.genMode.numtexgens.Value(), xfmem.numChan.numColorChans,
- bpmem.genMode.numcolchans.Value());
- // Analytics reporting so we can discover which games have this problem, that way when we
- // eventually simulate the behavior we have test cases for it.
- if (xfmem.numTexGen.numTexGens != bpmem.genMode.numtexgens)
- {
- DolphinAnalytics::Instance().ReportGameQuirk(
- GameQuirk::MISMATCHED_GPU_TEXGENS_BETWEEN_XF_AND_BP);
- }
- if (xfmem.numChan.numColorChans != bpmem.genMode.numcolchans)
- {
- DolphinAnalytics::Instance().ReportGameQuirk(
- GameQuirk::MISMATCHED_GPU_COLORS_BETWEEN_XF_AND_BP);
- }
- return;
- }
- #if defined(_DEBUG) || defined(DEBUGFAST)
- PRIM_LOG("frame{}:\n texgen={}, numchan={}, dualtex={}, ztex={}, cole={}, alpe={}, ze={}",
- g_ActiveConfig.iSaveTargetId, xfmem.numTexGen.numTexGens, xfmem.numChan.numColorChans,
- xfmem.dualTexTrans.enabled, bpmem.ztex2.op.Value(), bpmem.blendmode.colorupdate.Value(),
- bpmem.blendmode.alphaupdate.Value(), bpmem.zmode.updateenable.Value());
- for (u32 i = 0; i < xfmem.numChan.numColorChans; ++i)
- {
- LitChannel* ch = &xfmem.color[i];
- PRIM_LOG("colchan{}: matsrc={}, light={:#x}, ambsrc={}, diffunc={}, attfunc={}", i,
- ch->matsource.Value(), ch->GetFullLightMask(), ch->ambsource.Value(),
- ch->diffusefunc.Value(), ch->attnfunc.Value());
- ch = &xfmem.alpha[i];
- PRIM_LOG("alpchan{}: matsrc={}, light={:#x}, ambsrc={}, diffunc={}, attfunc={}", i,
- ch->matsource.Value(), ch->GetFullLightMask(), ch->ambsource.Value(),
- ch->diffusefunc.Value(), ch->attnfunc.Value());
- }
- for (u32 i = 0; i < xfmem.numTexGen.numTexGens; ++i)
- {
- TexMtxInfo tinfo = xfmem.texMtxInfo[i];
- if (tinfo.texgentype != TexGenType::EmbossMap)
- tinfo.hex &= 0x7ff;
- if (tinfo.texgentype != TexGenType::Regular)
- tinfo.projection = TexSize::ST;
- PRIM_LOG("txgen{}: proj={}, input={}, gentype={}, srcrow={}, embsrc={}, emblght={}, "
- "postmtx={}, postnorm={}",
- i, tinfo.projection.Value(), tinfo.inputform.Value(), tinfo.texgentype.Value(),
- tinfo.sourcerow.Value(), tinfo.embosssourceshift.Value(),
- tinfo.embosslightshift.Value(), xfmem.postMtxInfo[i].index.Value(),
- xfmem.postMtxInfo[i].normalize.Value());
- }
- PRIM_LOG("pixel: tev={}, ind={}, texgen={}, dstalpha={}, alphatest={:#x}",
- bpmem.genMode.numtevstages.Value() + 1, bpmem.genMode.numindstages.Value(),
- bpmem.genMode.numtexgens.Value(), bpmem.dstalpha.enable.Value(),
- (bpmem.alpha_test.hex >> 16) & 0xff);
- #endif
- // Track some stats used elsewhere by the anamorphic widescreen heuristic.
- auto& system = Core::System::GetInstance();
- if (!system.IsWii())
- {
- const bool is_perspective = xfmem.projection.type == ProjectionType::Perspective;
- auto& counts =
- is_perspective ? m_flush_statistics.perspective : m_flush_statistics.orthographic;
- const auto& projection = xfmem.projection.rawProjection;
- // TODO: Potentially the viewport size could be used as weight for the flush count average.
- // This way a small minimap would have less effect than a fullscreen projection.
- const auto& viewport = xfmem.viewport;
- // FYI: This average is based on flushes.
- // It doesn't look at vertex counts like the heuristic does.
- counts.average_ratio.Push(CalculateProjectionViewportRatio(projection, viewport));
- if (IsAnamorphicProjection(projection, viewport, g_ActiveConfig))
- {
- ++counts.anamorphic_flush_count;
- counts.anamorphic_vertex_count += m_index_generator.GetIndexLen();
- }
- else if (IsNormalProjection(projection, viewport, g_ActiveConfig))
- {
- ++counts.normal_flush_count;
- counts.normal_vertex_count += m_index_generator.GetIndexLen();
- }
- else
- {
- ++counts.other_flush_count;
- counts.other_vertex_count += m_index_generator.GetIndexLen();
- }
- }
- auto& pixel_shader_manager = system.GetPixelShaderManager();
- auto& geometry_shader_manager = system.GetGeometryShaderManager();
- auto& vertex_shader_manager = system.GetVertexShaderManager();
- auto& xf_state_manager = system.GetXFStateManager();
- if (g_ActiveConfig.bGraphicMods)
- {
- const double seconds_elapsed =
- static_cast<double>(m_ticks_elapsed) / system.GetSystemTimers().GetTicksPerSecond();
- pixel_shader_manager.constants.time_ms = seconds_elapsed * 1000;
- }
- CalculateNormals(VertexLoaderManager::GetCurrentVertexFormat());
- // Calculate ZSlope for zfreeze
- const auto used_textures = UsedTextures();
- std::vector<std::string> texture_names;
- Common::SmallVector<u32, 8> texture_units;
- std::array<SamplerState, 8> samplers;
- if (!m_cull_all)
- {
- if (!g_ActiveConfig.bGraphicMods)
- {
- for (const u32 i : used_textures)
- {
- const auto cache_entry = g_texture_cache->Load(TextureInfo::FromStage(i));
- if (!cache_entry)
- continue;
- const float custom_tex_scale = cache_entry->GetWidth() / float(cache_entry->native_width);
- samplers[i] = TextureCacheBase::GetSamplerState(
- i, custom_tex_scale, cache_entry->is_custom_tex, cache_entry->has_arbitrary_mips);
- }
- }
- else
- {
- for (const u32 i : used_textures)
- {
- const auto cache_entry = g_texture_cache->Load(TextureInfo::FromStage(i));
- if (cache_entry)
- {
- if (std::find(texture_names.begin(), texture_names.end(),
- cache_entry->texture_info_name) == texture_names.end())
- {
- texture_names.push_back(cache_entry->texture_info_name);
- texture_units.push_back(i);
- }
- const float custom_tex_scale = cache_entry->GetWidth() / float(cache_entry->native_width);
- samplers[i] = TextureCacheBase::GetSamplerState(
- i, custom_tex_scale, cache_entry->is_custom_tex, cache_entry->has_arbitrary_mips);
- }
- }
- }
- }
- vertex_shader_manager.SetConstants(texture_names, xf_state_manager);
- if (!bpmem.genMode.zfreeze)
- {
- // Must be done after VertexShaderManager::SetConstants()
- CalculateZSlope(VertexLoaderManager::GetCurrentVertexFormat());
- }
- else if (m_zslope.dirty && !m_cull_all) // or apply any dirty ZSlopes
- {
- pixel_shader_manager.SetZSlope(m_zslope.dfdx, m_zslope.dfdy, m_zslope.f0);
- m_zslope.dirty = false;
- }
- if (!m_cull_all)
- {
- CustomPixelShaderContents custom_pixel_shader_contents;
- std::optional<CustomPixelShader> custom_pixel_shader;
- std::vector<std::string> custom_pixel_texture_names;
- std::span<u8> custom_pixel_shader_uniforms;
- bool skip = false;
- for (size_t i = 0; i < texture_names.size(); i++)
- {
- GraphicsModActionData::DrawStarted draw_started{texture_units, &skip, &custom_pixel_shader,
- &custom_pixel_shader_uniforms};
- for (const auto& action : g_graphics_mod_manager->GetDrawStartedActions(texture_names[i]))
- {
- action->OnDrawStarted(&draw_started);
- if (custom_pixel_shader)
- {
- custom_pixel_shader_contents.shaders.push_back(*custom_pixel_shader);
- custom_pixel_texture_names.push_back(texture_names[i]);
- }
- custom_pixel_shader = std::nullopt;
- }
- }
- // Now the vertices can be flushed to the GPU. Everything following the CommitBuffer() call
- // must be careful to not upload any utility vertices, as the binding will be lost otherwise.
- const u32 num_indices = m_index_generator.GetIndexLen();
- if (num_indices == 0)
- return;
- // Texture loading can cause palettes to be applied (-> uniforms -> draws).
- // Palette application does not use vertices, only a full-screen quad, so this is okay.
- // Same with GPU texture decoding, which uses compute shaders.
- g_texture_cache->BindTextures(used_textures, samplers);
- if (PerfQueryBase::ShouldEmulate())
- g_perf_query->EnableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
- if (!skip)
- {
- UpdatePipelineConfig();
- UpdatePipelineObject();
- if (m_current_pipeline_object)
- {
- const AbstractPipeline* pipeline_object = m_current_pipeline_object;
- if (!custom_pixel_shader_contents.shaders.empty())
- {
- if (const auto custom_pipeline =
- GetCustomPipeline(custom_pixel_shader_contents, m_current_pipeline_config,
- m_current_uber_pipeline_config, m_current_pipeline_object))
- {
- pipeline_object = custom_pipeline;
- }
- }
- RenderDrawCall(pixel_shader_manager, geometry_shader_manager, custom_pixel_shader_contents,
- custom_pixel_shader_uniforms, m_current_primitive_type, pipeline_object);
- }
- }
- // Track the total emulated state draws
- INCSTAT(g_stats.this_frame.num_draw_calls);
- // Even if we skip the draw, emulated state should still be impacted
- OnDraw();
- if (PerfQueryBase::ShouldEmulate())
- g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
- // The EFB cache is now potentially stale.
- g_framebuffer_manager->FlagPeekCacheAsOutOfDate();
- }
- if (xfmem.numTexGen.numTexGens != bpmem.genMode.numtexgens)
- {
- ERROR_LOG_FMT(VIDEO,
- "xf.numtexgens ({}) does not match bp.numtexgens ({}). Error in command stream.",
- xfmem.numTexGen.numTexGens, bpmem.genMode.numtexgens.Value());
- }
- }
- void VertexManagerBase::DoState(PointerWrap& p)
- {
- if (p.IsReadMode())
- {
- // Flush old vertex data before loading state.
- Flush();
- }
- p.Do(m_zslope);
- p.Do(VertexLoaderManager::normal_cache);
- p.Do(VertexLoaderManager::tangent_cache);
- p.Do(VertexLoaderManager::binormal_cache);
- }
- void VertexManagerBase::CalculateZSlope(NativeVertexFormat* format)
- {
- float out[12];
- float viewOffset[2] = {xfmem.viewport.xOrig - bpmem.scissorOffset.x * 2,
- xfmem.viewport.yOrig - bpmem.scissorOffset.y * 2};
- if (m_current_primitive_type != PrimitiveType::Triangles &&
- m_current_primitive_type != PrimitiveType::TriangleStrip)
- {
- return;
- }
- // Global matrix ID.
- u32 mtxIdx = g_main_cp_state.matrix_index_a.PosNormalMtxIdx;
- const PortableVertexDeclaration vert_decl = format->GetVertexDeclaration();
- // Make sure the buffer contains at least 3 vertices.
- if ((m_cur_buffer_pointer - m_base_buffer_pointer) < (vert_decl.stride * 3))
- return;
- // Lookup vertices of the last rendered triangle and software-transform them
- // This allows us to determine the depth slope, which will be used if z-freeze
- // is enabled in the following flush.
- auto& system = Core::System::GetInstance();
- auto& vertex_shader_manager = system.GetVertexShaderManager();
- for (unsigned int i = 0; i < 3; ++i)
- {
- // If this vertex format has per-vertex position matrix IDs, look it up.
- if (vert_decl.posmtx.enable)
- mtxIdx = VertexLoaderManager::position_matrix_index_cache[2 - i];
- if (vert_decl.position.components == 2)
- VertexLoaderManager::position_cache[2 - i][2] = 0;
- vertex_shader_manager.TransformToClipSpace(&VertexLoaderManager::position_cache[2 - i][0],
- &out[i * 4], mtxIdx);
- // Transform to Screenspace
- float inv_w = 1.0f / out[3 + i * 4];
- out[0 + i * 4] = out[0 + i * 4] * inv_w * xfmem.viewport.wd + viewOffset[0];
- out[1 + i * 4] = out[1 + i * 4] * inv_w * xfmem.viewport.ht + viewOffset[1];
- out[2 + i * 4] = out[2 + i * 4] * inv_w * xfmem.viewport.zRange + xfmem.viewport.farZ;
- }
- float dx31 = out[8] - out[0];
- float dx12 = out[0] - out[4];
- float dy12 = out[1] - out[5];
- float dy31 = out[9] - out[1];
- float DF31 = out[10] - out[2];
- float DF21 = out[6] - out[2];
- float a = DF31 * -dy12 - DF21 * dy31;
- float b = dx31 * DF21 + dx12 * DF31;
- float c = -dx12 * dy31 - dx31 * -dy12;
- // Sometimes we process de-generate triangles. Stop any divide by zeros
- if (c == 0)
- return;
- m_zslope.dfdx = -a / c;
- m_zslope.dfdy = -b / c;
- m_zslope.f0 = out[2] - (out[0] * m_zslope.dfdx + out[1] * m_zslope.dfdy);
- m_zslope.dirty = true;
- }
- void VertexManagerBase::CalculateNormals(NativeVertexFormat* format)
- {
- const PortableVertexDeclaration vert_decl = format->GetVertexDeclaration();
- // Only update the binormal/tangent vertex shader constants if the vertex format lacks binormals
- // (VertexLoaderManager::binormal_cache gets updated by the vertex loader when binormals are
- // present, though)
- if (vert_decl.normals[1].enable)
- return;
- VertexLoaderManager::tangent_cache[3] = 0;
- VertexLoaderManager::binormal_cache[3] = 0;
- auto& system = Core::System::GetInstance();
- auto& vertex_shader_manager = system.GetVertexShaderManager();
- if (vertex_shader_manager.constants.cached_tangent != VertexLoaderManager::tangent_cache)
- {
- vertex_shader_manager.constants.cached_tangent = VertexLoaderManager::tangent_cache;
- vertex_shader_manager.dirty = true;
- }
- if (vertex_shader_manager.constants.cached_binormal != VertexLoaderManager::binormal_cache)
- {
- vertex_shader_manager.constants.cached_binormal = VertexLoaderManager::binormal_cache;
- vertex_shader_manager.dirty = true;
- }
- if (vert_decl.normals[0].enable)
- return;
- VertexLoaderManager::normal_cache[3] = 0;
- if (vertex_shader_manager.constants.cached_normal != VertexLoaderManager::normal_cache)
- {
- vertex_shader_manager.constants.cached_normal = VertexLoaderManager::normal_cache;
- vertex_shader_manager.dirty = true;
- }
- }
- void VertexManagerBase::UpdatePipelineConfig()
- {
- NativeVertexFormat* vertex_format = VertexLoaderManager::GetCurrentVertexFormat();
- if (vertex_format != m_current_pipeline_config.vertex_format)
- {
- m_current_pipeline_config.vertex_format = vertex_format;
- m_current_uber_pipeline_config.vertex_format =
- VertexLoaderManager::GetUberVertexFormat(vertex_format->GetVertexDeclaration());
- m_pipeline_config_changed = true;
- }
- VertexShaderUid vs_uid = GetVertexShaderUid();
- if (vs_uid != m_current_pipeline_config.vs_uid)
- {
- m_current_pipeline_config.vs_uid = vs_uid;
- m_current_uber_pipeline_config.vs_uid = UberShader::GetVertexShaderUid();
- m_pipeline_config_changed = true;
- }
- PixelShaderUid ps_uid = GetPixelShaderUid();
- if (ps_uid != m_current_pipeline_config.ps_uid)
- {
- m_current_pipeline_config.ps_uid = ps_uid;
- m_current_uber_pipeline_config.ps_uid = UberShader::GetPixelShaderUid();
- m_pipeline_config_changed = true;
- }
- GeometryShaderUid gs_uid = GetGeometryShaderUid(GetCurrentPrimitiveType());
- if (gs_uid != m_current_pipeline_config.gs_uid)
- {
- m_current_pipeline_config.gs_uid = gs_uid;
- m_current_uber_pipeline_config.gs_uid = gs_uid;
- m_pipeline_config_changed = true;
- }
- if (m_rasterization_state_changed)
- {
- m_rasterization_state_changed = false;
- RasterizationState new_rs = {};
- new_rs.Generate(bpmem, m_current_primitive_type);
- if (new_rs != m_current_pipeline_config.rasterization_state)
- {
- m_current_pipeline_config.rasterization_state = new_rs;
- m_current_uber_pipeline_config.rasterization_state = new_rs;
- m_pipeline_config_changed = true;
- }
- }
- if (m_depth_state_changed)
- {
- m_depth_state_changed = false;
- DepthState new_ds = {};
- new_ds.Generate(bpmem);
- if (new_ds != m_current_pipeline_config.depth_state)
- {
- m_current_pipeline_config.depth_state = new_ds;
- m_current_uber_pipeline_config.depth_state = new_ds;
- m_pipeline_config_changed = true;
- }
- }
- if (m_blending_state_changed)
- {
- m_blending_state_changed = false;
- BlendingState new_bs = {};
- new_bs.Generate(bpmem);
- if (new_bs != m_current_pipeline_config.blending_state)
- {
- m_current_pipeline_config.blending_state = new_bs;
- m_current_uber_pipeline_config.blending_state = new_bs;
- m_pipeline_config_changed = true;
- }
- }
- }
- void VertexManagerBase::UpdatePipelineObject()
- {
- if (!m_pipeline_config_changed)
- return;
- m_current_pipeline_object = nullptr;
- m_pipeline_config_changed = false;
- switch (g_ActiveConfig.iShaderCompilationMode)
- {
- case ShaderCompilationMode::Synchronous:
- {
- // Ubershaders disabled? Block and compile the specialized shader.
- m_current_pipeline_object = g_shader_cache->GetPipelineForUid(m_current_pipeline_config);
- }
- break;
- case ShaderCompilationMode::SynchronousUberShaders:
- {
- // Exclusive ubershader mode, always use ubershaders.
- m_current_pipeline_object =
- g_shader_cache->GetUberPipelineForUid(m_current_uber_pipeline_config);
- }
- break;
- case ShaderCompilationMode::AsynchronousUberShaders:
- case ShaderCompilationMode::AsynchronousSkipRendering:
- {
- // Can we background compile shaders? If so, get the pipeline asynchronously.
- auto res = g_shader_cache->GetPipelineForUidAsync(m_current_pipeline_config);
- if (res)
- {
- // Specialized shaders are ready, prefer these.
- m_current_pipeline_object = *res;
- return;
- }
- if (g_ActiveConfig.iShaderCompilationMode == ShaderCompilationMode::AsynchronousUberShaders)
- {
- // Specialized shaders not ready, use the ubershaders.
- m_current_pipeline_object =
- g_shader_cache->GetUberPipelineForUid(m_current_uber_pipeline_config);
- }
- else
- {
- // Ensure we try again next draw. Otherwise, if no registers change between frames, the
- // object will never be drawn, even when the shader is ready.
- m_pipeline_config_changed = true;
- }
- }
- break;
- }
- }
- void VertexManagerBase::OnConfigChange()
- {
- // Reload index generator function tables in case VS expand config changed
- m_index_generator.Init();
- }
- void VertexManagerBase::OnDraw()
- {
- m_draw_counter++;
- // If the last efb copy was too close to the one before it, don't forget about it until the next
- // efb copy happens (which might not be for a long time)
- u32 diff = m_draw_counter - m_last_efb_copy_draw_counter;
- if (m_unflushed_efb_copy && diff > MINIMUM_DRAW_CALLS_PER_COMMAND_BUFFER_FOR_READBACK)
- {
- g_gfx->Flush();
- m_unflushed_efb_copy = false;
- m_last_efb_copy_draw_counter = m_draw_counter;
- }
- // If we didn't have any CPU access last frame, do nothing.
- if (m_scheduled_command_buffer_kicks.empty() || !m_allow_background_execution)
- return;
- // Check if this draw is scheduled to kick a command buffer.
- // The draw counters will always be sorted so a binary search is possible here.
- if (std::ranges::binary_search(m_scheduled_command_buffer_kicks, m_draw_counter))
- {
- // Kick a command buffer on the background thread.
- g_gfx->Flush();
- m_unflushed_efb_copy = false;
- m_last_efb_copy_draw_counter = m_draw_counter;
- }
- }
- void VertexManagerBase::OnCPUEFBAccess()
- {
- // Check this isn't another access without any draws inbetween.
- if (!m_cpu_accesses_this_frame.empty() && m_cpu_accesses_this_frame.back() == m_draw_counter)
- return;
- // Store the current draw counter for scheduling in OnEndFrame.
- m_cpu_accesses_this_frame.emplace_back(m_draw_counter);
- }
- void VertexManagerBase::OnEFBCopyToRAM()
- {
- // If we're not deferring, try to preempt it next frame.
- if (!g_ActiveConfig.bDeferEFBCopies)
- {
- OnCPUEFBAccess();
- return;
- }
- // Otherwise, only execute if we have at least 10 objects between us and the last copy.
- const u32 diff = m_draw_counter - m_last_efb_copy_draw_counter;
- m_last_efb_copy_draw_counter = m_draw_counter;
- if (diff < MINIMUM_DRAW_CALLS_PER_COMMAND_BUFFER_FOR_READBACK)
- {
- m_unflushed_efb_copy = true;
- return;
- }
- m_unflushed_efb_copy = false;
- g_gfx->Flush();
- }
- void VertexManagerBase::OnEndFrame()
- {
- m_draw_counter = 0;
- m_last_efb_copy_draw_counter = 0;
- m_scheduled_command_buffer_kicks.clear();
- // If we have no CPU access at all, leave everything in the one command buffer for maximum
- // parallelism between CPU/GPU, at the cost of slightly higher latency.
- if (m_cpu_accesses_this_frame.empty())
- return;
- // In order to reduce CPU readback latency, we want to kick a command buffer roughly halfway
- // between the draw counters that invoked the readback, or every 250 draws, whichever is
- // smaller.
- if (g_ActiveConfig.iCommandBufferExecuteInterval > 0)
- {
- u32 last_draw_counter = 0;
- u32 interval = static_cast<u32>(g_ActiveConfig.iCommandBufferExecuteInterval);
- for (u32 draw_counter : m_cpu_accesses_this_frame)
- {
- // We don't want to waste executing command buffers for only a few draws, so set a minimum.
- // Leave last_draw_counter as-is, so we get the correct number of draws between submissions.
- u32 draw_count = draw_counter - last_draw_counter;
- if (draw_count < MINIMUM_DRAW_CALLS_PER_COMMAND_BUFFER_FOR_READBACK)
- continue;
- if (draw_count <= interval)
- {
- u32 mid_point = draw_count / 2;
- m_scheduled_command_buffer_kicks.emplace_back(last_draw_counter + mid_point);
- }
- else
- {
- u32 counter = interval;
- while (counter < draw_count)
- {
- m_scheduled_command_buffer_kicks.emplace_back(last_draw_counter + counter);
- counter += interval;
- }
- }
- last_draw_counter = draw_counter;
- }
- }
- m_cpu_accesses_this_frame.clear();
- // We invalidate the pipeline object at the start of the frame.
- // This is for the rare case where only a single pipeline configuration is used,
- // and hybrid ubershaders have compiled the specialized shader, but without any
- // state changes the specialized shader will not take over.
- InvalidatePipelineObject();
- }
- void VertexManagerBase::NotifyCustomShaderCacheOfHostChange(const ShaderHostConfig& host_config)
- {
- m_custom_shader_cache->SetHostConfig(host_config);
- m_custom_shader_cache->Reload();
- }
- void VertexManagerBase::RenderDrawCall(
- PixelShaderManager& pixel_shader_manager, GeometryShaderManager& geometry_shader_manager,
- const CustomPixelShaderContents& custom_pixel_shader_contents,
- std::span<u8> custom_pixel_shader_uniforms, PrimitiveType primitive_type,
- const AbstractPipeline* current_pipeline)
- {
- // Now we can upload uniforms, as nothing else will override them.
- geometry_shader_manager.SetConstants(primitive_type);
- pixel_shader_manager.SetConstants();
- if (!custom_pixel_shader_uniforms.empty() &&
- pixel_shader_manager.custom_constants.data() != custom_pixel_shader_uniforms.data())
- {
- pixel_shader_manager.custom_constants_dirty = true;
- }
- pixel_shader_manager.custom_constants = custom_pixel_shader_uniforms;
- UploadUniforms();
- g_gfx->SetPipeline(current_pipeline);
- u32 base_vertex, base_index;
- CommitBuffer(m_index_generator.GetNumVerts(),
- VertexLoaderManager::GetCurrentVertexFormat()->GetVertexStride(),
- m_index_generator.GetIndexLen(), &base_vertex, &base_index);
- if (g_ActiveConfig.backend_info.api_type != APIType::D3D &&
- g_ActiveConfig.UseVSForLinePointExpand() &&
- (primitive_type == PrimitiveType::Points || primitive_type == PrimitiveType::Lines))
- {
- // VS point/line expansion puts the vertex id at gl_VertexID << 2
- // That means the base vertex has to be adjusted to match
- // (The shader adds this after shifting right on D3D, so no need to do this)
- base_vertex <<= 2;
- }
- DrawCurrentBatch(base_index, m_index_generator.GetIndexLen(), base_vertex);
- }
- const AbstractPipeline* VertexManagerBase::GetCustomPipeline(
- const CustomPixelShaderContents& custom_pixel_shader_contents,
- const VideoCommon::GXPipelineUid& current_pipeline_config,
- const VideoCommon::GXUberPipelineUid& current_uber_pipeline_config,
- const AbstractPipeline* current_pipeline) const
- {
- if (current_pipeline)
- {
- if (!custom_pixel_shader_contents.shaders.empty())
- {
- CustomShaderInstance custom_shaders;
- custom_shaders.pixel_contents = custom_pixel_shader_contents;
- switch (g_ActiveConfig.iShaderCompilationMode)
- {
- case ShaderCompilationMode::Synchronous:
- case ShaderCompilationMode::AsynchronousSkipRendering:
- {
- if (auto pipeline = m_custom_shader_cache->GetPipelineAsync(
- current_pipeline_config, custom_shaders, current_pipeline->m_config))
- {
- return *pipeline;
- }
- }
- break;
- case ShaderCompilationMode::SynchronousUberShaders:
- {
- // D3D has issues compiling large custom ubershaders
- // use specialized shaders instead
- if (g_ActiveConfig.backend_info.api_type == APIType::D3D)
- {
- if (auto pipeline = m_custom_shader_cache->GetPipelineAsync(
- current_pipeline_config, custom_shaders, current_pipeline->m_config))
- {
- return *pipeline;
- }
- }
- else
- {
- if (auto pipeline = m_custom_shader_cache->GetPipelineAsync(
- current_uber_pipeline_config, custom_shaders, current_pipeline->m_config))
- {
- return *pipeline;
- }
- }
- }
- break;
- case ShaderCompilationMode::AsynchronousUberShaders:
- {
- if (auto pipeline = m_custom_shader_cache->GetPipelineAsync(
- current_pipeline_config, custom_shaders, current_pipeline->m_config))
- {
- return *pipeline;
- }
- else if (auto uber_pipeline = m_custom_shader_cache->GetPipelineAsync(
- current_uber_pipeline_config, custom_shaders, current_pipeline->m_config))
- {
- return *uber_pipeline;
- }
- }
- break;
- };
- }
- }
- return nullptr;
- }
|