MTLStateTracker.h 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296
  1. // Copyright 2022 Dolphin Emulator Project
  2. // SPDX-License-Identifier: GPL-2.0-or-later
  3. #pragma once
  #include <Metal/Metal.h>

  #include <algorithm>
  #include <array>
  #include <atomic>
  #include <cstddef>
  #include <memory>
  #include <utility>
  #include <vector>

  #include "Common/Assert.h"
  #include "Common/CommonTypes.h"
  #include "Common/MathUtil.h"

  #include "VideoBackends/Metal/MRCHelpers.h"
  #include "VideoBackends/Metal/MTLObjectCache.h"
  #include "VideoBackends/Metal/MTLTexture.h"
  #include "VideoBackends/Metal/MTLUtil.h"

  #include "VideoCommon/Constants.h"
  #include "VideoCommon/FramebufferManager.h"
  #include "VideoCommon/PerfQueryBase.h"
  namespace Metal
  {
  class Pipeline;
  class ComputePipeline;

  /// Holds the Metal backend's command buffers and encoders, its upload buffers, and the
  /// render/compute state that is wanted for the next draw or dispatch.
  class StateTracker
  {
  public:
    /// Selects one of the entries of m_upload_buffers.
    /// `Last` must alias the final enumerator: it is used to size that array.
    enum class UploadBuffer
    {
      Other,
      Uniform,
      Vertex,
      Index,
      Texels,
      Last = Texels
    };

    /// Location handed back by the allocation functions.
    struct Map
    {
      /// Metal buffer the allocation belongs to
      id<MTLBuffer> gpu_buffer;
      /// Offset of the allocation inside gpu_buffer
      size_t gpu_offset;
      /// NOTE(review): presumably the CPU-writable address of the same range -- confirm in the .mm
      void* cpu_buffer;
    };

    /// Alignment expressed as a bit mask (alignment - 1), in the form Align() consumes.
    enum class AlignMask : size_t
    {
      None = 0,       ///< no rounding
      Other = 15,     ///< round up to a multiple of 16
      Uniform = 255,  ///< round up to a multiple of 256
    };

    // Declaring the move constructor as deleted also suppresses the implicit copy operations.
    StateTracker(StateTracker&&) = delete;
    explicit StateTracker();
    ~StateTracker();

    // MARK: Framebuffer / pass management

    /// Returns the framebuffer last stored in m_current_framebuffer (nullptr until one is set).
    Framebuffer* GetCurrentFramebuffer() const { return m_current_framebuffer; }
    void SetCurrentFramebuffer(Framebuffer* framebuffer);
    void BeginClearRenderPass(MTLClearColor color, float depth);
    void BeginRenderPass(MTLLoadAction load_action);
    void BeginRenderPass(MTLRenderPassDescriptor* descriptor);
    void BeginComputePass();
    MTLRenderPassDescriptor* GetRenderPassDescriptor(Framebuffer* framebuffer,
                                                     MTLLoadAction load_action);
    void EndRenderPass();
    void FlushEncoders();
    void WaitForFlushedEncoders();

    /// True while a render command buffer is currently held in m_current_render_cmdbuf.
    bool HasUnflushedData() { return static_cast<bool>(m_current_render_cmdbuf); }

    /// True unless m_current_draw is exactly one past the last draw number recorded as finished.
    /// NOTE(review): m_last_finished_draw is atomic and read with acquire ordering, so it is
    /// presumably written from another thread (e.g. a completion handler) -- confirm in the .mm.
    bool GPUBusy() const
    {
      return m_current_draw != 1 + m_last_finished_draw.load(std::memory_order_acquire);
    }

    void ReloadSamplers();

    /// Switches manual buffer upload off, but only if the feature is supported and currently
    /// enabled, and neither an upload nor a render command buffer is open.
    void NotifyOfCPUGPUSync()
    {
      // Nothing to turn off when the feature is unsupported or already disabled.
      if (!g_features.manual_buffer_upload || !m_manual_buffer_upload)
        return;
      // Leave the mode untouched while a command buffer is still being recorded.
      if (m_upload_cmdbuf || m_current_render_cmdbuf)
        return;
      SetManualBufferUpload(false);
    }

    // MARK: State setters

    void SetPipeline(const Pipeline* pipe);
    void SetPipeline(const ComputePipeline* pipe);
    void SetScissor(const MathUtil::Rectangle<int>& rect);
    void SetViewport(float x, float y, float width, float height, float near_depth, float far_depth);
    void SetTexture(u32 idx, id<MTLTexture> texture);
    void SetSampler(u32 idx, const SamplerState& sampler);
    void InvalidateUniforms(bool vertex, bool geometry, bool fragment);
    void SetUtilityUniform(const void* buffer, size_t size);
    void SetTexelBuffer(id<MTLBuffer> buffer, u32 offset0, u32 offset1);
    void SetVerticesAndIndices(id<MTLBuffer> vertices, id<MTLBuffer> indices);
    void SetBBoxBuffer(id<MTLBuffer> bbox, id<MTLFence> upload, id<MTLFence> download);
    void SetVertexBufferNow(u32 idx, id<MTLBuffer> buffer, u32 offset);
    void SetFragmentBufferNow(u32 idx, id<MTLBuffer> buffer, u32 offset);
    /// Use around utility draws that are commonly used immediately before gx draws to the same buffer
    void EnableEncoderLabel(bool enabled) { m_flags.should_apply_label = enabled; }
    void EnablePerfQuery(PerfQueryGroup group, u32 query_id);
    void DisablePerfQuery();
    void UnbindTexture(id<MTLTexture> texture);

    // MARK: Draw / dispatch

    void Draw(u32 base_vertex, u32 num_vertices);
    void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex);
    void DispatchComputeShader(u32 groupsize_x, u32 groupsize_y, u32 groupsize_z, u32 groups_x,
                               u32 groups_y, u32 groups_z);
    void ResolveTexture(id<MTLTexture> src, id<MTLTexture> dst, u32 layer, u32 level);

    // MARK: Upload allocation

    /// Rounds `amt` up to the next multiple of (mask + 1).
    /// Only meaningful for masks of the form 2^k - 1, which is what AlignMask provides.
    /// Uses no instance state, hence static; it can still be called through an instance.
    static constexpr size_t Align(size_t amt, AlignMask align)
    {
      const size_t mask = static_cast<size_t>(align);
      return (amt + mask) & ~mask;
    }

    Map AllocateForTextureUpload(size_t amt);

    /// Convenience wrapper: Preallocate() immediately followed by CommitPreallocation().
    Map Allocate(UploadBuffer buffer_idx, size_t amt, AlignMask align)
    {
      Preallocate(buffer_idx, amt);
      return CommitPreallocation(buffer_idx, amt, align);
    }

    std::pair<void*, size_t> Preallocate(UploadBuffer buffer_idx, size_t amt);

    /// Must follow a call to Preallocate where amt is >= to the one provided here
    ///
    /// Commits `amt` rounded up with Align(). In debug builds this asserts that the buffer's
    /// current position is already aligned to `align`.
    Map CommitPreallocation(UploadBuffer buffer_idx, size_t amt, AlignMask align)
    {
      DEBUG_ASSERT((m_upload_buffers[static_cast<int>(buffer_idx)].usage.Pos() &
                    static_cast<size_t>(align)) == 0);
      return CommitPreallocation(buffer_idx, Align(amt, align));
    }

    id<MTLBlitCommandEncoder> GetUploadEncoder();
    id<MTLBlitCommandEncoder> GetTextureUploadEncoder();
    id<MTLCommandBuffer> GetRenderCmdBuf();

  private:
    /// Bookkeeping for one upload buffer: its size, the current position, and a list of
    /// (draw number, position) entries.
    /// NOTE(review): the entries presumably let space be reused once the GPU has finished the
    /// recorded draw -- confirm against the out-of-line definitions.
    class UsageTracker
    {
      struct UsageEntry
      {
        u64 drawno;
        size_t pos;
      };
      std::vector<UsageEntry> m_usage;
      size_t m_size = 0;
      size_t m_pos = 0;

    public:
      size_t Size() const { return m_size; }
      size_t Pos() const { return m_pos; }
      bool PrepareForAllocation(u64 last_draw, size_t amt);
      size_t Allocate(u64 current_draw, size_t amt);
      void Reset(size_t new_size);
    };

    /// An upload buffer backed by a single Metal buffer plus a raw pointer.
    struct CPUBuffer
    {
      UsageTracker usage;
      MRCOwned<id<MTLBuffer>> mtlbuffer;
      void* buffer = nullptr;
    };

    /// An upload buffer backed by a CPU-side and a GPU-side Metal buffer.
    struct BufferPair
    {
      UsageTracker usage;
      MRCOwned<id<MTLBuffer>> cpubuffer;
      MRCOwned<id<MTLBuffer>> gpubuffer;
      void* buffer = nullptr;
      size_t last_upload = 0;
    };

    struct Backref;
    struct PerfQueryTracker;

    std::shared_ptr<Backref> m_backref;
    std::vector<std::shared_ptr<PerfQueryTracker>> m_perf_query_tracker_cache;
    MRCOwned<id<MTLFence>> m_fence;
    MRCOwned<id<MTLCommandBuffer>> m_upload_cmdbuf;
    MRCOwned<id<MTLBlitCommandEncoder>> m_upload_encoder;
    MRCOwned<id<MTLCommandBuffer>> m_texture_upload_cmdbuf;
    MRCOwned<id<MTLBlitCommandEncoder>> m_texture_upload_encoder;
    MRCOwned<id<MTLCommandBuffer>> m_current_render_cmdbuf;
    MRCOwned<id<MTLCommandBuffer>> m_last_render_cmdbuf;
    MRCOwned<id<MTLRenderCommandEncoder>> m_current_render_encoder;
    MRCOwned<id<MTLComputeCommandEncoder>> m_current_compute_encoder;
    MRCOwned<MTLRenderPassDescriptor*> m_resolve_pass_desc;
    // Initialized so GetCurrentFramebuffer() never returns an indeterminate pointer.
    Framebuffer* m_current_framebuffer = nullptr;
    CPUBuffer m_texture_upload_buffer;
    // One entry per UploadBuffer enumerator.
    BufferPair m_upload_buffers[static_cast<int>(UploadBuffer::Last) + 1];
    // Starts one ahead of m_last_finished_draw, so GPUBusy() is false initially.
    u64 m_current_draw = 1;
    std::atomic<u64> m_last_finished_draw{0};

    MRCOwned<id<MTLTexture>> m_dummy_texture;

    // Compute has a set of samplers and a set of writable images
    static constexpr u32 MAX_COMPUTE_TEXTURES = VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS * 2;
    static constexpr u32 MAX_PIXEL_TEXTURES = VideoCommon::MAX_PIXEL_SHADER_SAMPLERS;
    static constexpr u32 MAX_TEXTURES = std::max(MAX_PIXEL_TEXTURES, MAX_COMPUTE_TEXTURES);
    static constexpr u32 MAX_SAMPLERS =
        std::max(VideoCommon::MAX_PIXEL_SHADER_SAMPLERS, VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS);

    // MARK: State

    // The static_asserts below require these to hold at least one bit per texture / sampler slot.
    u16 m_dirty_textures = 0;
    u16 m_dirty_samplers = 0;
    static_assert(sizeof(m_dirty_textures) * 8 >= MAX_TEXTURES, "Make this bigger");
    static_assert(sizeof(m_dirty_samplers) * 8 >= MAX_SAMPLERS, "Make this bigger");

    /// Boolean flags packed into 16 bits; `bits` overlays the individual bit-fields so they can
    /// be masked in one operation.
    union Flags
    {
      struct
      {
        // clang-format off
        bool has_gx_vs_uniform      : 1;
        bool has_gx_gs_uniform      : 1;
        bool has_gx_ps_uniform      : 1;
        bool has_utility_vs_uniform : 1;
        bool has_utility_ps_uniform : 1;
        bool has_pipeline           : 1;
        bool has_scissor            : 1;
        bool has_viewport           : 1;
        bool has_vertices           : 1;
        bool has_texel_buffer       : 1;
        bool bbox_fence             : 1;
        bool should_apply_label     : 1;
        // clang-format on
      };
      u16 bits = 0;

      /// Clears every flag except the ones set in reset_mask below.
      void NewEncoder()
      {
        // Starts as all-zero thanks to the default initializer on `bits`.
        Flags reset_mask;
        // Set the flags you *don't* want to reset
        reset_mask.should_apply_label = true;
        bits &= reset_mask.bits;
      }
    } m_flags;

    /// Things that represent the state of the encoder
    // NOTE(review): these members have no default initializers; presumably they are filled in
    // whenever a new encoder is started -- confirm in the .mm before relying on their values.
    struct Current
    {
      NSString* label;
      id<MTLRenderPipelineState> pipeline;
      std::array<id<MTLBuffer>, 2> vertex_buffers;
      std::array<id<MTLBuffer>, 3> fragment_buffers;
      u32 width;
      u32 height;
      MathUtil::Rectangle<int> scissor_rect;
      Util::Viewport viewport;
      MTLDepthClipMode depth_clip_mode;
      MTLCullMode cull_mode;
      DepthStencilSelector depth_stencil;
      PerfQueryGroup perf_query_group;
    } m_current;
    std::shared_ptr<PerfQueryTracker> m_current_perf_query;

    /// Things that represent what we'd *like* to have on the encoder for the next draw
    struct State
    {
      MathUtil::Rectangle<int> scissor_rect;
      Util::Viewport viewport;
      const Pipeline* render_pipeline = nullptr;
      const ComputePipeline* compute_pipeline = nullptr;
      std::array<id<MTLTexture>, MAX_TEXTURES> textures = {};
      std::array<id<MTLSamplerState>, MAX_SAMPLERS> samplers = {};
      std::array<float, MAX_SAMPLERS> sampler_min_lod = {};
      std::array<float, MAX_SAMPLERS> sampler_max_lod = {};
      std::array<SamplerState, MAX_SAMPLERS> sampler_states;
      const Texture* compute_texture = nullptr;
      std::unique_ptr<u8[]> utility_uniform;
      u32 utility_uniform_size = 0;
      u32 utility_uniform_capacity = 0;
      id<MTLBuffer> bbox = nullptr;
      id<MTLFence> bbox_upload_fence = nullptr;
      id<MTLFence> bbox_download_fence = nullptr;
      id<MTLBuffer> vertices = nullptr;
      id<MTLBuffer> indices = nullptr;
      id<MTLBuffer> texels = nullptr;
      u32 texel_buffer_offset0 = 0;
      u32 texel_buffer_offset1 = 0;
      // -1 is outside the PerfQueryGroup enumerators; NOTE(review): presumably "no query active".
      PerfQueryGroup perf_query_group = static_cast<PerfQueryGroup>(-1);
      u32 perf_query_id = 0;
    } m_state;

    u32 m_perf_query_tracker_counter = 0;
    bool m_manual_buffer_upload = false;

    void SetManualBufferUpload(bool enable);
    std::shared_ptr<PerfQueryTracker> NewPerfQueryTracker();
    void SetSamplerForce(u32 idx, const SamplerState& sampler);
    void Sync(BufferPair& buffer);
    /// Overload taking the final (already aligned) size; the public overload forwards here.
    Map CommitPreallocation(UploadBuffer buffer_idx, size_t actual_amt);
    void CheckViewport();
    void CheckScissor();
    void PrepareRender();
    void PrepareCompute();
  };

  /// Global state tracker instance; defined in a source file outside this header.
  extern std::unique_ptr<StateTracker> g_state_tracker;
  }  // namespace Metal