MTLStateTracker.mm

// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "VideoBackends/Metal/MTLStateTracker.h"

#include <algorithm>
#include <bit>
#include <mutex>

#include "Common/Align.h"
#include "Common/Assert.h"

#include "Core/System.h"

#include "VideoBackends/Metal/MTLObjectCache.h"
#include "VideoBackends/Metal/MTLPerfQuery.h"
#include "VideoBackends/Metal/MTLPipeline.h"
#include "VideoBackends/Metal/MTLTexture.h"
#include "VideoBackends/Metal/MTLUtil.h"

#include "VideoCommon/GeometryShaderManager.h"
#include "VideoCommon/PixelShaderManager.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/VertexShaderManager.h"
#include "VideoCommon/VideoConfig.h"

static constexpr u32 PERF_QUERY_BUFFER_SIZE = 512;

std::unique_ptr<Metal::StateTracker> Metal::g_state_tracker;

struct Metal::StateTracker::Backref
{
  std::mutex mtx;
  StateTracker* state_tracker;
  explicit Backref(StateTracker* state_tracker) : state_tracker(state_tracker) {}
};

struct Metal::StateTracker::PerfQueryTracker
{
  MRCOwned<id<MTLBuffer>> buffer;
  const u64* contents;
  std::vector<PerfQueryGroup> groups;
  u32 query_id;
};
static NSString* GetName(Metal::StateTracker::UploadBuffer buffer)
{
  // clang-format off
  switch (buffer)
  {
    case Metal::StateTracker::UploadBuffer::Texels:  return @"Texels";
    case Metal::StateTracker::UploadBuffer::Vertex:  return @"Vertices";
    case Metal::StateTracker::UploadBuffer::Index:   return @"Indices";
    case Metal::StateTracker::UploadBuffer::Uniform: return @"Uniforms";
    case Metal::StateTracker::UploadBuffer::Other:   return @"Generic Upload";
  }
  // clang-format on
}

// MARK: - UsageTracker
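
// The usage tracker treats its buffer as a ring: each (draw number, position) entry marks where
// a draw's allocations begin, and entries are retired once the GPU reports that draw finished.
// PrepareForAllocation trims retired entries, wraps the write position when the request doesn't
// fit before the end of the buffer, and returns true if the buffer must be reallocated (either
// the region we'd write into is still in use by the GPU, or the request simply doesn't fit).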
bool Metal::StateTracker::UsageTracker::PrepareForAllocation(u64 last_draw, size_t amt)
{
  auto removeme = std::ranges::find_if(
      m_usage, [last_draw](UsageEntry usage) { return usage.drawno > last_draw; });
  if (removeme != m_usage.begin())
    m_usage.erase(m_usage.begin(), removeme);

  bool still_in_use = false;
  const bool needs_wrap = m_pos + amt > m_size;
  if (!m_usage.empty())
  {
    size_t used = m_usage.front().pos;
    if (needs_wrap)
      still_in_use = used >= m_pos || used < amt;
    else
      still_in_use = used >= m_pos && used < m_pos + amt;
  }
  if (needs_wrap)
    m_pos = 0;

  return still_in_use || amt > m_size;
}
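
// Worked example for PrepareForAllocation (hypothetical numbers): with a 4096-byte buffer,
// m_pos == 4000 and amt == 128, the request wraps to position 0. If the oldest live draw still
// owns bytes below 128, the function returns true and the caller orphans the buffer rather than
// overwriting data the GPU may still be reading.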
size_t Metal::StateTracker::UsageTracker::Allocate(u64 current_draw, size_t amt)
{
  // An allocation of zero bytes would make the buffer think it's full.
  // Zero bytes is useless anyway, so don't mark usage in that case.
  if (!amt)
    return m_pos;
  if (m_usage.empty() || m_usage.back().drawno != current_draw)
    m_usage.push_back({current_draw, m_pos});
  size_t ret = m_pos;
  m_pos += amt;
  return ret;
}

void Metal::StateTracker::UsageTracker::Reset(size_t new_size)
{
  m_usage.clear();
  m_size = new_size;
  m_pos = 0;
}

// MARK: - StateTracker
Metal::StateTracker::StateTracker() : m_backref(std::make_shared<Backref>(this))
{
  m_flags.should_apply_label = true;
  m_fence = MRCTransfer([g_device newFence]);
  m_resolve_pass_desc = MRCTransfer([MTLRenderPassDescriptor new]);
  auto color0 = [[m_resolve_pass_desc colorAttachments] objectAtIndexedSubscript:0];
  [color0 setLoadAction:MTLLoadActionLoad];
  [color0 setStoreAction:MTLStoreActionMultisampleResolve];
  MTLTextureDescriptor* texdesc =
      [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm
                                                         width:1
                                                        height:1
                                                     mipmapped:NO];
  [texdesc setTextureType:MTLTextureType2DArray];
  [texdesc setUsage:MTLTextureUsageShaderRead];
  [texdesc setStorageMode:MTLStorageModePrivate];
  m_dummy_texture = MRCTransfer([g_device newTextureWithDescriptor:texdesc]);
  [m_dummy_texture setLabel:@"Dummy Texture"];
  for (size_t i = 0; i < std::size(m_state.samplers); ++i)
  {
    SetSamplerForce(i, RenderState::GetLinearSamplerState());
    SetTexture(i, m_dummy_texture);
  }
}

Metal::StateTracker::~StateTracker()
{
  FlushEncoders();
  std::lock_guard<std::mutex> lock(m_backref->mtx);
  m_backref->state_tracker = nullptr;
}

// MARK: BufferPair Ops
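
// Upload buffers are grow-only ring buffers. When a request can't be satisfied without stomping
// on data the GPU may still be reading, the buffer is orphaned: a fresh MTLBuffer (at least
// double the old size, shared + write-combined since the CPU only ever writes to it) replaces
// the old one, which stays alive as long as any in-flight command buffer still references it.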
Metal::StateTracker::Map Metal::StateTracker::AllocateForTextureUpload(size_t amt)
{
  amt = (amt + 15) & ~15ull;
  CPUBuffer& buffer = m_texture_upload_buffer;
  u64 last_draw = m_last_finished_draw.load(std::memory_order_acquire);
  bool needs_new = buffer.usage.PrepareForAllocation(last_draw, amt);
  if (needs_new) [[unlikely]]
  {
    // Orphan buffer
    size_t newsize = std::max<size_t>(buffer.usage.Size() * 2, 4096);
    while (newsize < amt)
      newsize *= 2;
    MTLResourceOptions options =
        MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined;
    buffer.mtlbuffer = MRCTransfer([g_device newBufferWithLength:newsize options:options]);
    [buffer.mtlbuffer setLabel:@"Texture Upload Buffer"];
    ASSERT_MSG(VIDEO, buffer.mtlbuffer, "Failed to allocate MTLBuffer (out of memory?)");
    buffer.buffer = [buffer.mtlbuffer contents];
    buffer.usage.Reset(newsize);
  }
  size_t pos = buffer.usage.Allocate(m_current_draw, amt);
  Map ret = {buffer.mtlbuffer, pos, reinterpret_cast<char*>(buffer.buffer) + pos};
  DEBUG_ASSERT(pos <= buffer.usage.Size() &&
               "Previous code should have guaranteed there was enough space");
  return ret;
}
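
// Preallocate/CommitPreallocation form a two-phase allocation: Preallocate reserves nothing but
// returns a pointer and offset where the next allocation will land (growing or wrapping the
// buffer as needed), and CommitPreallocation then claims `amt` bytes from that position. A
// caller can Preallocate a worst-case size, write into it, and commit only what it used.
// A minimal sketch of that pattern (hypothetical sizes and WriteData helper; the real callers
// live in the vertex manager and friends):
//
//   auto [ptr, offset] = g_state_tracker->Preallocate(UploadBuffer::Other, max_size);
//   size_t used = WriteData(ptr);  // writes at most max_size bytes
//   Map map = g_state_tracker->CommitPreallocation(UploadBuffer::Other, used);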
std::pair<void*, size_t> Metal::StateTracker::Preallocate(UploadBuffer buffer_idx, size_t amt)
{
  BufferPair& buffer = m_upload_buffers[static_cast<int>(buffer_idx)];
  u64 last_draw = m_last_finished_draw.load(std::memory_order_acquire);
  size_t base_pos = buffer.usage.Pos();
  bool needs_new = buffer.usage.PrepareForAllocation(last_draw, amt);
  bool needs_upload = needs_new || buffer.usage.Pos() == 0;
  if (m_manual_buffer_upload && needs_upload)
  {
    if (base_pos != buffer.last_upload)
    {
      id<MTLBlitCommandEncoder> encoder = GetUploadEncoder();
      [encoder copyFromBuffer:buffer.cpubuffer
                 sourceOffset:buffer.last_upload
                     toBuffer:buffer.gpubuffer
            destinationOffset:buffer.last_upload
                         size:base_pos - buffer.last_upload];
    }
    buffer.last_upload = 0;
  }
  if (needs_new) [[unlikely]]
  {
    // Orphan buffer
    size_t newsize = std::max<size_t>(buffer.usage.Size() * 2, 4096);
    while (newsize < amt)
      newsize *= 2;
    MTLResourceOptions options =
        MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined;
    buffer.cpubuffer = MRCTransfer([g_device newBufferWithLength:newsize options:options]);
    [buffer.cpubuffer setLabel:GetName(buffer_idx)];
    ASSERT_MSG(VIDEO, buffer.cpubuffer, "Failed to allocate MTLBuffer (out of memory?)");
    buffer.buffer = [buffer.cpubuffer contents];
    buffer.usage.Reset(newsize);
    if (g_features.manual_buffer_upload)
    {
      options = MTLResourceStorageModePrivate | MTLResourceHazardTrackingModeUntracked;
      buffer.gpubuffer = MRCTransfer([g_device newBufferWithLength:newsize options:options]);
      [buffer.gpubuffer setLabel:GetName(buffer_idx)];
      ASSERT_MSG(VIDEO, buffer.gpubuffer, "Failed to allocate MTLBuffer (out of memory?)");
    }
  }
  size_t pos = buffer.usage.Pos();
  return std::make_pair(reinterpret_cast<char*>(buffer.buffer) + pos, pos);
}

Metal::StateTracker::Map Metal::StateTracker::CommitPreallocation(UploadBuffer buffer_idx,
                                                                  size_t amt)
{
  BufferPair& buffer = m_upload_buffers[static_cast<int>(buffer_idx)];
  size_t pos = buffer.usage.Allocate(m_current_draw, amt);
  Map ret = {nil, pos, reinterpret_cast<char*>(buffer.buffer) + pos};
  ret.gpu_buffer = m_manual_buffer_upload ? buffer.gpubuffer : buffer.cpubuffer;
  DEBUG_ASSERT(pos <= buffer.usage.Size() &&
               "Previous code should have guaranteed there was enough space");
  return ret;
}
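
// With manual buffer upload enabled, CPU writes land in the shared cpubuffer and must be blitted
// to the private gpubuffer before the GPU reads them; Sync copies the region written since the
// last upload and advances the upload watermark.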
void Metal::StateTracker::Sync(BufferPair& buffer)
{
  if (!m_manual_buffer_upload || buffer.usage.Pos() == buffer.last_upload)
    return;
  id<MTLBlitCommandEncoder> encoder = GetUploadEncoder();
  [encoder copyFromBuffer:buffer.cpubuffer
             sourceOffset:buffer.last_upload
                 toBuffer:buffer.gpubuffer
        destinationOffset:buffer.last_upload
                     size:buffer.usage.Pos() - buffer.last_upload];
  buffer.last_upload = buffer.usage.Pos();
}

// MARK: Render Pass / Encoder Management
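
// Command buffers and encoders are created lazily on first use and torn down in FlushEncoders,
// so flushes that never upload vertices or textures don't pay for empty command buffers.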
id<MTLBlitCommandEncoder> Metal::StateTracker::GetUploadEncoder()
{
  if (!m_upload_cmdbuf)
  {
    @autoreleasepool
    {
      m_upload_cmdbuf = MRCRetain([g_queue commandBuffer]);
      [m_upload_cmdbuf setLabel:@"Vertex Upload"];
      m_upload_encoder = MRCRetain([m_upload_cmdbuf blitCommandEncoder]);
      [m_upload_encoder setLabel:@"Vertex Upload"];
    }
  }
  return m_upload_encoder;
}

id<MTLBlitCommandEncoder> Metal::StateTracker::GetTextureUploadEncoder()
{
  if (!m_texture_upload_cmdbuf)
  {
    @autoreleasepool
    {
      m_texture_upload_cmdbuf = MRCRetain([g_queue commandBuffer]);
      [m_texture_upload_cmdbuf setLabel:@"Texture Upload"];
      m_texture_upload_encoder = MRCRetain([m_texture_upload_cmdbuf blitCommandEncoder]);
      [m_texture_upload_encoder setLabel:@"Texture Upload"];
    }
  }
  return m_texture_upload_encoder;
}

id<MTLCommandBuffer> Metal::StateTracker::GetRenderCmdBuf()
{
  if (!m_current_render_cmdbuf)
  {
    @autoreleasepool
    {
      m_current_render_cmdbuf = MRCRetain([g_queue commandBuffer]);
      [m_current_render_cmdbuf setLabel:@"Draw"];
    }
  }
  return m_current_render_cmdbuf;
}

void Metal::StateTracker::SetCurrentFramebuffer(Framebuffer* framebuffer)
{
  if (framebuffer == m_current_framebuffer)
    return;
  EndRenderPass();
  m_current_framebuffer = framebuffer;
}

MTLRenderPassDescriptor* Metal::StateTracker::GetRenderPassDescriptor(Framebuffer* framebuffer,
                                                                      MTLLoadAction load_action)
{
  framebuffer->SetLoadAction(load_action);
  return framebuffer->PassDesc();
}

void Metal::StateTracker::BeginClearRenderPass(MTLClearColor color, float depth)
{
  Framebuffer* framebuffer = m_current_framebuffer;
  MTLRenderPassDescriptor* desc = GetRenderPassDescriptor(framebuffer, MTLLoadActionClear);
  desc.colorAttachments[0].clearColor = color;
  if (framebuffer->GetDepthFormat() != AbstractTextureFormat::Undefined)
    desc.depthAttachment.clearDepth = depth;
  for (size_t i = 0; i < framebuffer->NumAdditionalColorTextures(); i++)
    desc.colorAttachments[i + 1].clearColor = color;
  BeginRenderPass(desc);
}

void Metal::StateTracker::BeginRenderPass(MTLLoadAction load_action)
{
  if (m_current_render_encoder)
    return;
  BeginRenderPass(GetRenderPassDescriptor(m_current_framebuffer, load_action));
}
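
// The descriptor comes from the framebuffer and is reused across passes, so the visibility
// result buffer is attached only for the duration of encoder creation and detached again right
// after; leaving it set would leak a stale buffer into the next pass.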
void Metal::StateTracker::BeginRenderPass(MTLRenderPassDescriptor* descriptor)
{
  EndRenderPass();
  if (m_current_perf_query)
    [descriptor setVisibilityResultBuffer:m_current_perf_query->buffer];
  m_current_render_encoder =
      MRCRetain([GetRenderCmdBuf() renderCommandEncoderWithDescriptor:descriptor]);
  if (m_current_perf_query)
    [descriptor setVisibilityResultBuffer:nil];
  if (m_manual_buffer_upload)
    [m_current_render_encoder waitForFence:m_fence beforeStages:MTLRenderStageVertex];
  AbstractTexture* attachment = m_current_framebuffer->GetColorAttachment();
  if (!attachment)
    attachment = m_current_framebuffer->GetDepthAttachment();
  static_assert(std::is_trivially_copyable<decltype(m_current)>::value,
                "Make sure we can memset this");
  memset(&m_current, 0, sizeof(m_current));
  m_current.width = attachment->GetWidth();
  m_current.height = attachment->GetHeight();
  m_current.scissor_rect = MathUtil::Rectangle<int>(0, 0, m_current.width, m_current.height);
  m_current.viewport = {
      0.f, 0.f, static_cast<float>(m_current.width), static_cast<float>(m_current.height),
      0.f, 1.f};
  m_current.depth_stencil = DepthStencilSelector(false, CompareMode::Always);
  m_current.depth_clip_mode = MTLDepthClipModeClip;
  m_current.cull_mode = MTLCullModeNone;
  m_current.perf_query_group = static_cast<PerfQueryGroup>(-1);
  m_flags.NewEncoder();
  m_dirty_samplers = (1 << MAX_SAMPLERS) - 1;
  m_dirty_textures = (1 << MAX_TEXTURES) - 1;
  CheckScissor();
  CheckViewport();
  ASSERT_MSG(VIDEO, m_current_render_encoder, "Failed to create render encoder!");
}

void Metal::StateTracker::BeginComputePass()
{
  EndRenderPass();
  m_current_compute_encoder = MRCRetain([GetRenderCmdBuf() computeCommandEncoder]);
  [m_current_compute_encoder setLabel:@"Compute"];
  if (m_manual_buffer_upload)
    [m_current_compute_encoder waitForFence:m_fence];
  m_flags.NewEncoder();
  m_dirty_samplers = (1 << MAX_SAMPLERS) - 1;
  m_dirty_textures = (1 << MAX_TEXTURES) - 1;
}

void Metal::StateTracker::EndRenderPass()
{
  if (m_current_render_encoder)
  {
    if (m_flags.bbox_fence && m_state.bbox_download_fence)
      [m_current_render_encoder updateFence:m_state.bbox_download_fence
                                afterStages:MTLRenderStageFragment];
    [m_current_render_encoder endEncoding];
    m_current_render_encoder = nullptr;
  }
  if (m_current_compute_encoder)
  {
    [m_current_compute_encoder endEncoding];
    m_current_compute_encoder = nullptr;
  }
}
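
// FlushEncoders commits everything recorded so far. The upload command buffers are committed
// before the render command buffer that depends on them, and the render command buffer's
// completion handler is what advances m_last_finished_draw and recycles the perf query tracker.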
void Metal::StateTracker::FlushEncoders()
{
  if (!m_current_render_cmdbuf)
    return;
  EndRenderPass();
  for (int i = 0; i <= static_cast<int>(UploadBuffer::Last); ++i)
    Sync(m_upload_buffers[i]);
  if (!m_manual_buffer_upload)
  {
    ASSERT(!m_upload_cmdbuf && "Should never be used!");
  }
  else if (m_upload_cmdbuf)
  {
    [m_upload_encoder updateFence:m_fence];
    [m_upload_encoder endEncoding];
    [m_upload_cmdbuf commit];
    m_upload_encoder = nullptr;
    m_upload_cmdbuf = nullptr;
  }
  if (m_texture_upload_cmdbuf)
  {
    [m_texture_upload_encoder endEncoding];
    [m_texture_upload_cmdbuf commit];
    m_texture_upload_encoder = nullptr;
    m_texture_upload_cmdbuf = nullptr;
  }
  [m_current_render_cmdbuf
      addCompletedHandler:[backref = m_backref, draw = m_current_draw,
                           q = std::move(m_current_perf_query)](id<MTLCommandBuffer> buf) {
        std::lock_guard<std::mutex> guard(backref->mtx);
        if (StateTracker* tracker = backref->state_tracker)
        {
          // We can do the update non-atomically because we only ever update under the lock
          u64 newval =
              std::max(draw, tracker->m_last_finished_draw.load(std::memory_order_relaxed));
          tracker->m_last_finished_draw.store(newval, std::memory_order_release);
          if (q)
          {
            if (PerfQuery* query = static_cast<PerfQuery*>(g_perf_query.get()))
              query->ReturnResults(q->contents, q->groups.data(), q->groups.size(), q->query_id);
            tracker->m_perf_query_tracker_cache.emplace_back(std::move(q));
          }
        }
      }];
  m_current_perf_query = nullptr;
  [m_current_render_cmdbuf commit];
  m_last_render_cmdbuf = std::move(m_current_render_cmdbuf);
  m_current_render_cmdbuf = nullptr;
  m_current_draw++;
  if (g_features.manual_buffer_upload && !m_manual_buffer_upload)
    SetManualBufferUpload(true);
}

void Metal::StateTracker::WaitForFlushedEncoders()
{
  [m_last_render_cmdbuf waitUntilCompleted];
}

void Metal::StateTracker::ReloadSamplers()
{
  for (size_t i = 0; i < std::size(m_state.samplers); ++i)
    m_state.samplers[i] = g_object_cache->GetSampler(m_state.sampler_states[i]);
}

void Metal::StateTracker::SetManualBufferUpload(bool enabled)
{
  // When a game does something that needs CPU-GPU sync (e.g. bbox, texture download, etc.),
  // the next command buffer will be done with manual buffer upload disabled,
  // since overlapping the upload with the previous draw won't be possible (due to the sync).
  // This greatly improves performance in heavy bbox games like Super Paper Mario.
  m_manual_buffer_upload = enabled;
  if (enabled)
  {
    for (BufferPair& buffer : m_upload_buffers)
    {
      // Update sync positions, since Sync doesn't do it when manual buffer upload is off
      buffer.last_upload = buffer.usage.Pos();
    }
  }
}
// MARK: State Setters

void Metal::StateTracker::SetPipeline(const Pipeline* pipe)
{
  if (pipe != m_state.render_pipeline)
  {
    m_state.render_pipeline = pipe;
    m_flags.has_pipeline = false;
  }
}

void Metal::StateTracker::SetPipeline(const ComputePipeline* pipe)
{
  if (pipe != m_state.compute_pipeline)
  {
    m_state.compute_pipeline = pipe;
    m_flags.has_pipeline = false;
  }
}

void Metal::StateTracker::SetScissor(const MathUtil::Rectangle<int>& rect)
{
  m_state.scissor_rect = rect;
  CheckScissor();
}

void Metal::StateTracker::CheckScissor()
{
  auto clipped = m_state.scissor_rect;
  clipped.ClampUL(0, 0, m_current.width, m_current.height);
  m_flags.has_scissor = clipped == m_current.scissor_rect;
}

void Metal::StateTracker::SetViewport(float x, float y, float width, float height,
                                      float near_depth, float far_depth)
{
  m_state.viewport = {x, y, width, height, near_depth, far_depth};
  CheckViewport();
}

void Metal::StateTracker::CheckViewport()
{
  m_flags.has_viewport =
      0 == memcmp(&m_state.viewport, &m_current.viewport, sizeof(m_current.viewport));
}

void Metal::StateTracker::SetTexture(u32 idx, id<MTLTexture> texture)
{
  ASSERT(idx < std::size(m_state.textures));
  if (!texture)
    texture = m_dummy_texture;
  if (m_state.textures[idx] != texture)
  {
    m_state.textures[idx] = texture;
    m_dirty_textures |= 1 << idx;
  }
}

void Metal::StateTracker::SetSamplerForce(u32 idx, const SamplerState& sampler)
{
  m_state.samplers[idx] = g_object_cache->GetSampler(sampler);
  m_state.sampler_min_lod[idx] = sampler.tm1.min_lod;
  m_state.sampler_max_lod[idx] = sampler.tm1.max_lod;
  m_state.sampler_states[idx] = sampler;
  m_dirty_samplers |= 1 << idx;
}

void Metal::StateTracker::SetSampler(u32 idx, const SamplerState& sampler)
{
  ASSERT(idx < std::size(m_state.samplers));
  if (m_state.sampler_states[idx] != sampler)
    SetSamplerForce(idx, sampler);
}

void Metal::StateTracker::UnbindTexture(id<MTLTexture> texture)
{
  for (size_t i = 0; i < std::size(m_state.textures); ++i)
  {
    if (m_state.textures[i] == texture)
    {
      m_state.textures[i] = m_dummy_texture;
      m_dirty_textures |= 1 << i;
    }
  }
}

void Metal::StateTracker::InvalidateUniforms(bool vertex, bool geometry, bool fragment)
{
  m_flags.has_gx_vs_uniform &= !vertex;
  m_flags.has_gx_gs_uniform &= !geometry;
  m_flags.has_gx_ps_uniform &= !fragment;
}

void Metal::StateTracker::SetUtilityUniform(const void* buffer, size_t size)
{
  // Shaders often use 16-byte aligned types.
  // Metal validation will complain if our upload is smaller than the struct with padding.
  size_t aligned_size = Common::AlignUp(size, 16);
  if (m_state.utility_uniform_capacity < size)
  {
    m_state.utility_uniform = std::unique_ptr<u8[]>(new u8[aligned_size]);
    m_state.utility_uniform_capacity = static_cast<u32>(aligned_size);
  }
  m_state.utility_uniform_size = static_cast<u32>(aligned_size);
  memcpy(m_state.utility_uniform.get(), buffer, size);
  m_flags.has_utility_vs_uniform = false;
  m_flags.has_utility_ps_uniform = false;
}

void Metal::StateTracker::SetTexelBuffer(id<MTLBuffer> buffer, u32 offset0, u32 offset1)
{
  m_state.texels = buffer;
  m_state.texel_buffer_offset0 = offset0;
  m_state.texel_buffer_offset1 = offset1;
  m_flags.has_texel_buffer = false;
}

void Metal::StateTracker::SetVerticesAndIndices(id<MTLBuffer> vertices, id<MTLBuffer> indices)
{
  if (m_state.vertices != vertices)
  {
    m_flags.has_vertices = false;
    m_state.vertices = vertices;
  }
  m_state.indices = indices;
}

void Metal::StateTracker::SetBBoxBuffer(id<MTLBuffer> bbox, id<MTLFence> upload,
                                        id<MTLFence> download)
{
  m_state.bbox = bbox;
  m_state.bbox_upload_fence = upload;
  m_state.bbox_download_fence = download;
}

void Metal::StateTracker::SetVertexBufferNow(u32 idx, id<MTLBuffer> buffer, u32 offset)
{
  if (idx < std::size(m_current.vertex_buffers) && m_current.vertex_buffers[idx] == buffer)
  {
    [m_current_render_encoder setVertexBufferOffset:offset atIndex:idx];
  }
  else
  {
    [m_current_render_encoder setVertexBuffer:buffer offset:offset atIndex:idx];
    m_current.vertex_buffers[idx] = buffer;
  }
}

void Metal::StateTracker::SetFragmentBufferNow(u32 idx, id<MTLBuffer> buffer, u32 offset)
{
  if (idx < std::size(m_current.fragment_buffers) && m_current.fragment_buffers[idx] == buffer)
  {
    [m_current_render_encoder setFragmentBufferOffset:offset atIndex:idx];
  }
  else
  {
    [m_current_render_encoder setFragmentBuffer:buffer offset:offset atIndex:idx];
    m_current.fragment_buffers[idx] = buffer;
  }
}
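
// Visibility result buffers are pooled: completed trackers are pushed back onto
// m_perf_query_tracker_cache by the command buffer completion handler (under the backref lock),
// and NewPerfQueryTracker pops from that cache before allocating a fresh buffer.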
std::shared_ptr<Metal::StateTracker::PerfQueryTracker> Metal::StateTracker::NewPerfQueryTracker()
{
  static_cast<PerfQuery*>(g_perf_query.get())->IncCount();
  // The cache is repopulated asynchronously
  std::lock_guard<std::mutex> lock(m_backref->mtx);
  if (m_perf_query_tracker_cache.empty())
  {
    // Make a new one
    @autoreleasepool
    {
      std::shared_ptr<PerfQueryTracker> tracker = std::make_shared<PerfQueryTracker>();
      const MTLResourceOptions options =
          MTLResourceStorageModeShared | MTLResourceHazardTrackingModeUntracked;
      id<MTLBuffer> buffer = [g_device newBufferWithLength:PERF_QUERY_BUFFER_SIZE * sizeof(u64)
                                                   options:options];
      [buffer setLabel:[NSString stringWithFormat:@"PerfQuery Buffer %d",
                                                  m_perf_query_tracker_counter++]];
      tracker->buffer = MRCTransfer(buffer);
      tracker->contents = static_cast<const u64*>([buffer contents]);
      tracker->query_id = m_state.perf_query_id;
      return tracker;
    }
  }
  else
  {
    // Reuse an old one
    std::shared_ptr<PerfQueryTracker> tracker = std::move(m_perf_query_tracker_cache.back());
    m_perf_query_tracker_cache.pop_back();
    tracker->groups.clear();
    tracker->query_id = m_state.perf_query_id;
    return tracker;
  }
}

void Metal::StateTracker::EnablePerfQuery(PerfQueryGroup group, u32 query_id)
{
  m_state.perf_query_group = group;
  m_state.perf_query_id = query_id;
  if (!m_current_perf_query || m_current_perf_query->query_id != query_id ||
      m_current_perf_query->groups.size() == PERF_QUERY_BUFFER_SIZE)
  {
    if (m_current_render_encoder)
      EndRenderPass();
    if (m_current_perf_query)
    {
      [m_current_render_cmdbuf
          addCompletedHandler:[backref = m_backref, q = std::move(m_current_perf_query)](id) {
            std::lock_guard<std::mutex> guard(backref->mtx);
            if (StateTracker* tracker = backref->state_tracker)
            {
              if (PerfQuery* query = static_cast<PerfQuery*>(g_perf_query.get()))
                query->ReturnResults(q->contents, q->groups.data(), q->groups.size(),
                                     q->query_id);
              tracker->m_perf_query_tracker_cache.emplace_back(std::move(q));
            }
          }];
      m_current_perf_query.reset();
    }
  }
}

void Metal::StateTracker::DisablePerfQuery()
{
  m_state.perf_query_group = static_cast<PerfQueryGroup>(-1);
}

// MARK: Render

// clang-format off
static constexpr NSString* LABEL_GX   = @"GX Draw";
static constexpr NSString* LABEL_UTIL = @"Utility Draw";
// clang-format on
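
// Converts a nonzero dirty-bit mask into the contiguous NSRange covering all set bits, so one
// Metal bind call can cover every dirty slot (possibly rebinding a few clean ones in between).
// For example, RangeOfBits(0b0110'0000) == NSMakeRange(5, 2).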
static NSRange RangeOfBits(u32 value)
{
  ASSERT(value && "Value must be nonzero");
  int low = std::countr_zero(value);
  int high = 31 - std::countl_zero(value);
  return NSMakeRange(low, high + 1 - low);
}
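
// PrepareRender flushes all deferred state to the current render encoder immediately before a
// draw: it lazily opens a render pass, then applies only what the dirty flags say has changed
// (pipeline, viewport, scissor, vertex buffer, textures, samplers, visibility counting, and the
// GX or utility uniforms the bound pipeline actually uses).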
void Metal::StateTracker::PrepareRender()
{
  // BeginRenderPass needs this
  if (m_state.perf_query_group != static_cast<PerfQueryGroup>(-1) && !m_current_perf_query)
    m_current_perf_query = NewPerfQueryTracker();
  if (!m_current_render_encoder)
    BeginRenderPass(MTLLoadActionLoad);
  id<MTLRenderCommandEncoder> enc = m_current_render_encoder;
  const Pipeline* pipe = m_state.render_pipeline;
  bool is_gx = pipe->Usage() != AbstractPipelineUsage::Utility;
  NSString* label = is_gx ? LABEL_GX : LABEL_UTIL;
  if (m_flags.should_apply_label && m_current.label != label)
  {
    m_current.label = label;
    [m_current_render_encoder setLabel:label];
  }
  if (!m_flags.has_pipeline)
  {
    m_flags.has_pipeline = true;
    if (pipe->Get() != m_current.pipeline)
    {
      m_current.pipeline = pipe->Get();
      [enc setRenderPipelineState:pipe->Get()];
    }
    if (pipe->Cull() != m_current.cull_mode)
    {
      m_current.cull_mode = pipe->Cull();
      [enc setCullMode:pipe->Cull()];
    }
    if (pipe->DepthStencil() != m_current.depth_stencil)
    {
      m_current.depth_stencil = pipe->DepthStencil();
      [enc setDepthStencilState:g_object_cache->GetDepthStencil(m_current.depth_stencil)];
    }
    MTLDepthClipMode clip = is_gx && g_ActiveConfig.backend_info.bSupportsDepthClamp ?
                                MTLDepthClipModeClamp :
                                MTLDepthClipModeClip;
    if (clip != m_current.depth_clip_mode)
    {
      m_current.depth_clip_mode = clip;
      [enc setDepthClipMode:clip];
    }
    if (is_gx && m_state.bbox_upload_fence && !m_flags.bbox_fence && pipe->UsesFragmentBuffer(2))
    {
      m_flags.bbox_fence = true;
      [enc waitForFence:m_state.bbox_upload_fence beforeStages:MTLRenderStageFragment];
      [enc setFragmentBuffer:m_state.bbox offset:0 atIndex:2];
    }
  }
  if (!m_flags.has_viewport)
  {
    m_flags.has_viewport = true;
    m_current.viewport = m_state.viewport;
    MTLViewport metal;
    metal.originX = m_state.viewport.x;
    metal.originY = m_state.viewport.y;
    metal.width = m_state.viewport.width;
    metal.height = m_state.viewport.height;
    metal.znear = m_state.viewport.near_depth;
    metal.zfar = m_state.viewport.far_depth;
    [enc setViewport:metal];
  }
  if (!m_flags.has_scissor)
  {
    m_flags.has_scissor = true;
    m_current.scissor_rect = m_state.scissor_rect;
    m_current.scissor_rect.ClampUL(0, 0, m_current.width, m_current.height);
    MTLScissorRect metal;
    metal.x = m_current.scissor_rect.left;
    metal.y = m_current.scissor_rect.top;
    metal.width = m_current.scissor_rect.right - m_current.scissor_rect.left;
    metal.height = m_current.scissor_rect.bottom - m_current.scissor_rect.top;
    [enc setScissorRect:metal];
  }
  if (!m_flags.has_vertices && pipe->UsesVertexBuffer(0))
  {
    m_flags.has_vertices = true;
    if (m_state.vertices)
      SetVertexBufferNow(0, m_state.vertices, 0);
  }
  if (u32 dirty = m_dirty_textures & pipe->GetTextures())
  {
    m_dirty_textures &= ~pipe->GetTextures();
    NSRange range = RangeOfBits(dirty);
    [enc setFragmentTextures:&m_state.textures[range.location] withRange:range];
  }
  if (u32 dirty = m_dirty_samplers & pipe->GetSamplers())
  {
    m_dirty_samplers &= ~pipe->GetSamplers();
    NSRange range = RangeOfBits(dirty);
    [enc setFragmentSamplerStates:&m_state.samplers[range.location]
                     lodMinClamps:&m_state.sampler_min_lod[range.location]
                     lodMaxClamps:&m_state.sampler_max_lod[range.location]
                        withRange:range];
  }
  if (m_state.perf_query_group != m_current.perf_query_group)
  {
    m_current.perf_query_group = m_state.perf_query_group;
    if (m_state.perf_query_group == static_cast<PerfQueryGroup>(-1))
    {
      [enc setVisibilityResultMode:MTLVisibilityResultModeDisabled offset:0];
    }
    else
    {
      [enc setVisibilityResultMode:MTLVisibilityResultModeCounting
                            offset:m_current_perf_query->groups.size() * 8];
      m_current_perf_query->groups.push_back(m_state.perf_query_group);
    }
  }
  if (is_gx)
  {
    // GX draw
    if (!m_flags.has_gx_vs_uniform)
    {
      m_flags.has_gx_vs_uniform = true;
      Map map = Allocate(UploadBuffer::Uniform, sizeof(VertexShaderConstants), AlignMask::Uniform);
      auto& system = Core::System::GetInstance();
      auto& vertex_shader_manager = system.GetVertexShaderManager();
      memcpy(map.cpu_buffer, &vertex_shader_manager.constants, sizeof(VertexShaderConstants));
      SetVertexBufferNow(1, map.gpu_buffer, map.gpu_offset);
      if (pipe->UsesFragmentBuffer(1))
        SetFragmentBufferNow(1, map.gpu_buffer, map.gpu_offset);
      ADDSTAT(g_stats.this_frame.bytes_uniform_streamed,
              Align(sizeof(VertexShaderConstants), AlignMask::Uniform));
    }
    if (!m_flags.has_gx_gs_uniform && pipe->UsesVertexBuffer(2))
    {
      m_flags.has_gx_gs_uniform = true;
      auto& system = Core::System::GetInstance();
      auto& geometry_shader_manager = system.GetGeometryShaderManager();
      [m_current_render_encoder setVertexBytes:&geometry_shader_manager.constants
                                        length:sizeof(GeometryShaderConstants)
                                       atIndex:2];
      ADDSTAT(g_stats.this_frame.bytes_uniform_streamed, sizeof(GeometryShaderConstants));
    }
    if (!m_flags.has_gx_ps_uniform)
    {
      m_flags.has_gx_ps_uniform = true;
      Map map = Allocate(UploadBuffer::Uniform, sizeof(PixelShaderConstants), AlignMask::Uniform);
      auto& system = Core::System::GetInstance();
      auto& pixel_shader_manager = system.GetPixelShaderManager();
      memcpy(map.cpu_buffer, &pixel_shader_manager.constants, sizeof(PixelShaderConstants));
      SetFragmentBufferNow(0, map.gpu_buffer, map.gpu_offset);
      ADDSTAT(g_stats.this_frame.bytes_uniform_streamed,
              Align(sizeof(PixelShaderConstants), AlignMask::Uniform));
    }
  }
  else
  {
    // Utility draw
    if (!m_flags.has_utility_vs_uniform && pipe->UsesVertexBuffer(1))
    {
      m_flags.has_utility_vs_uniform = true;
      m_flags.has_gx_vs_uniform = false;
      [enc setVertexBytes:m_state.utility_uniform.get()
                   length:m_state.utility_uniform_size
                  atIndex:1];
    }
    if (!m_flags.has_utility_ps_uniform && pipe->UsesFragmentBuffer(0))
    {
      m_flags.has_utility_ps_uniform = true;
      m_flags.has_gx_ps_uniform = false;
      [enc setFragmentBytes:m_state.utility_uniform.get()
                     length:m_state.utility_uniform_size
                    atIndex:0];
    }
    if (!m_flags.has_texel_buffer && pipe->UsesFragmentBuffer(2))
    {
      m_flags.has_texel_buffer = true;
      SetFragmentBufferNow(2, m_state.texels, m_state.texel_buffer_offset0);
    }
  }
}
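
// PrepareCompute is the compute-pass analogue of PrepareRender: it lazily opens a compute
// encoder and rebinds only the dirty pipeline, texture, sampler, and buffer state that the
// bound compute pipeline actually uses.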
void Metal::StateTracker::PrepareCompute()
{
  if (!m_current_compute_encoder)
    BeginComputePass();
  id<MTLComputeCommandEncoder> enc = m_current_compute_encoder;
  const ComputePipeline* pipe = m_state.compute_pipeline;
  if (!m_flags.has_pipeline)
  {
    m_flags.has_pipeline = true;
    [enc setComputePipelineState:pipe->GetComputePipeline()];
  }
  if (u32 dirty = m_dirty_textures & pipe->GetTextures())
  {
    m_dirty_textures &= ~pipe->GetTextures();
    // Since there are two sets of textures, it's likely there'll be a few in each.
    // Check each set separately to avoid doing too many unnecessary bindings.
    constexpr u32 lo_mask = (1 << VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS) - 1;
    if (u32 lo = dirty & lo_mask)
    {
      NSRange range = RangeOfBits(lo);
      [enc setTextures:&m_state.textures[range.location] withRange:range];
    }
    if (u32 hi = dirty & ~lo_mask)
    {
      NSRange range = RangeOfBits(hi);
      [enc setTextures:&m_state.textures[range.location] withRange:range];
    }
  }
  if (u32 dirty = m_dirty_samplers & pipe->GetSamplers())
  {
    m_dirty_samplers &= ~pipe->GetSamplers();
    NSRange range = RangeOfBits(dirty);
    [enc setSamplerStates:&m_state.samplers[range.location]
             lodMinClamps:&m_state.sampler_min_lod[range.location]
             lodMaxClamps:&m_state.sampler_max_lod[range.location]
                withRange:range];
  }
  // Compute and render can't happen at the same time, so just reuse one of the flags
  if (!m_flags.has_utility_vs_uniform && pipe->UsesBuffer(0))
  {
    m_flags.has_utility_vs_uniform = true;
    [enc setBytes:m_state.utility_uniform.get() length:m_state.utility_uniform_size atIndex:0];
  }
  if (!m_flags.has_texel_buffer && pipe->UsesBuffer(2))
  {
    m_flags.has_texel_buffer = true;
    [enc setBuffer:m_state.texels offset:m_state.texel_buffer_offset0 atIndex:2];
    if (pipe->UsesBuffer(3))
      [enc setBuffer:m_state.texels offset:m_state.texel_buffer_offset1 atIndex:3];
  }
}

void Metal::StateTracker::Draw(u32 base_vertex, u32 num_vertices)
{
  if (!num_vertices)
    return;
  PrepareRender();
  [m_current_render_encoder drawPrimitives:m_state.render_pipeline->Prim()
                               vertexStart:base_vertex
                               vertexCount:num_vertices];
}

void Metal::StateTracker::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex)
{
  if (!num_indices)  // Happens in Metroid Prime; Metal API validation doesn't like it
    return;
  PrepareRender();
  [m_current_render_encoder drawIndexedPrimitives:m_state.render_pipeline->Prim()
                                       indexCount:num_indices
                                        indexType:MTLIndexTypeUInt16
                                      indexBuffer:m_state.indices
                                indexBufferOffset:base_index * sizeof(u16)
                                    instanceCount:1
                                       baseVertex:base_vertex
                                     baseInstance:0];
}

void Metal::StateTracker::DispatchComputeShader(u32 groupsize_x, u32 groupsize_y, u32 groupsize_z,
                                                u32 groups_x, u32 groups_y, u32 groups_z)
{
  PrepareCompute();
  [m_current_compute_encoder
       dispatchThreadgroups:MTLSizeMake(groups_x, groups_y, groups_z)
      threadsPerThreadgroup:MTLSizeMake(groupsize_x, groupsize_y, groupsize_z)];
}

void Metal::StateTracker::ResolveTexture(id<MTLTexture> src, id<MTLTexture> dst, u32 layer,
                                         u32 level)
{
  EndRenderPass();
  auto color0 = [[m_resolve_pass_desc colorAttachments] objectAtIndexedSubscript:0];
  [color0 setTexture:src];
  [color0 setResolveTexture:dst];
  [color0 setResolveSlice:layer];
  [color0 setResolveLevel:level];
  id<MTLRenderCommandEncoder> enc =
      [GetRenderCmdBuf() renderCommandEncoderWithDescriptor:m_resolve_pass_desc];
  [enc setLabel:@"Multisample Resolve"];
  [enc endEncoding];
}