MTLUtil.mm 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633
  1. // Copyright 2022 Dolphin Emulator Project
  2. // SPDX-License-Identifier: GPL-2.0-or-later
  3. #include "VideoBackends/Metal/MTLUtil.h"
  4. #include <fstream>
  5. #include <string>
  6. #include <TargetConditionals.h>
  7. #include <spirv_msl.hpp>
  8. #include "Common/MsgHandler.h"
  9. #include "VideoCommon/Constants.h"
  10. #include "VideoCommon/DriverDetails.h"
  11. #include "VideoCommon/Spirv.h"
// Storage for the Metal backend's global feature flags.
// manual_buffer_upload and subgroup_ops are filled in by Util::PopulateBackendInfoFeatures.
Metal::DeviceFeatures Metal::g_features;
  13. std::vector<MRCOwned<id<MTLDevice>>> Metal::Util::GetAdapterList()
  14. {
  15. std::vector<MRCOwned<id<MTLDevice>>> list;
  16. id<MTLDevice> default_dev = MTLCreateSystemDefaultDevice();
  17. if (default_dev)
  18. list.push_back(MRCTransfer(default_dev));
  19. #if TARGET_OS_OSX
  20. auto devices = MRCTransfer(MTLCopyAllDevices());
  21. for (id<MTLDevice> device in devices.Get())
  22. {
  23. if (device != default_dev)
  24. list.push_back(MRCRetain(device));
  25. }
  26. #endif
  27. return list;
  28. }
  29. void Metal::Util::PopulateBackendInfo(VideoConfig* config)
  30. {
  31. config->backend_info.api_type = APIType::Metal;
  32. config->backend_info.bUsesLowerLeftOrigin = false;
  33. config->backend_info.bSupportsExclusiveFullscreen = false;
  34. config->backend_info.bSupportsDualSourceBlend = true;
  35. config->backend_info.bSupportsPrimitiveRestart = true;
  36. config->backend_info.bSupportsGeometryShaders = false;
  37. config->backend_info.bSupportsComputeShaders = true;
  38. config->backend_info.bSupports3DVision = false;
  39. config->backend_info.bSupportsEarlyZ = true;
  40. config->backend_info.bSupportsBindingLayout = true;
  41. config->backend_info.bSupportsBBox = true;
  42. config->backend_info.bSupportsGSInstancing = false;
  43. config->backend_info.bSupportsPostProcessing = true;
  44. config->backend_info.bSupportsPaletteConversion = true;
  45. config->backend_info.bSupportsClipControl = true;
  46. config->backend_info.bSupportsSSAA = true;
  47. config->backend_info.bSupportsFragmentStoresAndAtomics = true;
  48. config->backend_info.bSupportsReversedDepthRange = false;
  49. config->backend_info.bSupportsLogicOp = false;
  50. config->backend_info.bSupportsMultithreading = false;
  51. config->backend_info.bSupportsGPUTextureDecoding = true;
  52. config->backend_info.bSupportsCopyToVram = true;
  53. config->backend_info.bSupportsBitfield = true;
  54. config->backend_info.bSupportsDynamicSamplerIndexing = true;
  55. config->backend_info.bSupportsFramebufferFetch = false;
  56. config->backend_info.bSupportsBackgroundCompiling = true;
  57. config->backend_info.bSupportsLargePoints = true;
  58. config->backend_info.bSupportsPartialDepthCopies = true;
  59. config->backend_info.bSupportsDepthReadback = true;
  60. config->backend_info.bSupportsShaderBinaries = false;
  61. config->backend_info.bSupportsPipelineCacheData = false;
  62. config->backend_info.bSupportsCoarseDerivatives = false;
  63. config->backend_info.bSupportsTextureQueryLevels = true;
  64. config->backend_info.bSupportsLodBiasInSampler = false;
  65. config->backend_info.bSupportsSettingObjectNames = true;
  66. // Metal requires multisample resolve to be done on a render pass
  67. config->backend_info.bSupportsPartialMultisampleResolve = false;
  68. config->backend_info.bSupportsDynamicVertexLoader = true;
  69. config->backend_info.bSupportsVSLinePointExpand = true;
  70. config->backend_info.bSupportsHDROutput =
  71. 1.0 < [[NSScreen deepestScreen] maximumPotentialExtendedDynamicRangeColorComponentValue];
  72. }
  73. void Metal::Util::PopulateBackendInfoAdapters(VideoConfig* config,
  74. const std::vector<MRCOwned<id<MTLDevice>>>& adapters)
  75. {
  76. config->backend_info.Adapters.clear();
  77. for (id<MTLDevice> adapter : adapters)
  78. {
  79. config->backend_info.Adapters.push_back([[adapter name] UTF8String]);
  80. }
  81. }
  82. /// For testing driver brokenness
  83. static bool RenderSinglePixel(id<MTLDevice> dev, id<MTLFunction> vs, id<MTLFunction> fs, //
  84. u32 px_in, u32* px_out)
  85. {
  86. auto pdesc = MRCTransfer([MTLRenderPipelineDescriptor new]);
  87. [pdesc setVertexFunction:vs];
  88. [pdesc setFragmentFunction:fs];
  89. [[pdesc colorAttachments][0] setPixelFormat:MTLPixelFormatRGBA8Unorm];
  90. auto pipe = MRCTransfer([dev newRenderPipelineStateWithDescriptor:pdesc error:nil]);
  91. if (!pipe)
  92. return false;
  93. auto buf = MRCTransfer([dev newBufferWithLength:4 options:MTLResourceStorageModeShared]);
  94. memcpy([buf contents], &px_in, sizeof(px_in));
  95. auto tdesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm
  96. width:1
  97. height:1
  98. mipmapped:false];
  99. [tdesc setUsage:MTLTextureUsageRenderTarget];
  100. auto tex = MRCTransfer([dev newTextureWithDescriptor:tdesc]);
  101. auto q = MRCTransfer([dev newCommandQueue]);
  102. id<MTLCommandBuffer> cmdbuf = [q commandBuffer];
  103. id<MTLBlitCommandEncoder> upload_encoder = [cmdbuf blitCommandEncoder];
  104. [upload_encoder copyFromBuffer:buf
  105. sourceOffset:0
  106. sourceBytesPerRow:4
  107. sourceBytesPerImage:4
  108. sourceSize:MTLSizeMake(1, 1, 1)
  109. toTexture:tex
  110. destinationSlice:0
  111. destinationLevel:0
  112. destinationOrigin:MTLOriginMake(0, 0, 0)];
  113. [upload_encoder endEncoding];
  114. auto rpdesc = MRCTransfer([MTLRenderPassDescriptor new]);
  115. [[rpdesc colorAttachments][0] setTexture:tex];
  116. [[rpdesc colorAttachments][0] setLoadAction:MTLLoadActionLoad];
  117. [[rpdesc colorAttachments][0] setStoreAction:MTLStoreActionStore];
  118. id<MTLRenderCommandEncoder> renc = [cmdbuf renderCommandEncoderWithDescriptor:rpdesc];
  119. [renc setRenderPipelineState:pipe];
  120. [renc drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:3];
  121. [renc endEncoding];
  122. id<MTLBlitCommandEncoder> download_encoder = [cmdbuf blitCommandEncoder];
  123. [download_encoder copyFromTexture:tex
  124. sourceSlice:0
  125. sourceLevel:0
  126. sourceOrigin:MTLOriginMake(0, 0, 0)
  127. sourceSize:MTLSizeMake(1, 1, 1)
  128. toBuffer:buf
  129. destinationOffset:0
  130. destinationBytesPerRow:4
  131. destinationBytesPerImage:4];
  132. [download_encoder endEncoding];
  133. [cmdbuf commit];
  134. [cmdbuf waitUntilCompleted];
  135. memcpy(px_out, [buf contents], sizeof(*px_out));
  136. return [cmdbuf status] == MTLCommandBufferStatusCompleted;
  137. }
  138. static bool DetectIntelGPUFBFetch(id<MTLDevice> dev)
  139. {
  140. // Even though it's nowhere in the feature set tables, some Intel GPUs support fbfetch!
  141. // Annoyingly, the Haswell compiler successfully makes a pipeline but actually miscompiles it and
  142. // doesn't insert any fbfetch instructions.
  143. // The Broadwell compiler inserts the Skylake fbfetch instruction,
  144. // but Broadwell doesn't support that. It seems to make the shader not do anything.
  145. // So we actually have to test the thing
  146. static constexpr const char* shader = R"(
  147. vertex float4 fs_triangle(uint vid [[vertex_id]]) {
  148. return float4(vid & 1 ? 3 : -1, vid & 2 ? 3 : -1, 0, 1);
  149. }
  150. fragment float4 fbfetch_test(float4 in [[color(0), raster_order_group(0)]]) {
  151. return in * 2;
  152. }
  153. )";
  154. auto lib = MRCTransfer([dev newLibraryWithSource:[NSString stringWithUTF8String:shader]
  155. options:nil
  156. error:nil]);
  157. if (!lib)
  158. return false;
  159. u32 outpx;
  160. bool ok = RenderSinglePixel(dev, //
  161. MRCTransfer([lib newFunctionWithName:@"fs_triangle"]), //
  162. MRCTransfer([lib newFunctionWithName:@"fbfetch_test"]), //
  163. 0x11223344, &outpx);
  164. if (!ok)
  165. return false;
  166. // Proper fbfetch will double contents, Haswell will return black, and Broadwell will do nothing
  167. if (outpx == 0x22446688)
  168. return true; // Skylake+
  169. else if (outpx == 0x11223344)
  170. return false; // Broadwell
  171. else
  172. return false; // Haswell
  173. }
/// Outcome of a runtime driver probe.
enum class DetectionResult
{
  Yes,    ///< The probed condition was observed
  No,     ///< The probe ran and the condition was not observed
  Unsure  ///< The probe could not run or gave a result that matches neither case
};
  180. static DetectionResult DetectInvertedIsHelper(id<MTLDevice> dev)
  181. {
  182. static constexpr const char* shader = R"(
  183. vertex float4 fs_triangle(uint vid [[vertex_id]]) {
  184. return float4(vid & 1 ? 3 : -1, vid & 2 ? 3 : -1, 0, 1);
  185. }
  186. fragment float4 is_helper_test() {
  187. float val = metal::simd_is_helper_thread() ? 1 : 0.5;
  188. return float4(val, metal::dfdx(val) + 0.5, metal::dfdy(val) + 0.5, 0);
  189. }
  190. )";
  191. auto lib = MRCTransfer([dev newLibraryWithSource:[NSString stringWithUTF8String:shader]
  192. options:nil
  193. error:nil]);
  194. if (!lib)
  195. return DetectionResult::Unsure;
  196. u32 outpx;
  197. bool ok = RenderSinglePixel(dev, //
  198. MRCTransfer([lib newFunctionWithName:@"fs_triangle"]), //
  199. MRCTransfer([lib newFunctionWithName:@"is_helper_test"]), //
  200. 0, &outpx);
  201. // The pixel itself should not be a helper thread (0.5)
  202. // The pixels to its right and below should be helper threads (1.0)
  203. // Correctly working would therefore be 0.5 for the pixel and (0.5 + 0.5) for the derivatives
  204. // Inverted would be 1.0 for the pixel and (-0.5 + 0.5) for the derivatives
  205. if (!ok)
  206. return DetectionResult::Unsure;
  207. if (outpx == 0xffff80)
  208. return DetectionResult::No; // Working correctly
  209. if (outpx == 0x0000ff)
  210. return DetectionResult::Yes; // Inverted
  211. WARN_LOG_FMT(VIDEO, "metal::simd_is_helper_thread might be broken! Test shader returned {:06x}!",
  212. outpx);
  213. return DetectionResult::Unsure;
  214. }
  215. void Metal::Util::PopulateBackendInfoFeatures(VideoConfig* config, id<MTLDevice> device)
  216. {
  217. // Initialize DriverDetails first so we can use it later
  218. DriverDetails::Vendor vendor = DriverDetails::VENDOR_UNKNOWN;
  219. std::string name = [[device name] UTF8String];
  220. if (name.find("NVIDIA") != std::string::npos)
  221. vendor = DriverDetails::VENDOR_NVIDIA;
  222. else if (name.find("AMD") != std::string::npos)
  223. vendor = DriverDetails::VENDOR_ATI;
  224. else if (name.find("Intel") != std::string::npos)
  225. vendor = DriverDetails::VENDOR_INTEL;
  226. else if (name.find("Apple") != std::string::npos)
  227. vendor = DriverDetails::VENDOR_APPLE;
  228. const NSOperatingSystemVersion cocoa_ver = [[NSProcessInfo processInfo] operatingSystemVersion];
  229. double version = cocoa_ver.majorVersion * 100 + cocoa_ver.minorVersion;
  230. DriverDetails::Init(DriverDetails::API_METAL, vendor, DriverDetails::DRIVER_APPLE, version,
  231. DriverDetails::Family::UNKNOWN, std::move(name));
  232. #if TARGET_OS_OSX
  233. config->backend_info.bSupportsDepthClamp = true;
  234. config->backend_info.bSupportsST3CTextures = true;
  235. config->backend_info.bSupportsBPTCTextures = true;
  236. #else
  237. bool supports_apple4 = false;
  238. bool supports_bcn = false;
  239. if (@available(iOS 13, *))
  240. supports_apple4 = [device supportsFamily:MTLGPUFamilyApple4];
  241. else
  242. supports_apple4 = [device supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily4_v1];
  243. if (@available(iOS 16.4, *))
  244. supports_bcn = [device supportsBCTextureCompression];
  245. config->backend_info.bSupportsDepthClamp = supports_apple4;
  246. config->backend_info.bSupportsST3CTextures = supports_bcn;
  247. config->backend_info.bSupportsBPTCTextures = supports_bcn;
  248. config->backend_info.bSupportsFramebufferFetch = true;
  249. #endif
  250. config->backend_info.AAModes.clear();
  251. for (u32 i = 1; i <= 64; i <<= 1)
  252. {
  253. if ([device supportsTextureSampleCount:i])
  254. config->backend_info.AAModes.push_back(i);
  255. }
  256. switch (config->iManuallyUploadBuffers)
  257. {
  258. case TriState::Off:
  259. g_features.manual_buffer_upload = false;
  260. break;
  261. case TriState::On:
  262. g_features.manual_buffer_upload = true;
  263. break;
  264. case TriState::Auto:
  265. #if TARGET_OS_OSX
  266. g_features.manual_buffer_upload = false;
  267. if (@available(macOS 10.15, *))
  268. if (![device hasUnifiedMemory])
  269. g_features.manual_buffer_upload = true;
  270. #else
  271. // All iOS devices have unified memory
  272. g_features.manual_buffer_upload = false;
  273. #endif
  274. break;
  275. }
  276. g_features.subgroup_ops = false;
  277. if (@available(macOS 10.15, iOS 13, *))
  278. {
  279. // Requires SIMD-scoped reduction operations
  280. g_features.subgroup_ops =
  281. [device supportsFamily:MTLGPUFamilyMac2] || [device supportsFamily:MTLGPUFamilyApple7];
  282. config->backend_info.bSupportsFramebufferFetch = [device supportsFamily:MTLGPUFamilyApple1];
  283. }
  284. if (g_features.subgroup_ops)
  285. {
  286. DetectionResult result = DetectInvertedIsHelper(device);
  287. if (result != DetectionResult::Unsure)
  288. {
  289. bool is_helper_inverted = result == DetectionResult::Yes;
  290. if (is_helper_inverted != DriverDetails::HasBug(DriverDetails::BUG_INVERTED_IS_HELPER))
  291. DriverDetails::OverrideBug(DriverDetails::BUG_INVERTED_IS_HELPER, is_helper_inverted);
  292. }
  293. }
  294. #if TARGET_OS_OSX
  295. if (@available(macOS 11, *))
  296. if (vendor == DriverDetails::VENDOR_INTEL)
  297. config->backend_info.bSupportsFramebufferFetch |= DetectIntelGPUFBFetch(device);
  298. #endif
  299. if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DYNAMIC_SAMPLER_INDEXING))
  300. config->backend_info.bSupportsDynamicSamplerIndexing = false;
  301. }
  302. // clang-format off
  303. AbstractTextureFormat Metal::Util::ToAbstract(MTLPixelFormat format)
  304. {
  305. switch (format)
  306. {
  307. case MTLPixelFormatRGBA8Unorm: return AbstractTextureFormat::RGBA8;
  308. case MTLPixelFormatBGRA8Unorm: return AbstractTextureFormat::BGRA8;
  309. case MTLPixelFormatRGB10A2Unorm: return AbstractTextureFormat::RGB10_A2;
  310. case MTLPixelFormatRGBA16Float: return AbstractTextureFormat::RGBA16F;
  311. case MTLPixelFormatBC1_RGBA: return AbstractTextureFormat::DXT1;
  312. case MTLPixelFormatBC2_RGBA: return AbstractTextureFormat::DXT3;
  313. case MTLPixelFormatBC3_RGBA: return AbstractTextureFormat::DXT5;
  314. case MTLPixelFormatBC7_RGBAUnorm: return AbstractTextureFormat::BPTC;
  315. case MTLPixelFormatR16Unorm: return AbstractTextureFormat::R16;
  316. case MTLPixelFormatDepth16Unorm: return AbstractTextureFormat::D16;
  317. #if TARGET_OS_OSX
  318. case MTLPixelFormatDepth24Unorm_Stencil8: return AbstractTextureFormat::D24_S8;
  319. #endif
  320. case MTLPixelFormatR32Float: return AbstractTextureFormat::R32F;
  321. case MTLPixelFormatDepth32Float: return AbstractTextureFormat::D32F;
  322. case MTLPixelFormatDepth32Float_Stencil8: return AbstractTextureFormat::D32F_S8;
  323. default: return AbstractTextureFormat::Undefined;
  324. }
  325. }
  326. // Don't complain about BCn formats requiring iOS 16.4, these are just enum conversions
  327. #pragma clang diagnostic push
  328. #pragma clang diagnostic ignored "-Wunguarded-availability"
  329. MTLPixelFormat Metal::Util::FromAbstract(AbstractTextureFormat format)
  330. {
  331. switch (format)
  332. {
  333. case AbstractTextureFormat::RGBA8: return MTLPixelFormatRGBA8Unorm;
  334. case AbstractTextureFormat::BGRA8: return MTLPixelFormatBGRA8Unorm;
  335. case AbstractTextureFormat::RGB10_A2: return MTLPixelFormatRGB10A2Unorm;
  336. case AbstractTextureFormat::RGBA16F: return MTLPixelFormatRGBA16Float;
  337. case AbstractTextureFormat::DXT1: return MTLPixelFormatBC1_RGBA;
  338. case AbstractTextureFormat::DXT3: return MTLPixelFormatBC2_RGBA;
  339. case AbstractTextureFormat::DXT5: return MTLPixelFormatBC3_RGBA;
  340. case AbstractTextureFormat::BPTC: return MTLPixelFormatBC7_RGBAUnorm;
  341. case AbstractTextureFormat::R16: return MTLPixelFormatR16Unorm;
  342. case AbstractTextureFormat::D16: return MTLPixelFormatDepth16Unorm;
  343. #if TARGET_OS_OSX
  344. case AbstractTextureFormat::D24_S8: return MTLPixelFormatDepth24Unorm_Stencil8;
  345. #endif
  346. case AbstractTextureFormat::R32F: return MTLPixelFormatR32Float;
  347. case AbstractTextureFormat::D32F: return MTLPixelFormatDepth32Float;
  348. case AbstractTextureFormat::D32F_S8: return MTLPixelFormatDepth32Float_Stencil8;
  349. default: return MTLPixelFormatInvalid;
  350. }
  351. }
  352. #pragma clang diagnostic pop
  353. // clang-format on
// MARK: Shader Translation
// GLSL preamble that TranslateShaderToMSL puts in front of every non-compute shader.
// It selects GLSL 4.50, requires the 8/16-bit storage and arithmetic extensions, defines the
// binding/location macros (UBO_BINDING maps index x to binding x - 1 in set 0; samplers and texel
// buffers share set 1, SSBOs use set 2) and aliases HLSL-style type and function names to GLSL.
static const std::string_view SHADER_HEADER = R"(
// Target GLSL 4.5.
#version 450 core
// Always available on Metal
#extension GL_EXT_shader_8bit_storage : require
#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
#define ATTRIBUTE_LOCATION(x) layout(location = x)
#define FRAGMENT_OUTPUT_LOCATION(x) layout(location = x)
#define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y) layout(location = x, index = y)
#define UBO_BINDING(packing, x) layout(packing, set = 0, binding = (x - 1))
#define SAMPLER_BINDING(x) layout(set = 1, binding = x)
#define TEXEL_BUFFER_BINDING(x) layout(set = 1, binding = (x + 8))
#define SSBO_BINDING(x) layout(std430, set = 2, binding = x)
#define INPUT_ATTACHMENT_BINDING(x, y, z) layout(set = x, binding = y, input_attachment_index = z)
#define VARYING_LOCATION(x) layout(location = x)
#define FORCE_EARLY_Z layout(early_fragment_tests) in
// Metal framebuffer fetch helpers.
#define FB_FETCH_VALUE subpassLoad(in_ocol0)
// hlsl to glsl function translation
#define API_METAL 1
#define float2 vec2
#define float3 vec3
#define float4 vec4
#define uint2 uvec2
#define uint3 uvec3
#define uint4 uvec4
#define int2 ivec2
#define int3 ivec3
#define int4 ivec4
#define frac fract
#define lerp mix
// These were changed in Vulkan
#define gl_VertexID gl_VertexIndex
#define gl_InstanceID gl_InstanceIndex
)";
// GLSL preamble that TranslateShaderToMSL puts in front of compute shaders.
// Same version, extensions and HLSL-style aliases as SHADER_HEADER, but with the compute binding
// macros only: UBOs in set 0, samplers in set 1, SSBOs in set 2 and images in set 3.
static const std::string_view COMPUTE_SHADER_HEADER = R"(
// Target GLSL 4.5.
#version 450 core
// Always available on Metal
#extension GL_EXT_shader_8bit_storage : require
#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
#define UBO_BINDING(packing, x) layout(packing, set = 0, binding = (x - 1))
#define SAMPLER_BINDING(x) layout(set = 1, binding = x)
#define SSBO_BINDING(x) layout(std430, set = 2, binding = x)
#define IMAGE_BINDING(format, x) layout(format, set = 3, binding = x)
// hlsl to glsl function translation
#define API_METAL 1
#define float2 vec2
#define float3 vec3
#define float4 vec4
#define uint2 uvec2
#define uint3 uvec3
#define uint4 uvec4
#define int2 ivec2
#define int3 ivec3
#define int4 ivec4
#define frac fract
#define lerp mix
)";
// Extra GLSL appended after the main header when g_features.subgroup_ops is set.
// Enables the subgroup extensions and defines the helper macros built on them.
static const std::string_view SUBGROUP_HELPER_HEADER = R"(
#extension GL_KHR_shader_subgroup_basic : enable
#extension GL_KHR_shader_subgroup_arithmetic : enable
#extension GL_KHR_shader_subgroup_ballot : enable
#define SUPPORTS_SUBGROUP_REDUCTION 1
#define IS_HELPER_INVOCATION gl_HelperInvocation
#define IS_FIRST_ACTIVE_INVOCATION (subgroupElect())
#define SUBGROUP_MIN(value) value = subgroupMin(value)
#define SUBGROUP_MAX(value) value = subgroupMax(value)
)";
// Text placed at the very start of the MSL returned by TranslateShaderToMSL.
// It only silences compiler warnings that the generated code is known to trigger.
static const std::string_view MSL_HEADER =
    // We know our shader generator leaves unused variables.
    "#pragma clang diagnostic ignored \"-Wunused-variable\"\n"
    // These are usually when the compiler doesn't think a switch is exhaustive
    "#pragma clang diagnostic ignored \"-Wreturn-type\"\n";
// Textual replacements applied to the MSL produced by SPIRV-Cross in TranslateShaderToMSL.
// Each entry is {text to find, text to emit in its place}; matching is exact, so the first
// string has to match the generated code character for character.
static constexpr std::pair<std::string_view, std::string_view> MSL_FIXUPS[] = {
    // Force-unroll the lighting loop in ubershaders, which greatly reduces register pressure on AMD
    {"for (uint chan = 0u; chan < 2u; chan++)",
     "_Pragma(\"unroll\") for (uint chan = 0u; chan < 2u; chan++)"},
};
  438. static constexpr spirv_cross::MSLResourceBinding
  439. MakeResourceBinding(spv::ExecutionModel stage, u32 set, u32 binding, //
  440. u32 msl_buffer, u32 msl_texture, u32 msl_sampler)
  441. {
  442. spirv_cross::MSLResourceBinding resource;
  443. resource.stage = stage;
  444. resource.desc_set = set;
  445. resource.binding = binding;
  446. resource.msl_buffer = msl_buffer;
  447. resource.msl_texture = msl_texture;
  448. resource.msl_sampler = msl_sampler;
  449. return resource;
  450. }
  451. std::optional<std::string> Metal::Util::TranslateShaderToMSL(ShaderStage stage,
  452. std::string_view source)
  453. {
  454. std::string full_source;
  455. std::string_view header = stage == ShaderStage::Compute ? COMPUTE_SHADER_HEADER : SHADER_HEADER;
  456. full_source.reserve(header.size() + SUBGROUP_HELPER_HEADER.size() + source.size());
  457. full_source.append(header);
  458. if (Metal::g_features.subgroup_ops)
  459. full_source.append(SUBGROUP_HELPER_HEADER);
  460. if (DriverDetails::HasBug(DriverDetails::BUG_INVERTED_IS_HELPER))
  461. {
  462. full_source.append("#define gl_HelperInvocation !gl_HelperInvocation "
  463. "// Work around broken AMD Metal driver\n");
  464. }
  465. if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_SUBGROUP_OPS_WITH_DISCARD))
  466. full_source.append("#define BROKEN_SUBGROUP_WITH_DISCARD 1\n");
  467. full_source.append(source);
  468. std::optional<SPIRV::CodeVector> code;
  469. switch (stage)
  470. {
  471. case ShaderStage::Vertex:
  472. code = SPIRV::CompileVertexShader(full_source, APIType::Metal, glslang::EShTargetSpv_1_5);
  473. break;
  474. case ShaderStage::Geometry:
  475. PanicAlertFmt("Tried to compile geometry shader for Metal, but Metal doesn't support them!");
  476. break;
  477. case ShaderStage::Pixel:
  478. code = SPIRV::CompileFragmentShader(full_source, APIType::Metal, glslang::EShTargetSpv_1_5);
  479. break;
  480. case ShaderStage::Compute:
  481. code = SPIRV::CompileComputeShader(full_source, APIType::Metal, glslang::EShTargetSpv_1_5);
  482. break;
  483. }
  484. if (!code.has_value())
  485. return std::nullopt;
  486. // clang-format off
  487. static const spirv_cross::MSLResourceBinding resource_bindings[] = {
  488. MakeResourceBinding(spv::ExecutionModelVertex, 0, 0, 1, 0, 0), // vs/ubo
  489. MakeResourceBinding(spv::ExecutionModelVertex, 0, 1, 1, 0, 0), // vs/ubo
  490. MakeResourceBinding(spv::ExecutionModelVertex, 0, 3, 2, 0, 0), // vs/ubo
  491. MakeResourceBinding(spv::ExecutionModelVertex, 2, 1, 0, 0, 0), // vs/ssbo
  492. MakeResourceBinding(spv::ExecutionModelFragment, 0, 0, 0, 0, 0), // vs/ubo
  493. MakeResourceBinding(spv::ExecutionModelFragment, 0, 1, 1, 0, 0), // vs/ubo
  494. // Dynamic list initialized below Fragment, 1, N, 0, N, N // ps/samp0-N
  495. MakeResourceBinding(spv::ExecutionModelFragment, 2, 0, 2, 0, 0), // ps/ssbo
  496. MakeResourceBinding(spv::ExecutionModelGLCompute, 0, 1, 0, 0, 0), // cs/ubo
  497. // Dynamic list initialized below GLCompute, 1, N, 0, N, N, // cs/samp0-N
  498. MakeResourceBinding(spv::ExecutionModelGLCompute, 2, 0, 2, 0, 0), // cs/ssbo
  499. MakeResourceBinding(spv::ExecutionModelGLCompute, 2, 1, 3, 0, 0), // cs/ssbo
  500. // Dynamic list initialized below GLCompute, 3, N, 0, N, 0, // cs/img0-N
  501. };
  502. spirv_cross::CompilerMSL::Options options;
  503. #if TARGET_OS_OSX
  504. options.platform = spirv_cross::CompilerMSL::Options::macOS;
  505. #elif TARGET_OS_IOS
  506. options.platform = spirv_cross::CompilerMSL::Options::iOS;
  507. // Otherwise SPIRV-Cross will try to compile subgroup ops to quad ops instead
  508. // (And crash because there's no quad_min or quad_max)
  509. options.ios_use_simdgroup_functions = Metal::g_features.subgroup_ops;
  510. #else
  511. #error What platform is this?
  512. #endif
  513. // clang-format on
  514. spirv_cross::CompilerMSL compiler(std::move(*code));
  515. if (@available(macOS 11, iOS 14, *))
  516. options.set_msl_version(2, 3);
  517. else if (@available(macOS 10.15, iOS 13, *))
  518. options.set_msl_version(2, 2);
  519. else if (@available(macOS 10.14, iOS 12, *))
  520. options.set_msl_version(2, 1);
  521. else
  522. options.set_msl_version(2, 0);
  523. options.use_framebuffer_fetch_subpasses = true;
  524. compiler.set_msl_options(options);
  525. for (auto& binding : resource_bindings)
  526. compiler.add_msl_resource_binding(binding);
  527. if (stage == ShaderStage::Pixel)
  528. {
  529. for (u32 i = 0; i < VideoCommon::MAX_PIXEL_SHADER_SAMPLERS; i++) // ps/samp0-N
  530. {
  531. compiler.add_msl_resource_binding(
  532. MakeResourceBinding(spv::ExecutionModelFragment, 1, i, 0, i, i));
  533. }
  534. }
  535. else if (stage == ShaderStage::Compute)
  536. {
  537. u32 img = 0;
  538. u32 smp = 0;
  539. for (u32 i = 0; i < VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS; i++) // cs/samp0-N
  540. {
  541. compiler.add_msl_resource_binding(
  542. MakeResourceBinding(spv::ExecutionModelGLCompute, 1, i, 0, img++, smp++));
  543. }
  544. for (u32 i = 0; i < VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS; i++) // cs/img0-N
  545. {
  546. compiler.add_msl_resource_binding(
  547. MakeResourceBinding(spv::ExecutionModelGLCompute, 3, i, 0, img++, 0));
  548. }
  549. }
  550. std::string output(MSL_HEADER);
  551. std::string compiled = compiler.compile();
  552. std::string_view remaining = compiled;
  553. while (!remaining.empty())
  554. {
  555. // Apply fixups
  556. std::string_view piece = remaining;
  557. std::string_view fixup_piece = {};
  558. size_t next = piece.size();
  559. for (const auto& fixup : MSL_FIXUPS)
  560. {
  561. size_t found = piece.find(fixup.first);
  562. if (found == std::string_view::npos)
  563. continue;
  564. piece = piece.substr(0, found);
  565. fixup_piece = fixup.second;
  566. next = found + fixup.first.size();
  567. }
  568. output += piece;
  569. output += fixup_piece;
  570. remaining = remaining.substr(next);
  571. }
  572. return output;
  573. }