UberShaderVertex.cpp 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738
  1. // Copyright 2015 Dolphin Emulator Project
  2. // SPDX-License-Identifier: GPL-2.0-or-later
  3. #include "VideoCommon/UberShaderVertex.h"
  4. #include "Common/EnumUtils.h"
  5. #include "VideoCommon/ConstantManager.h"
  6. #include "VideoCommon/DriverDetails.h"
  7. #include "VideoCommon/NativeVertexFormat.h"
  8. #include "VideoCommon/UberShaderCommon.h"
  9. #include "VideoCommon/VertexShaderGen.h"
  10. #include "VideoCommon/VideoCommon.h"
  11. #include "VideoCommon/XFMemory.h"
  12. namespace UberShader
  13. {
  14. VertexShaderUid GetVertexShaderUid()
  15. {
  16. VertexShaderUid out;
  17. vertex_ubershader_uid_data* const uid_data = out.GetUidData();
  18. uid_data->num_texgens = xfmem.numTexGen.numTexGens;
  19. return out;
  20. }
  21. static void GenVertexShaderTexGens(APIType api_type, const ShaderHostConfig& host_config,
  22. u32 num_texgen, ShaderCode& out);
  23. static void LoadVertexAttribute(ShaderCode& code, const ShaderHostConfig& host_config, u32 indent,
  24. std::string_view name, std::string_view shader_type,
  25. std::string_view stored_type, std::string_view offset_name = {});
  26. ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config,
  27. const vertex_ubershader_uid_data* uid_data)
  28. {
  29. const bool msaa = host_config.msaa;
  30. const bool ssaa = host_config.ssaa;
  31. const bool per_pixel_lighting = host_config.per_pixel_lighting;
  32. const bool vertex_rounding = host_config.vertex_rounding;
  33. const bool vertex_loader =
  34. host_config.backend_dynamic_vertex_loader || host_config.backend_vs_point_line_expand;
  35. const u32 num_texgen = uid_data->num_texgens;
  36. ShaderCode out;
  37. out.Write("// {}\n\n", *uid_data);
  38. out.Write("{}", s_lighting_struct);
  39. // uniforms
  40. out.Write("UBO_BINDING(std140, 2) uniform VSBlock {{\n");
  41. out.Write("{}", s_shader_uniforms);
  42. out.Write("}};\n");
  43. if (vertex_loader)
  44. {
  45. out.Write("UBO_BINDING(std140, 4) uniform GSBlock {{\n");
  46. out.Write("{}", s_geometry_shader_uniforms);
  47. out.Write("}};\n");
  48. }
  49. out.Write("struct VS_OUTPUT {{\n");
  50. GenerateVSOutputMembers(out, api_type, num_texgen, host_config, "", ShaderStage::Vertex);
  51. out.Write("}};\n\n");
  52. WriteIsNanHeader(out, api_type);
  53. WriteBitfieldExtractHeader(out, api_type, host_config);
  54. WriteLightingFunction(out);
  55. if (vertex_loader)
  56. {
  57. out.Write(R"(
  58. SSBO_BINDING(1) readonly restrict buffer Vertices {{
  59. uint vertex_buffer[];
  60. }};
  61. )");
  62. if (api_type == APIType::D3D)
  63. {
  64. // Write a function to get an offset into vertex_buffer corresponding to this vertex.
  65. // This must be done differently for D3D compared to OpenGL/Vulkan/Metal, as on OpenGL, etc.,
  66. // gl_VertexID starts counting at the base vertex specified in glDrawElementsBaseVertex,
  67. // while on D3D, SV_VertexID (which spirv-cross translates gl_VertexID into) starts counting
  68. // at 0 regardless of the BaseVertexLocation value passed to DrawIndexed. In both cases,
  69. // offset 0 of vertex_buffer corresponds to index 0 with basevertex set to 0, so we have to
  70. // manually apply the basevertex offset for D3D
  71. // D3D12 uses a root constant for this uniform, since it changes with every draw.
  72. // D3D11 doesn't currently support dynamic vertex loader, and we'll have to figure something
  73. // out for it if we want to support it in the future.
  74. out.Write("UBO_BINDING(std140, 5) uniform DX_Constants {{\n"
  75. " uint base_vertex;\n"
  76. "}};\n\n"
  77. "uint GetVertexBaseOffset(uint vertex_id) {{\n"
  78. " return (vertex_id + base_vertex) * vertex_stride;\n"
  79. "}}\n");
  80. }
  81. else
  82. {
  83. out.Write("uint GetVertexBaseOffset(uint vertex_id) {{\n"
  84. " return vertex_id * vertex_stride;\n"
  85. "}}\n");
  86. }
  87. out.Write(R"(
  88. uint4 load_input_uint4_ubyte4(uint vtx_offset, uint attr_offset) {{
  89. uint value = vertex_buffer[vtx_offset + attr_offset];
  90. return uint4(value & 0xffu, (value >> 8) & 0xffu, (value >> 16) & 0xffu, value >> 24);
  91. }}
  92. float4 load_input_float4_ubyte4(uint vtx_offset, uint attr_offset) {{
  93. return float4(load_input_uint4_ubyte4(vtx_offset, attr_offset)) / 255.0f;
  94. }}
  95. float3 load_input_float3_float3(uint vtx_offset, uint attr_offset) {{
  96. uint offset = vtx_offset + attr_offset;
  97. return float3(uintBitsToFloat(vertex_buffer[offset + 0]),
  98. uintBitsToFloat(vertex_buffer[offset + 1]),
  99. uintBitsToFloat(vertex_buffer[offset + 2]));
  100. }}
  101. float4 load_input_float4_rawpos(uint vtx_offset, uint attr_offset) {{
  102. uint components = attr_offset >> 16;
  103. uint offset = vtx_offset + (attr_offset & 0xffff);
  104. if (components < 3)
  105. return float4(uintBitsToFloat(vertex_buffer[offset + 0]),
  106. uintBitsToFloat(vertex_buffer[offset + 1]),
  107. 0.0f, 1.0f);
  108. else
  109. return float4(uintBitsToFloat(vertex_buffer[offset + 0]),
  110. uintBitsToFloat(vertex_buffer[offset + 1]),
  111. uintBitsToFloat(vertex_buffer[offset + 2]),
  112. 1.0f);
  113. }}
  114. float3 load_input_float3_rawtex(uint vtx_offset, uint attr_offset) {{
  115. uint components = attr_offset >> 16;
  116. uint offset = vtx_offset + (attr_offset & 0xffff);
  117. if (components < 2)
  118. return float3(uintBitsToFloat(vertex_buffer[offset + 0]), 0.0f, 0.0f);
  119. else if (components < 3)
  120. return float3(uintBitsToFloat(vertex_buffer[offset + 0]),
  121. uintBitsToFloat(vertex_buffer[offset + 1]),
  122. 0.0f);
  123. else
  124. return float3(uintBitsToFloat(vertex_buffer[offset + 0]),
  125. uintBitsToFloat(vertex_buffer[offset + 1]),
  126. uintBitsToFloat(vertex_buffer[offset + 2]));
  127. }}
  128. )");
  129. }
  130. else
  131. {
  132. out.Write("ATTRIBUTE_LOCATION({:s}) in float4 rawpos;\n", ShaderAttrib::Position);
  133. out.Write("ATTRIBUTE_LOCATION({:s}) in uint4 posmtx;\n", ShaderAttrib::PositionMatrix);
  134. out.Write("ATTRIBUTE_LOCATION({:s}) in float3 rawnormal;\n", ShaderAttrib::Normal);
  135. out.Write("ATTRIBUTE_LOCATION({:s}) in float3 rawtangent;\n", ShaderAttrib::Tangent);
  136. out.Write("ATTRIBUTE_LOCATION({:s}) in float3 rawbinormal;\n", ShaderAttrib::Binormal);
  137. out.Write("ATTRIBUTE_LOCATION({:s}) in float4 rawcolor0;\n", ShaderAttrib::Color0);
  138. out.Write("ATTRIBUTE_LOCATION({:s}) in float4 rawcolor1;\n", ShaderAttrib::Color1);
  139. for (u32 i = 0; i < 8; ++i)
  140. out.Write("ATTRIBUTE_LOCATION({:s}) in float3 rawtex{};\n", ShaderAttrib::TexCoord0 + i, i);
  141. }
  142. if (host_config.backend_geometry_shaders)
  143. {
  144. out.Write("VARYING_LOCATION(0) out VertexData {{\n");
  145. GenerateVSOutputMembers(out, api_type, num_texgen, host_config,
  146. GetInterpolationQualifier(msaa, ssaa, true, false),
  147. ShaderStage::Vertex);
  148. out.Write("}} vs;\n");
  149. }
  150. else
  151. {
  152. // Let's set up attributes
  153. u32 counter = 0;
  154. out.Write("VARYING_LOCATION({}) {} out float4 colors_0;\n", counter++,
  155. GetInterpolationQualifier(msaa, ssaa));
  156. out.Write("VARYING_LOCATION({}) {} out float4 colors_1;\n", counter++,
  157. GetInterpolationQualifier(msaa, ssaa));
  158. for (u32 i = 0; i < num_texgen; ++i)
  159. {
  160. out.Write("VARYING_LOCATION({}) {} out float3 tex{};\n", counter++,
  161. GetInterpolationQualifier(msaa, ssaa), i);
  162. }
  163. if (!host_config.fast_depth_calc)
  164. {
  165. out.Write("VARYING_LOCATION({}) {} out float4 clipPos;\n", counter++,
  166. GetInterpolationQualifier(msaa, ssaa));
  167. }
  168. if (per_pixel_lighting)
  169. {
  170. out.Write("VARYING_LOCATION({}) {} out float3 Normal;\n", counter++,
  171. GetInterpolationQualifier(msaa, ssaa));
  172. out.Write("VARYING_LOCATION({}) {} out float3 WorldPos;\n", counter++,
  173. GetInterpolationQualifier(msaa, ssaa));
  174. }
  175. }
  176. out.Write("void main()\n{{\n");
  177. out.Write("VS_OUTPUT o;\n"
  178. "\n");
  179. if (host_config.backend_vs_point_line_expand)
  180. {
  181. out.Write("uint vertex_id = gl_VertexID;\n"
  182. "if (vs_expand != 0u) {{\n"
  183. " vertex_id = vertex_id >> 2;\n"
  184. "}}\n"
  185. "uint vertex_base_offset = GetVertexBaseOffset(vertex_id);\n");
  186. }
  187. else if (host_config.backend_dynamic_vertex_loader)
  188. {
  189. out.Write("uint vertex_base_offset = GetVertexBaseOffset(gl_VertexID);\n");
  190. }
  191. // rawpos is always needed
  192. LoadVertexAttribute(out, host_config, 0, "rawpos", "float4", "rawpos");
  193. // Transforms
  194. out.Write("// Position matrix\n"
  195. "float4 P0;\n"
  196. "float4 P1;\n"
  197. "float4 P2;\n"
  198. "\n"
  199. "// Normal matrix\n"
  200. "float3 N0;\n"
  201. "float3 N1;\n"
  202. "float3 N2;\n"
  203. "\n"
  204. "if ((components & {}u) != 0u) {{ // VB_HAS_POSMTXIDX\n",
  205. Common::ToUnderlying(VB_HAS_POSMTXIDX));
  206. LoadVertexAttribute(out, host_config, 2, "posmtx", "uint4", "ubyte4");
  207. out.Write(" // Vertex format has a per-vertex matrix\n"
  208. " int posidx = int(posmtx.r);\n"
  209. " P0 = " I_TRANSFORMMATRICES "[posidx];\n"
  210. " P1 = " I_TRANSFORMMATRICES "[posidx+1];\n"
  211. " P2 = " I_TRANSFORMMATRICES "[posidx+2];\n"
  212. "\n"
  213. " int normidx = posidx >= 32 ? (posidx - 32) : posidx;\n"
  214. " N0 = " I_NORMALMATRICES "[normidx].xyz;\n"
  215. " N1 = " I_NORMALMATRICES "[normidx+1].xyz;\n"
  216. " N2 = " I_NORMALMATRICES "[normidx+2].xyz;\n"
  217. "}} else {{\n"
  218. " // One shared matrix\n"
  219. " P0 = " I_POSNORMALMATRIX "[0];\n"
  220. " P1 = " I_POSNORMALMATRIX "[1];\n"
  221. " P2 = " I_POSNORMALMATRIX "[2];\n"
  222. " N0 = " I_POSNORMALMATRIX "[3].xyz;\n"
  223. " N1 = " I_POSNORMALMATRIX "[4].xyz;\n"
  224. " N2 = " I_POSNORMALMATRIX "[5].xyz;\n"
  225. "}}\n"
  226. "\n"
  227. "// Multiply the position vector by the position matrix\n"
  228. "float4 pos = float4(dot(P0, rawpos), dot(P1, rawpos), dot(P2, rawpos), 1.0);\n"
  229. "o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION
  230. "[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n"
  231. "\n"
  232. "float3 _rawnormal;\n"
  233. "float3 _rawtangent;\n"
  234. "float3 _rawbinormal;\n"
  235. "if ((components & {}u) != 0u) // VB_HAS_NORMAL\n"
  236. "{{\n",
  237. Common::ToUnderlying(VB_HAS_NORMAL));
  238. LoadVertexAttribute(out, host_config, 2, "rawnormal", "float3", "float3");
  239. out.Write(" _rawnormal = rawnormal;\n"
  240. "}}\n"
  241. "else\n"
  242. "{{\n"
  243. " _rawnormal = " I_CACHED_NORMAL ".xyz;\n"
  244. "}}\n"
  245. "\n"
  246. "if ((components & {}u) != 0u) // VB_HAS_TANGENT\n"
  247. "{{\n",
  248. Common::ToUnderlying(VB_HAS_TANGENT));
  249. LoadVertexAttribute(out, host_config, 2, "rawtangent", "float3", "float3");
  250. out.Write(" _rawtangent = rawtangent;\n"
  251. "}}\n"
  252. "else\n"
  253. "{{\n"
  254. " _rawtangent = " I_CACHED_TANGENT ".xyz;\n"
  255. "}}\n"
  256. "\n"
  257. "if ((components & {}u) != 0u) // VB_HAS_BINORMAL\n"
  258. "{{\n",
  259. Common::ToUnderlying(VB_HAS_BINORMAL));
  260. LoadVertexAttribute(out, host_config, 2, "rawbinormal", "float3", "float3");
  261. out.Write(" _rawbinormal = rawbinormal;\n"
  262. "}}\n"
  263. "else\n"
  264. "{{\n"
  265. " _rawbinormal = " I_CACHED_BINORMAL ".xyz;\n"
  266. "}}\n"
  267. "\n"
  268. "// The scale of the transform matrix is used to control the size of the emboss map\n"
  269. "// effect by changing the scale of the transformed binormals (which only get used by\n"
  270. "// emboss map texgens). By normalising the first transformed normal (which is used\n"
  271. "// by lighting calculations and needs to be unit length), the same transform matrix\n"
  272. "// can do double duty, scaling for emboss mapping, and not scaling for lighting.\n"
  273. "float3 _normal = normalize(float3(dot(N0, _rawnormal), dot(N1, _rawnormal), dot(N2, "
  274. "_rawnormal)));\n"
  275. "float3 _tangent = float3(dot(N0, _rawtangent), dot(N1, _rawtangent), dot(N2, "
  276. "_rawtangent));\n"
  277. "float3 _binormal = float3(dot(N0, _rawbinormal), dot(N1, _rawbinormal), dot(N2, "
  278. "_rawbinormal));\n");
  279. // Hardware Lighting
  280. out.Write("// xfmem.numColorChans controls the number of color channels available to TEV,\n"
  281. "// but we still need to generate all channels here, as it can be used in texgen.\n"
  282. "// Cel-damage is an example of this.\n"
  283. "float4 vertex_color_0, vertex_color_1;\n"
  284. "\n");
  285. out.Write("// To use color 1, the vertex descriptor must have color 0 and 1.\n"
  286. "// If color 1 is present but not color 0, it is used for lighting channel 0.\n"
  287. "bool use_color_1 = ((components & {0}u) == {0}u); // VB_HAS_COL0 | VB_HAS_COL1\n",
  288. static_cast<u32>(VB_HAS_COL0 | VB_HAS_COL1));
  289. out.Write("if ((components & {0}u) == {0}u) // VB_HAS_COL0 | VB_HAS_COL1\n"
  290. "{{\n",
  291. static_cast<u32>(VB_HAS_COL0 | VB_HAS_COL1));
  292. LoadVertexAttribute(out, host_config, 2, "rawcolor0", "float4", "ubyte4");
  293. LoadVertexAttribute(out, host_config, 2, "rawcolor1", "float4", "ubyte4");
  294. out.Write(" vertex_color_0 = rawcolor0;\n"
  295. " vertex_color_1 = rawcolor1;\n"
  296. "}}\n"
  297. "else if ((components & {}u) != 0u) // VB_HAS_COL0\n"
  298. "{{\n",
  299. Common::ToUnderlying(VB_HAS_COL0));
  300. LoadVertexAttribute(out, host_config, 2, "rawcolor0", "float4", "ubyte4");
  301. out.Write(" vertex_color_0 = rawcolor0;\n"
  302. " vertex_color_1 = rawcolor0;\n"
  303. "}}\n"
  304. "else if ((components & {}u) != 0u) // VB_HAS_COL1\n"
  305. "{{\n",
  306. Common::ToUnderlying(VB_HAS_COL1));
  307. LoadVertexAttribute(out, host_config, 2, "rawcolor1", "float4", "ubyte4");
  308. out.Write(" vertex_color_0 = rawcolor1;\n"
  309. " vertex_color_1 = rawcolor1;\n"
  310. "}}\n"
  311. "else\n"
  312. "{{\n"
  313. " vertex_color_0 = missing_color_value;\n"
  314. " vertex_color_1 = missing_color_value;\n"
  315. "}}\n");
  316. WriteVertexLighting(out, api_type, "pos.xyz", "_normal", "vertex_color_0", "vertex_color_1",
  317. "o.colors_0", "o.colors_1");
  318. // Texture Coordinates
  319. if (num_texgen > 0)
  320. GenVertexShaderTexGens(api_type, host_config, num_texgen, out);
  321. if (host_config.backend_vs_point_line_expand)
  322. {
  323. out.Write("if (vs_expand == {}u) {{ // Line\n", Common::ToUnderlying(VSExpand::Line));
  324. out.Write(" bool is_bottom = (gl_VertexID & 2) != 0;\n"
  325. " bool is_right = (gl_VertexID & 1) != 0;\n"
  326. " uint other_base_offset = vertex_base_offset;\n"
  327. " if (is_bottom) {{\n"
  328. " other_base_offset -= vertex_stride;\n"
  329. " }} else {{\n"
  330. " other_base_offset += vertex_stride;\n"
  331. " }}\n"
  332. " float4 other_rawpos = load_input_float4_rawpos(other_base_offset, "
  333. "vertex_offset_rawpos);\n"
  334. " float4 other_p0 = P0;\n"
  335. " float4 other_p1 = P1;\n"
  336. " float4 other_p2 = P2;\n"
  337. " if ((components & {}u) != 0u) {{ // VB_HAS_POSMTXIDX\n",
  338. Common::ToUnderlying(VB_HAS_POSMTXIDX));
  339. out.Write(" uint other_posidx = load_input_uint4_ubyte4(other_base_offset, "
  340. "vertex_offset_posmtx).r;\n"
  341. " other_p0 = " I_TRANSFORMMATRICES "[other_posidx];\n"
  342. " other_p1 = " I_TRANSFORMMATRICES "[other_posidx+1];\n"
  343. " other_p2 = " I_TRANSFORMMATRICES "[other_posidx+2];\n"
  344. " }}\n"
  345. " float4 other_pos = float4(dot(other_p0, other_rawpos), "
  346. "dot(other_p1, other_rawpos), dot(other_p2, other_rawpos), 1.0);\n");
  347. GenerateVSLineExpansion(out, " ", num_texgen);
  348. out.Write("}} else if (vs_expand == {}u) {{ // Point\n", Common::ToUnderlying(VSExpand::Point));
  349. out.Write(" bool is_bottom = (gl_VertexID & 2) != 0;\n"
  350. " bool is_right = (gl_VertexID & 1) != 0;\n");
  351. GenerateVSPointExpansion(out, " ", num_texgen);
  352. out.Write("}}\n");
  353. }
  354. if (per_pixel_lighting)
  355. {
  356. out.Write("// When per-pixel lighting is enabled, the vertex colors are passed through\n"
  357. "// unmodified so we can evaluate the lighting in the pixel shader.\n"
  358. "// Lighting is also still computed in the vertex shader since it can be used to\n"
  359. "// generate texture coordinates. We generated them above, so now the colors can\n"
  360. "// be reverted to their previous stage.\n"
  361. "o.colors_0 = vertex_color_0;\n"
  362. "o.colors_1 = vertex_color_1;\n"
  363. "// Note that the numColorChans logic should be (but currently isn't)\n"
  364. "// performed in the pixel shader.\n");
  365. }
  366. else
  367. {
  368. out.Write("// The number of colors available to TEV is determined by numColorChans.\n"
  369. "// We have to provide the fields to match the interface, so set to zero\n"
  370. "// if it's not enabled.\n"
  371. "if (xfmem_numColorChans == 0u)\n"
  372. " o.colors_0 = float4(0.0, 0.0, 0.0, 0.0);\n"
  373. "if (xfmem_numColorChans <= 1u)\n"
  374. " o.colors_1 = float4(0.0, 0.0, 0.0, 0.0);\n");
  375. }
  376. if (!host_config.fast_depth_calc)
  377. {
  378. // clipPos/w needs to be done in pixel shader, not here
  379. out.Write("o.clipPos = o.pos;\n");
  380. }
  381. if (per_pixel_lighting)
  382. {
  383. out.Write("o.Normal = _normal;\n"
  384. "o.WorldPos = pos.xyz;\n");
  385. }
  386. // If we can disable the incorrect depth clipping planes using depth clamping, then we can do
  387. // our own depth clipping and calculate the depth range before the perspective divide if
  388. // necessary.
  389. if (host_config.backend_depth_clamp)
  390. {
  391. // Since we're adjusting z for the depth range before the perspective divide, we have to do our
  392. // own clipping. We want to clip so that -w <= z <= 0, which matches the console -1..0 range.
  393. // We adjust our depth value for clipping purposes to match the perspective projection in the
  394. // software backend, which is a hack to fix Sonic Adventure and Unleashed games.
  395. out.Write("float clipDepth = o.pos.z * (1.0 - 1e-7);\n"
  396. "float clipDist0 = clipDepth + o.pos.w;\n" // Near: z < -w
  397. "float clipDist1 = -clipDepth;\n"); // Far: z > 0
  398. if (host_config.backend_geometry_shaders)
  399. {
  400. out.Write("o.clipDist0 = clipDist0;\n"
  401. "o.clipDist1 = clipDist1;\n");
  402. }
  403. }
  404. // Write the true depth value. If the game uses depth textures, then the pixel shader will
  405. // override it with the correct values if not then early z culling will improve speed.
  406. // There are two different ways to do this, when the depth range is oversized, we process
  407. // the depth range in the vertex shader, if not we let the host driver handle it.
  408. //
  409. // Adjust z for the depth range. We're using an equation which incorperates a depth inversion,
  410. // so we can map the console -1..0 range to the 0..1 range used in the depth buffer.
  411. // We have to handle the depth range in the vertex shader instead of after the perspective
  412. // divide, because some games will use a depth range larger than what is allowed by the
  413. // graphics API. These large depth ranges will still be clipped to the 0..1 range, so these
  414. // games effectively add a depth bias to the values written to the depth buffer.
  415. out.Write("o.pos.z = o.pos.w * " I_PIXELCENTERCORRECTION ".w - "
  416. "o.pos.z * " I_PIXELCENTERCORRECTION ".z;\n");
  417. if (!host_config.backend_clip_control)
  418. {
  419. // If the graphics API doesn't support a depth range of 0..1, then we need to map z to
  420. // the -1..1 range. Unfortunately we have to use a substraction, which is a lossy floating-point
  421. // operation that can introduce a round-trip error.
  422. out.Write("o.pos.z = o.pos.z * 2.0 - o.pos.w;\n");
  423. }
  424. // Correct for negative viewports by mirroring all vertices. We need to negate the height here,
  425. // since the viewport height is already negated by the render backend.
  426. out.Write("o.pos.xy *= sign(" I_PIXELCENTERCORRECTION ".xy * float2(1.0, -1.0));\n");
  427. // The console GPU places the pixel center at 7/12 in screen space unless
  428. // antialiasing is enabled, while D3D and OpenGL place it at 0.5. This results
  429. // in some primitives being placed one pixel too far to the bottom-right,
  430. // which in turn can be critical if it happens for clear quads.
  431. // Hence, we compensate for this pixel center difference so that primitives
  432. // get rasterized correctly.
  433. out.Write("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION ".xy;\n");
  434. if (vertex_rounding)
  435. {
  436. // By now our position is in clip space. However, higher resolutions than the Wii outputs
  437. // cause an additional pixel offset. Due to a higher pixel density we need to correct this
  438. // by converting our clip-space position into the Wii's screen-space.
  439. // Acquire the right pixel and then convert it back.
  440. out.Write("if (o.pos.w == 1.0f)\n"
  441. "{{\n");
  442. out.Write("\tfloat ss_pixel_x = ((o.pos.x + 1.0f) * (" I_VIEWPORT_SIZE ".x * 0.5f));\n"
  443. "\tfloat ss_pixel_y = ((o.pos.y + 1.0f) * (" I_VIEWPORT_SIZE ".y * 0.5f));\n");
  444. out.Write("\tss_pixel_x = round(ss_pixel_x);\n"
  445. "\tss_pixel_y = round(ss_pixel_y);\n");
  446. out.Write("\to.pos.x = ((ss_pixel_x / (" I_VIEWPORT_SIZE ".x * 0.5f)) - 1.0f);\n"
  447. "\to.pos.y = ((ss_pixel_y / (" I_VIEWPORT_SIZE ".y * 0.5f)) - 1.0f);\n"
  448. "}}\n");
  449. }
  450. if (host_config.backend_geometry_shaders)
  451. {
  452. AssignVSOutputMembers(out, "vs", "o", num_texgen, host_config);
  453. }
  454. else
  455. {
  456. // TODO: Pass interface blocks between shader stages even if geometry shaders
  457. // are not supported, however that will require at least OpenGL 3.2 support.
  458. for (u32 i = 0; i < num_texgen; ++i)
  459. out.Write("tex{}.xyz = o.tex{};\n", i, i);
  460. if (!host_config.fast_depth_calc)
  461. out.Write("clipPos = o.clipPos;\n");
  462. if (per_pixel_lighting)
  463. {
  464. out.Write("Normal = o.Normal;\n"
  465. "WorldPos = o.WorldPos;\n");
  466. }
  467. out.Write("colors_0 = o.colors_0;\n"
  468. "colors_1 = o.colors_1;\n");
  469. }
  470. if (host_config.backend_depth_clamp)
  471. {
  472. out.Write("gl_ClipDistance[0] = clipDist0;\n"
  473. "gl_ClipDistance[1] = clipDist1;\n");
  474. }
  475. // Vulkan NDC space has Y pointing down (right-handed NDC space).
  476. if (api_type == APIType::Vulkan)
  477. out.Write("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n");
  478. else
  479. out.Write("gl_Position = o.pos;\n");
  480. out.Write("}}\n");
  481. return out;
  482. }
  483. static void GenVertexShaderTexGens(APIType api_type, const ShaderHostConfig& host_config,
  484. u32 num_texgen, ShaderCode& out)
  485. {
  486. // The HLSL compiler complains that the output texture coordinates are uninitialized when trying
  487. // to dynamically index them.
  488. for (u32 i = 0; i < num_texgen; i++)
  489. out.Write("o.tex{} = float3(0.0, 0.0, 0.0);\n", i);
  490. out.Write("// Texture coordinate generation\n");
  491. if (num_texgen == 1)
  492. {
  493. out.Write("{{ const uint texgen = 0u;\n");
  494. }
  495. else
  496. {
  497. out.Write("for (uint texgen = 0u; texgen < {}u; texgen++) {{\n", num_texgen);
  498. }
  499. out.Write(" // Texcoord transforms\n");
  500. out.Write(" float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n"
  501. " uint texMtxInfo = xfmem_texMtxInfo(texgen);\n");
  502. out.Write(" switch ({}) {{\n", BitfieldExtract<&TexMtxInfo::sourcerow>("texMtxInfo"));
  503. out.Write(" case {:s}:\n", SourceRow::Geom);
  504. out.Write(" coord.xyz = rawpos.xyz;\n");
  505. out.Write(" break;\n\n");
  506. out.Write(" case {:s}:\n", SourceRow::Normal);
  507. out.Write(" if ((components & {}u) != 0u) // VB_HAS_NORMAL\n"
  508. " {{\n",
  509. Common::ToUnderlying(VB_HAS_NORMAL));
  510. LoadVertexAttribute(out, host_config, 6, "rawnormal", "float3", "float3");
  511. out.Write(" coord.xyz = rawnormal.xyz;\n"
  512. " }}\n"
  513. " break;\n\n");
  514. out.Write(" case {:s}:\n", SourceRow::BinormalT);
  515. out.Write(" if ((components & {}u) != 0u) // VB_HAS_TANGENT\n"
  516. " {{\n",
  517. Common::ToUnderlying(VB_HAS_TANGENT));
  518. LoadVertexAttribute(out, host_config, 6, "rawtangent", "float3", "float3");
  519. out.Write(" coord.xyz = rawtangent.xyz;\n"
  520. " }}\n"
  521. " break;\n\n");
  522. out.Write(" case {:s}:\n", SourceRow::BinormalB);
  523. out.Write(" if ((components & {}u) != 0u) // VB_HAS_BINORMAL\n"
  524. " {{\n",
  525. Common::ToUnderlying(VB_HAS_BINORMAL));
  526. LoadVertexAttribute(out, host_config, 6, "rawbinormal", "float3", "float3");
  527. out.Write(" coord.xyz = rawbinormal.xyz;\n"
  528. " }}\n"
  529. " break;\n\n");
  530. for (u32 i = 0; i < 8; i++)
  531. {
  532. out.Write(" case {:s}:\n", static_cast<SourceRow>(Common::ToUnderlying(SourceRow::Tex0) + i));
  533. out.Write(" if ((components & {}u) != 0u) // VB_HAS_UV{}\n"
  534. " {{\n",
  535. VB_HAS_UV0 << i, i);
  536. LoadVertexAttribute(out, host_config, 6, fmt::format("rawtex{}", i), "float3", "rawtex",
  537. fmt::format("rawtex[{}][{}]", i / 4, i % 4));
  538. out.Write(" coord = float4(rawtex{}.x, rawtex{}.y, 1.0f, 1.0f);\n"
  539. " }}\n",
  540. i, i);
  541. out.Write(" break;\n\n");
  542. }
  543. out.Write(" }}\n"
  544. "\n");
  545. out.Write(" // Input form of AB11 sets z element to 1.0\n");
  546. out.Write(" if ({} == {:s}) // inputform == AB11\n",
  547. BitfieldExtract<&TexMtxInfo::inputform>("texMtxInfo"), TexInputForm::AB11);
  548. out.Write(" coord.z = 1.0f;\n"
  549. "\n");
  550. // Convert NaNs to 1 - needed to fix eyelids in Shadow the Hedgehog during cutscenes
  551. // See https://bugs.dolphin-emu.org/issues/11458
  552. out.Write(" // Convert NaN to 1\n");
  553. out.Write(" if (dolphin_isnan(coord.x)) coord.x = 1.0;\n");
  554. out.Write(" if (dolphin_isnan(coord.y)) coord.y = 1.0;\n");
  555. out.Write(" if (dolphin_isnan(coord.z)) coord.z = 1.0;\n");
  556. out.Write(" // first transformation\n");
  557. out.Write(" uint texgentype = {};\n", BitfieldExtract<&TexMtxInfo::texgentype>("texMtxInfo"));
  558. out.Write(" float3 output_tex;\n"
  559. " switch (texgentype)\n"
  560. " {{\n");
  561. out.Write(" case {:s}:\n", TexGenType::EmbossMap);
  562. out.Write(" {{\n");
  563. out.Write(" uint light = {};\n",
  564. BitfieldExtract<&TexMtxInfo::embosslightshift>("texMtxInfo"));
  565. out.Write(" uint source = {};\n",
  566. BitfieldExtract<&TexMtxInfo::embosssourceshift>("texMtxInfo"));
  567. out.Write(" switch (source) {{\n");
  568. for (u32 i = 0; i < num_texgen; i++)
  569. out.Write(" case {}u: output_tex.xyz = o.tex{}; break;\n", i, i);
  570. out.Write(" default: output_tex.xyz = float3(0.0, 0.0, 0.0); break;\n"
  571. " }}\n"
  572. " float3 ldir = normalize(" I_LIGHTS "[light].pos.xyz - pos.xyz);\n"
  573. " output_tex.xyz += float3(dot(ldir, _tangent), dot(ldir, _binormal), 0.0);\n"
  574. " }}\n"
  575. " break;\n\n");
  576. out.Write(" case {:s}:\n", TexGenType::Color0);
  577. out.Write(" output_tex.xyz = float3(o.colors_0.x, o.colors_0.y, 1.0);\n"
  578. " break;\n\n");
  579. out.Write(" case {:s}:\n", TexGenType::Color1);
  580. out.Write(" output_tex.xyz = float3(o.colors_1.x, o.colors_1.y, 1.0);\n"
  581. " break;\n\n");
  582. out.Write(" case {:s}:\n", TexGenType::Regular);
  583. out.Write(" default:\n"
  584. " {{\n");
  585. out.Write(" if ((components & ({}u /* VB_HAS_TEXMTXIDX0 */ << texgen)) != 0u) {{\n",
  586. Common::ToUnderlying(VB_HAS_TEXMTXIDX0));
  587. if (host_config.backend_dynamic_vertex_loader || host_config.backend_vs_point_line_expand)
  588. {
  589. out.Write(" int tmp = int(load_input_float3_rawtex(vertex_base_offset, "
  590. "vertex_offset_rawtex[texgen / 4][texgen % 4]).z);\n"
  591. "\n");
  592. }
  593. else
  594. {
  595. out.Write(
  596. " // This is messy, due to dynamic indexing of the input texture coordinates.\n"
  597. " // Hopefully the compiler will unroll this whole loop anyway and the switch.\n"
  598. " int tmp = 0;\n"
  599. " switch (texgen) {{\n");
  600. for (u32 i = 0; i < num_texgen; i++)
  601. out.Write(" case {}u: tmp = int(rawtex{}.z); break;\n", i, i);
  602. out.Write(" }}\n"
  603. "\n");
  604. }
  605. out.Write(" if ({} == {:s}) {{\n", BitfieldExtract<&TexMtxInfo::projection>("texMtxInfo"),
  606. TexSize::STQ);
  607. out.Write(" output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n"
  608. " dot(coord, " I_TRANSFORMMATRICES "[tmp + 1]),\n"
  609. " dot(coord, " I_TRANSFORMMATRICES "[tmp + 2]));\n"
  610. " }} else {{\n"
  611. " output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n"
  612. " dot(coord, " I_TRANSFORMMATRICES "[tmp + 1]),\n"
  613. " 1.0);\n"
  614. " }}\n"
  615. " }} else {{\n");
  616. out.Write(" if ({} == {:s}) {{\n", BitfieldExtract<&TexMtxInfo::projection>("texMtxInfo"),
  617. TexSize::STQ);
  618. out.Write(" output_tex.xyz = float3(dot(coord, " I_TEXMATRICES "[3u * texgen]),\n"
  619. " dot(coord, " I_TEXMATRICES "[3u * texgen + 1u]),\n"
  620. " dot(coord, " I_TEXMATRICES "[3u * texgen + 2u]));\n"
  621. " }} else {{\n"
  622. " output_tex.xyz = float3(dot(coord, " I_TEXMATRICES "[3u * texgen]),\n"
  623. " dot(coord, " I_TEXMATRICES "[3u * texgen + 1u]),\n"
  624. " 1.0);\n"
  625. " }}\n"
  626. " }}\n"
  627. " }}\n"
  628. " break;\n\n"
  629. " }}\n"
  630. "\n");
  631. out.Write(" if (xfmem_dualTexInfo != 0u) {{\n");
  632. out.Write(" uint postMtxInfo = xfmem_postMtxInfo(texgen);");
  633. out.Write(" uint base_index = {};\n", BitfieldExtract<&PostMtxInfo::index>("postMtxInfo"));
  634. out.Write(" float4 P0 = " I_POSTTRANSFORMMATRICES "[base_index & 0x3fu];\n"
  635. " float4 P1 = " I_POSTTRANSFORMMATRICES "[(base_index + 1u) & 0x3fu];\n"
  636. " float4 P2 = " I_POSTTRANSFORMMATRICES "[(base_index + 2u) & 0x3fu];\n"
  637. "\n");
  638. out.Write(" if ({} != 0u)\n", BitfieldExtract<&PostMtxInfo::normalize>("postMtxInfo"));
  639. out.Write(" output_tex.xyz = normalize(output_tex.xyz);\n"
  640. "\n"
  641. " // multiply by postmatrix\n"
  642. " output_tex.xyz = float3(dot(P0.xyz, output_tex.xyz) + P0.w,\n"
  643. " dot(P1.xyz, output_tex.xyz) + P1.w,\n"
  644. " dot(P2.xyz, output_tex.xyz) + P2.w);\n"
  645. " }}\n\n");
  646. // When q is 0, the GameCube appears to have a special case
  647. // This can be seen in devkitPro's neheGX Lesson08 example for Wii
  648. // Makes differences in Rogue Squadron 3 (Hoth sky) and The Last Story (shadow culling)
  649. out.Write(" if (texgentype == {:s} && output_tex.z == 0.0)\n", TexGenType::Regular);
  650. out.Write(
  651. " output_tex.xy = clamp(output_tex.xy / 2.0f, float2(-1.0f,-1.0f), float2(1.0f,1.0f));\n"
  652. "\n");
  653. out.Write(" // Hopefully GPUs that can support dynamic indexing will optimize this.\n");
  654. out.Write(" switch (texgen) {{\n");
  655. for (u32 i = 0; i < num_texgen; i++)
  656. out.Write(" case {}u: o.tex{} = output_tex; break;\n", i, i);
  657. out.Write(" }}\n"
  658. "}}\n");
  659. }
  660. static void LoadVertexAttribute(ShaderCode& code, const ShaderHostConfig& host_config, u32 indent,
  661. std::string_view name, std::string_view shader_type,
  662. std::string_view stored_type, std::string_view offset_name)
  663. {
  664. if (host_config.backend_dynamic_vertex_loader || host_config.backend_vs_point_line_expand)
  665. {
  666. code.Write("{:{}}{} {} = load_input_{}_{}(vertex_base_offset, vertex_offset_{});\n", "", indent,
  667. shader_type, name, shader_type, stored_type,
  668. offset_name.empty() ? name : offset_name);
  669. }
  670. // else inputs are always available
  671. }
  672. void EnumerateVertexShaderUids(const std::function<void(const VertexShaderUid&)>& callback)
  673. {
  674. VertexShaderUid uid;
  675. for (u32 texgens = 0; texgens <= 8; texgens++)
  676. {
  677. vertex_ubershader_uid_data* const vuid = uid.GetUidData();
  678. vuid->num_texgens = texgens;
  679. callback(uid);
  680. }
  681. }
  682. } // namespace UberShader