VertexShaderGen.cpp 26 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696
  1. // Copyright 2008 Dolphin Emulator Project
  2. // SPDX-License-Identifier: GPL-2.0-or-later
  3. #include "VideoCommon/VertexShaderGen.h"
  4. #include "Common/Assert.h"
  5. #include "Common/CommonTypes.h"
  6. #include "VideoCommon/BPMemory.h"
  7. #include "VideoCommon/ConstantManager.h"
  8. #include "VideoCommon/LightingShaderGen.h"
  9. #include "VideoCommon/NativeVertexFormat.h"
  10. #include "VideoCommon/VertexLoaderManager.h"
  11. #include "VideoCommon/VideoCommon.h"
  12. #include "VideoCommon/VideoConfig.h"
  13. #include "VideoCommon/XFMemory.h"
  14. VertexShaderUid GetVertexShaderUid()
  15. {
  16. ASSERT(bpmem.genMode.numtexgens == xfmem.numTexGen.numTexGens);
  17. ASSERT(bpmem.genMode.numcolchans == xfmem.numChan.numColorChans);
  18. VertexShaderUid out;
  19. vertex_shader_uid_data* const uid_data = out.GetUidData();
  20. uid_data->numTexGens = xfmem.numTexGen.numTexGens;
  21. uid_data->components = VertexLoaderManager::g_current_components;
  22. uid_data->numColorChans = xfmem.numChan.numColorChans;
  23. GetLightingShaderUid(uid_data->lighting);
  24. // transform texcoords
  25. for (u32 i = 0; i < uid_data->numTexGens; ++i)
  26. {
  27. auto& texinfo = uid_data->texMtxInfo[i];
  28. texinfo.sourcerow = xfmem.texMtxInfo[i].sourcerow;
  29. texinfo.texgentype = xfmem.texMtxInfo[i].texgentype;
  30. texinfo.inputform = xfmem.texMtxInfo[i].inputform;
  31. // first transformation
  32. switch (texinfo.texgentype)
  33. {
  34. case TexGenType::EmbossMap: // calculate tex coords into bump map
  35. if ((uid_data->components & (VB_HAS_TANGENT | VB_HAS_BINORMAL)) != 0)
  36. {
  37. // transform the light dir into tangent space
  38. texinfo.embosslightshift = xfmem.texMtxInfo[i].embosslightshift;
  39. texinfo.embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift;
  40. }
  41. else
  42. {
  43. texinfo.embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift;
  44. }
  45. break;
  46. case TexGenType::Color0:
  47. case TexGenType::Color1:
  48. break;
  49. case TexGenType::Regular:
  50. default:
  51. uid_data->texMtxInfo_n_projection |= static_cast<u32>(xfmem.texMtxInfo[i].projection.Value())
  52. << i;
  53. break;
  54. }
  55. uid_data->dualTexTrans_enabled = xfmem.dualTexTrans.enabled;
  56. // CHECKME: does this only work for regular tex gen types?
  57. if (uid_data->dualTexTrans_enabled && texinfo.texgentype == TexGenType::Regular)
  58. {
  59. auto& postInfo = uid_data->postMtxInfo[i];
  60. postInfo.index = xfmem.postMtxInfo[i].index;
  61. postInfo.normalize = xfmem.postMtxInfo[i].normalize;
  62. }
  63. }
  64. return out;
  65. }
  66. ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& host_config,
  67. const vertex_shader_uid_data* uid_data)
  68. {
  69. ShaderCode out;
  70. const bool per_pixel_lighting = g_ActiveConfig.bEnablePixelLighting;
  71. const bool msaa = host_config.msaa;
  72. const bool ssaa = host_config.ssaa;
  73. const bool vertex_rounding = host_config.vertex_rounding;
  74. ShaderCode input_extract;
  75. out.Write("{}", s_lighting_struct);
  76. // uniforms
  77. out.Write("UBO_BINDING(std140, 2) uniform VSBlock {{\n");
  78. out.Write("{}", s_shader_uniforms);
  79. out.Write("}};\n");
  80. if (uid_data->vs_expand != VSExpand::None)
  81. {
  82. out.Write("UBO_BINDING(std140, 4) uniform GSBlock {{\n");
  83. out.Write("{}", s_geometry_shader_uniforms);
  84. out.Write("}};\n");
  85. if (api_type == APIType::D3D)
  86. {
  87. // D3D doesn't include the base vertex in SV_VertexID
  88. out.Write("UBO_BINDING(std140, 5) uniform DX_Constants {{\n"
  89. " uint base_vertex;\n"
  90. "}};\n\n");
  91. }
  92. }
  93. out.Write("struct VS_OUTPUT {{\n");
  94. GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config, "",
  95. ShaderStage::Vertex);
  96. out.Write("}};\n\n");
  97. WriteIsNanHeader(out, api_type);
  98. if (uid_data->vs_expand == VSExpand::None)
  99. {
  100. out.Write("ATTRIBUTE_LOCATION({:s}) in float4 rawpos;\n", ShaderAttrib::Position);
  101. if ((uid_data->components & VB_HAS_POSMTXIDX) != 0)
  102. out.Write("ATTRIBUTE_LOCATION({:s}) in uint4 posmtx;\n", ShaderAttrib::PositionMatrix);
  103. if ((uid_data->components & VB_HAS_NORMAL) != 0)
  104. out.Write("ATTRIBUTE_LOCATION({:s}) in float3 rawnormal;\n", ShaderAttrib::Normal);
  105. if ((uid_data->components & VB_HAS_TANGENT) != 0)
  106. out.Write("ATTRIBUTE_LOCATION({:s}) in float3 rawtangent;\n", ShaderAttrib::Tangent);
  107. if ((uid_data->components & VB_HAS_BINORMAL) != 0)
  108. out.Write("ATTRIBUTE_LOCATION({:s}) in float3 rawbinormal;\n", ShaderAttrib::Binormal);
  109. if ((uid_data->components & VB_HAS_COL0) != 0)
  110. out.Write("ATTRIBUTE_LOCATION({:s}) in float4 rawcolor0;\n", ShaderAttrib::Color0);
  111. if ((uid_data->components & VB_HAS_COL1) != 0)
  112. out.Write("ATTRIBUTE_LOCATION({:s}) in float4 rawcolor1;\n", ShaderAttrib::Color1);
  113. for (u32 i = 0; i < 8; ++i)
  114. {
  115. const u32 has_texmtx = (uid_data->components & (VB_HAS_TEXMTXIDX0 << i));
  116. if ((uid_data->components & (VB_HAS_UV0 << i)) != 0 || has_texmtx != 0)
  117. {
  118. out.Write("ATTRIBUTE_LOCATION({:s}) in float{} rawtex{};\n", ShaderAttrib::TexCoord0 + i,
  119. has_texmtx != 0 ? 3 : 2, i);
  120. }
  121. }
  122. }
  123. else
  124. {
  125. // Can't use float3, etc because we want 4-byte alignment
  126. out.Write(
  127. "uint4 unpack_ubyte4(uint value) {{\n"
  128. " return uint4(value & 0xffu, (value >> 8) & 0xffu, (value >> 16) & 0xffu, value >> 24);\n"
  129. "}}\n\n"
  130. "struct InputData {{\n");
  131. if (uid_data->components & VB_HAS_POSMTXIDX)
  132. {
  133. out.Write(" uint posmtx;\n");
  134. input_extract.Write("uint4 posmtx = unpack_ubyte4(i.posmtx);\n");
  135. }
  136. if (uid_data->position_has_3_elems)
  137. {
  138. out.Write(" float pos0;\n"
  139. " float pos1;\n"
  140. " float pos2;\n");
  141. input_extract.Write("float4 rawpos = float4(i.pos0, i.pos1, i.pos2, 1.0f);\n");
  142. }
  143. else
  144. {
  145. out.Write(" float pos0;\n"
  146. " float pos1;\n");
  147. input_extract.Write("float4 rawpos = float4(i.pos0, i.pos1, 0.0f, 1.0f);\n");
  148. }
  149. std::array<std::string_view, 3> names = {"normal", "binormal", "tangent"};
  150. for (int i = 0; i < 3; i++)
  151. {
  152. if (uid_data->components & (VB_HAS_NORMAL << i))
  153. {
  154. out.Write(" float {0}0;\n"
  155. " float {0}1;\n"
  156. " float {0}2;\n",
  157. names[i]);
  158. input_extract.Write("float3 raw{0} = float3(i.{0}0, i.{0}1, i.{0}2);\n", names[i]);
  159. }
  160. }
  161. for (int i = 0; i < 2; i++)
  162. {
  163. if (uid_data->components & (VB_HAS_COL0 << i))
  164. {
  165. out.Write(" uint color{};\n", i);
  166. input_extract.Write("float4 rawcolor{0} = float4(unpack_ubyte4(i.color{0})) / 255.0f;\n",
  167. i);
  168. }
  169. }
  170. for (int i = 0; i < 8; i++)
  171. {
  172. if (uid_data->components & (VB_HAS_UV0 << i))
  173. {
  174. u32 ncomponents = (uid_data->texcoord_elem_count >> (2 * i)) & 3;
  175. if (ncomponents < 2)
  176. {
  177. out.Write(" float tex{};\n", i);
  178. input_extract.Write("float3 rawtex{0} = float3(i.tex{0}, 0.0f, 0.0f);\n", i);
  179. }
  180. else if (ncomponents == 2)
  181. {
  182. out.Write(" float tex{0}_0;\n"
  183. " float tex{0}_1;\n",
  184. i);
  185. input_extract.Write("float3 rawtex{0} = float3(i.tex{0}_0, i.tex{0}_1, 0.0f);\n", i);
  186. }
  187. else
  188. {
  189. out.Write(" float tex{0}_0;\n"
  190. " float tex{0}_1;\n"
  191. " float tex{0}_2;\n",
  192. i);
  193. input_extract.Write("float3 rawtex{0} = float3(i.tex{0}_0, i.tex{0}_1, i.tex{0}_2);\n",
  194. i);
  195. }
  196. }
  197. }
  198. out.Write("}};\n\n"
  199. "SSBO_BINDING(1) readonly restrict buffer InputBuffer {{\n"
  200. " InputData input_buffer[];\n"
  201. "}};\n\n");
  202. }
  203. if (host_config.backend_geometry_shaders)
  204. {
  205. out.Write("VARYING_LOCATION(0) out VertexData {{\n");
  206. GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config,
  207. GetInterpolationQualifier(msaa, ssaa, true, false),
  208. ShaderStage::Vertex);
  209. out.Write("}} vs;\n");
  210. }
  211. else
  212. {
  213. // Let's set up attributes
  214. u32 counter = 0;
  215. out.Write("VARYING_LOCATION({}) {} out float4 colors_0;\n", counter++,
  216. GetInterpolationQualifier(msaa, ssaa));
  217. out.Write("VARYING_LOCATION({}) {} out float4 colors_1;\n", counter++,
  218. GetInterpolationQualifier(msaa, ssaa));
  219. for (u32 i = 0; i < uid_data->numTexGens; ++i)
  220. {
  221. out.Write("VARYING_LOCATION({}) {} out float3 tex{};\n", counter++,
  222. GetInterpolationQualifier(msaa, ssaa), i);
  223. }
  224. if (!host_config.fast_depth_calc)
  225. {
  226. out.Write("VARYING_LOCATION({}) {} out float4 clipPos;\n", counter++,
  227. GetInterpolationQualifier(msaa, ssaa));
  228. }
  229. if (per_pixel_lighting)
  230. {
  231. out.Write("VARYING_LOCATION({}) {} out float3 Normal;\n", counter++,
  232. GetInterpolationQualifier(msaa, ssaa));
  233. out.Write("VARYING_LOCATION({}) {} out float3 WorldPos;\n", counter++,
  234. GetInterpolationQualifier(msaa, ssaa));
  235. }
  236. }
  237. out.Write("void main()\n{{\n");
  238. if (uid_data->vs_expand != VSExpand::None)
  239. {
  240. out.Write("bool is_bottom = (gl_VertexID & 2) != 0;\n"
  241. "bool is_right = (gl_VertexID & 1) != 0;\n");
  242. // D3D doesn't include the base vertex in SV_VertexID
  243. // See comment in UberShaderVertex for details
  244. if (api_type == APIType::D3D)
  245. out.Write("uint vertex_id = (gl_VertexID >> 2) + base_vertex;\n");
  246. else
  247. out.Write("uint vertex_id = uint(gl_VertexID) >> 2u;\n");
  248. out.Write("InputData i = input_buffer[vertex_id];\n"
  249. "{}",
  250. input_extract.GetBuffer());
  251. }
  252. out.Write("VS_OUTPUT o;\n");
  253. // xfmem.numColorChans controls the number of color channels available to TEV, but we still need
  254. // to generate all channels here, as it can be used in texgen. Cel-damage is an example of this.
  255. out.Write("float4 vertex_color_0, vertex_color_1;\n");
  256. // To use color 1, the vertex descriptor must have color 0 and 1.
  257. // If color 1 is present but not color 0, it is used for lighting channel 0.
  258. const bool use_color_1 =
  259. (uid_data->components & (VB_HAS_COL0 | VB_HAS_COL1)) == (VB_HAS_COL0 | VB_HAS_COL1);
  260. for (u32 color = 0; color < NUM_XF_COLOR_CHANNELS; color++)
  261. {
  262. if ((color == 0 || use_color_1) && (uid_data->components & (VB_HAS_COL0 << color)) != 0)
  263. {
  264. // Use color0 for channel 0, and color1 for channel 1 if both colors 0 and 1 are present.
  265. out.Write("vertex_color_{0} = rawcolor{0};\n", color);
  266. }
  267. else if (color == 0 && (uid_data->components & VB_HAS_COL1) != 0)
  268. {
  269. // Use color1 for channel 0 if color0 is not present.
  270. out.Write("vertex_color_{} = rawcolor1;\n", color);
  271. }
  272. else
  273. {
  274. out.Write("vertex_color_{0} = missing_color_value;\n", color);
  275. }
  276. }
  277. // transforms
  278. if ((uid_data->components & VB_HAS_POSMTXIDX) != 0)
  279. {
  280. // Vertex format has a per-vertex matrix
  281. out.Write("int posidx = int(posmtx.r);\n"
  282. "float4 P0 = " I_TRANSFORMMATRICES "[posidx];\n"
  283. "float4 P1 = " I_TRANSFORMMATRICES "[posidx + 1];\n"
  284. "float4 P2 = " I_TRANSFORMMATRICES "[posidx + 2];\n"
  285. "int normidx = posidx & 31;\n"
  286. "float3 N0 = " I_NORMALMATRICES "[normidx].xyz;\n"
  287. "float3 N1 = " I_NORMALMATRICES "[normidx + 1].xyz;\n"
  288. "float3 N2 = " I_NORMALMATRICES "[normidx + 2].xyz;\n");
  289. }
  290. else
  291. {
  292. // One shared matrix
  293. out.Write("float4 P0 = " I_POSNORMALMATRIX "[0];\n"
  294. "float4 P1 = " I_POSNORMALMATRIX "[1];\n"
  295. "float4 P2 = " I_POSNORMALMATRIX "[2];\n"
  296. "float3 N0 = " I_POSNORMALMATRIX "[3].xyz;\n"
  297. "float3 N1 = " I_POSNORMALMATRIX "[4].xyz;\n"
  298. "float3 N2 = " I_POSNORMALMATRIX "[5].xyz;\n");
  299. }
  300. out.Write("// Multiply the position vector by the position matrix\n"
  301. "float4 pos = float4(dot(P0, rawpos), dot(P1, rawpos), dot(P2, rawpos), 1.0);\n");
  302. if ((uid_data->components & VB_HAS_NORMAL) == 0)
  303. out.Write("float3 rawnormal = " I_CACHED_NORMAL ".xyz;\n");
  304. if ((uid_data->components & VB_HAS_TANGENT) == 0)
  305. out.Write("float3 rawtangent = " I_CACHED_TANGENT ".xyz;\n");
  306. if ((uid_data->components & VB_HAS_BINORMAL) == 0)
  307. out.Write("float3 rawbinormal = " I_CACHED_BINORMAL ".xyz;\n");
  308. // The scale of the transform matrix is used to control the size of the emboss map effect, by
  309. // changing the scale of the transformed binormals (which only get used by emboss map texgens).
  310. // By normalising the first transformed normal (which is used by lighting calculations and needs
  311. // to be unit length), the same transform matrix can do double duty, scaling for emboss mapping,
  312. // and not scaling for lighting.
  313. out.Write("float3 _normal = normalize(float3(dot(N0, rawnormal), dot(N1, rawnormal), dot(N2, "
  314. "rawnormal)));\n"
  315. "float3 _tangent = float3(dot(N0, rawtangent), dot(N1, rawtangent), dot(N2, "
  316. "rawtangent));\n"
  317. "float3 _binormal = float3(dot(N0, rawbinormal), dot(N1, rawbinormal), dot(N2, "
  318. "rawbinormal));\n");
  319. out.Write("o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION
  320. "[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n");
  321. out.Write("int4 lacc;\n"
  322. "float3 ldir, h, cosAttn, distAttn;\n"
  323. "float dist, dist2, attn;\n");
  324. GenerateLightingShaderCode(out, uid_data->lighting, "vertex_color_", "o.colors_");
  325. // transform texcoords
  326. out.Write("float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n");
  327. for (u32 i = 0; i < uid_data->numTexGens; ++i)
  328. {
  329. auto& texinfo = uid_data->texMtxInfo[i];
  330. out.Write("{{\n");
  331. out.Write("coord = float4(0.0, 0.0, 1.0, 1.0);\n");
  332. switch (texinfo.sourcerow)
  333. {
  334. case SourceRow::Geom:
  335. out.Write("coord.xyz = rawpos.xyz;\n");
  336. break;
  337. case SourceRow::Normal:
  338. if ((uid_data->components & VB_HAS_NORMAL) != 0)
  339. {
  340. out.Write("coord.xyz = rawnormal.xyz;\n");
  341. }
  342. break;
  343. case SourceRow::Colors:
  344. ASSERT(texinfo.texgentype == TexGenType::Color0 || texinfo.texgentype == TexGenType::Color1);
  345. break;
  346. case SourceRow::BinormalT:
  347. if ((uid_data->components & VB_HAS_TANGENT) != 0)
  348. {
  349. out.Write("coord.xyz = rawtangent.xyz;\n");
  350. }
  351. break;
  352. case SourceRow::BinormalB:
  353. if ((uid_data->components & VB_HAS_BINORMAL) != 0)
  354. {
  355. out.Write("coord.xyz = rawbinormal.xyz;\n");
  356. }
  357. break;
  358. default:
  359. ASSERT(texinfo.sourcerow >= SourceRow::Tex0 && texinfo.sourcerow <= SourceRow::Tex7);
  360. u32 texnum = static_cast<u32>(texinfo.sourcerow) - static_cast<u32>(SourceRow::Tex0);
  361. if ((uid_data->components & (VB_HAS_UV0 << (texnum))) != 0)
  362. {
  363. out.Write("coord = float4(rawtex{}.x, rawtex{}.y, 1.0, 1.0);\n", texnum, texnum);
  364. }
  365. break;
  366. }
  367. // Input form of AB11 sets z element to 1.0
  368. if (texinfo.inputform == TexInputForm::AB11)
  369. out.Write("coord.z = 1.0;\n");
  370. // Convert NaNs to 1 - needed to fix eyelids in Shadow the Hedgehog during cutscenes
  371. // See https://bugs.dolphin-emu.org/issues/11458
  372. out.Write("// Convert NaN to 1\n");
  373. out.Write("if (dolphin_isnan(coord.x)) coord.x = 1.0;\n");
  374. out.Write("if (dolphin_isnan(coord.y)) coord.y = 1.0;\n");
  375. out.Write("if (dolphin_isnan(coord.z)) coord.z = 1.0;\n");
  376. // first transformation
  377. switch (texinfo.texgentype)
  378. {
  379. case TexGenType::EmbossMap: // calculate tex coords into bump map
  380. // transform the light dir into tangent space
  381. out.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n",
  382. LIGHT_POS_PARAMS(texinfo.embosslightshift));
  383. out.Write(
  384. "o.tex{}.xyz = o.tex{}.xyz + float3(dot(ldir, _tangent), dot(ldir, _binormal), 0.0);\n",
  385. i, texinfo.embosssourceshift);
  386. break;
  387. case TexGenType::Color0:
  388. out.Write("o.tex{}.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i);
  389. break;
  390. case TexGenType::Color1:
  391. out.Write("o.tex{}.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i);
  392. break;
  393. case TexGenType::Regular:
  394. default:
  395. if ((uid_data->components & (VB_HAS_TEXMTXIDX0 << i)) != 0)
  396. {
  397. out.Write("int tmp = int(rawtex{}.z);\n", i);
  398. if (static_cast<TexSize>((uid_data->texMtxInfo_n_projection >> i) & 1) == TexSize::STQ)
  399. {
  400. out.Write("o.tex{}.xyz = float3(dot(coord, " I_TRANSFORMMATRICES
  401. "[tmp]), dot(coord, " I_TRANSFORMMATRICES
  402. "[tmp+1]), dot(coord, " I_TRANSFORMMATRICES "[tmp+2]));\n",
  403. i);
  404. }
  405. else
  406. {
  407. out.Write("o.tex{}.xyz = float3(dot(coord, " I_TRANSFORMMATRICES
  408. "[tmp]), dot(coord, " I_TRANSFORMMATRICES "[tmp+1]), 1);\n",
  409. i);
  410. }
  411. }
  412. else
  413. {
  414. if (static_cast<TexSize>((uid_data->texMtxInfo_n_projection >> i) & 1) == TexSize::STQ)
  415. {
  416. out.Write("o.tex{}.xyz = float3(dot(coord, " I_TEXMATRICES
  417. "[{}]), dot(coord, " I_TEXMATRICES "[{}]), dot(coord, " I_TEXMATRICES
  418. "[{}]));\n",
  419. i, 3 * i, 3 * i + 1, 3 * i + 2);
  420. }
  421. else
  422. {
  423. out.Write("o.tex{}.xyz = float3(dot(coord, " I_TEXMATRICES
  424. "[{}]), dot(coord, " I_TEXMATRICES "[{}]), 1);\n",
  425. i, 3 * i, 3 * i + 1);
  426. }
  427. }
  428. break;
  429. }
  430. // CHECKME: does this only work for regular tex gen types?
  431. if (uid_data->dualTexTrans_enabled && texinfo.texgentype == TexGenType::Regular)
  432. {
  433. auto& postInfo = uid_data->postMtxInfo[i];
  434. out.Write("float4 P0 = " I_POSTTRANSFORMMATRICES "[{}];\n"
  435. "float4 P1 = " I_POSTTRANSFORMMATRICES "[{}];\n"
  436. "float4 P2 = " I_POSTTRANSFORMMATRICES "[{}];\n",
  437. postInfo.index & 0x3f, (postInfo.index + 1) & 0x3f, (postInfo.index + 2) & 0x3f);
  438. if (postInfo.normalize)
  439. out.Write("o.tex{}.xyz = normalize(o.tex{}.xyz);\n", i, i);
  440. // multiply by postmatrix
  441. out.Write(
  442. "o.tex{0}.xyz = float3(dot(P0.xyz, o.tex{0}.xyz) + P0.w, dot(P1.xyz, o.tex{0}.xyz) + "
  443. "P1.w, dot(P2.xyz, o.tex{0}.xyz) + P2.w);\n",
  444. i);
  445. }
  446. // When q is 0, the GameCube appears to have a special case
  447. // This can be seen in devkitPro's neheGX Lesson08 example for Wii
  448. // Makes differences in Rogue Squadron 3 (Hoth sky) and The Last Story (shadow culling)
  449. // TODO: check if this only affects XF_TEXGEN_REGULAR
  450. if (texinfo.texgentype == TexGenType::Regular)
  451. {
  452. out.Write(
  453. "if(o.tex{0}.z == 0.0f)\n"
  454. "\to.tex{0}.xy = clamp(o.tex{0}.xy / 2.0f, float2(-1.0f,-1.0f), float2(1.0f,1.0f));\n",
  455. i);
  456. }
  457. out.Write("}}\n");
  458. }
  459. if (uid_data->vs_expand == VSExpand::Line)
  460. {
  461. out.Write("// Line expansion\n"
  462. "uint other_id = vertex_id;\n"
  463. "if (is_bottom) {{\n"
  464. " other_id -= 1u;\n"
  465. "}} else {{\n"
  466. " other_id += 1u;\n"
  467. "}}\n"
  468. "InputData other = input_buffer[other_id];\n");
  469. if (uid_data->position_has_3_elems)
  470. out.Write("float4 other_pos = float4(other.pos0, other.pos1, other.pos2, 1.0f);\n");
  471. else
  472. out.Write("float4 other_pos = float4(other.pos0, other.pos1, 0.0f, 1.0f);\n");
  473. if (uid_data->components & VB_HAS_POSMTXIDX)
  474. {
  475. out.Write("uint other_posidx = other.posmtx & 0xff;\n"
  476. "float4 other_p0 = " I_TRANSFORMMATRICES "[other_posidx];\n"
  477. "float4 other_p1 = " I_TRANSFORMMATRICES "[other_posidx + 1];\n"
  478. "float4 other_p2 = " I_TRANSFORMMATRICES "[other_posidx + 2];\n"
  479. "other_pos = float4(dot(other_p0, other_pos), dot(other_p1, other_pos), "
  480. "dot(other_p2, other_pos), 1.0f);\n");
  481. }
  482. else
  483. {
  484. out.Write("other_pos = float4(dot(P0, other_pos), dot(P1, other_pos), dot(P2, other_pos), "
  485. "1.0f);\n");
  486. }
  487. GenerateVSLineExpansion(out, "", uid_data->numTexGens);
  488. }
  489. else if (uid_data->vs_expand == VSExpand::Point)
  490. {
  491. out.Write("// Point expansion\n");
  492. GenerateVSPointExpansion(out, "", uid_data->numTexGens);
  493. }
  494. if (per_pixel_lighting)
  495. {
  496. // When per-pixel lighting is enabled, the vertex colors are passed through
  497. // unmodified so we can evaluate the lighting in the pixel shader.
  498. // Lighting is also still computed in the vertex shader since it can be used to
  499. // generate texture coordinates. We generated them above, so now the colors can
  500. // be reverted to their previous stage.
  501. out.Write("o.colors_0 = vertex_color_0;\n");
  502. out.Write("o.colors_1 = vertex_color_1;\n");
  503. // Note that the numColorChans logic is performed in the pixel shader.
  504. }
  505. else
  506. {
  507. // The number of colors available to TEV is determined by numColorChans.
  508. // We have to provide the fields to match the interface, so set to zero if it's not enabled.
  509. if (uid_data->numColorChans == 0)
  510. out.Write("o.colors_0 = float4(0.0, 0.0, 0.0, 0.0);\n");
  511. if (uid_data->numColorChans <= 1)
  512. out.Write("o.colors_1 = float4(0.0, 0.0, 0.0, 0.0);\n");
  513. }
  514. // clipPos/w needs to be done in pixel shader, not here
  515. if (!host_config.fast_depth_calc)
  516. out.Write("o.clipPos = o.pos;\n");
  517. if (per_pixel_lighting)
  518. {
  519. out.Write("o.Normal = _normal;\n"
  520. "o.WorldPos = pos.xyz;\n");
  521. }
  522. // If we can disable the incorrect depth clipping planes using depth clamping, then we can do
  523. // our own depth clipping and calculate the depth range before the perspective divide if
  524. // necessary.
  525. if (host_config.backend_depth_clamp)
  526. {
  527. // Since we're adjusting z for the depth range before the perspective divide, we have to do our
  528. // own clipping. We want to clip so that -w <= z <= 0, which matches the console -1..0 range.
  529. // We adjust our depth value for clipping purposes to match the perspective projection in the
  530. // software backend, which is a hack to fix Sonic Adventure and Unleashed games.
  531. out.Write("float clipDepth = o.pos.z * (1.0 - 1e-7);\n"
  532. "float clipDist0 = clipDepth + o.pos.w;\n" // Near: z < -w
  533. "float clipDist1 = -clipDepth;\n"); // Far: z > 0
  534. if (host_config.backend_geometry_shaders)
  535. {
  536. out.Write("o.clipDist0 = clipDist0;\n"
  537. "o.clipDist1 = clipDist1;\n");
  538. }
  539. }
  540. else
  541. {
  542. // Same depth adjustment for Sonic. Without depth clamping, it unfortunately
  543. // affects non-clipping uses of depth too.
  544. out.Write("o.pos.z = o.pos.z * (1.0 - 1e-7);\n");
  545. }
  546. // Write the true depth value. If the game uses depth textures, then the pixel shader will
  547. // override it with the correct values if not then early z culling will improve speed.
  548. // There are two different ways to do this, when the depth range is oversized, we process
  549. // the depth range in the vertex shader, if not we let the host driver handle it.
  550. //
  551. // Adjust z for the depth range. We're using an equation which incorperates a depth inversion,
  552. // so we can map the console -1..0 range to the 0..1 range used in the depth buffer.
  553. // We have to handle the depth range in the vertex shader instead of after the perspective
  554. // divide, because some games will use a depth range larger than what is allowed by the
  555. // graphics API. These large depth ranges will still be clipped to the 0..1 range, so these
  556. // games effectively add a depth bias to the values written to the depth buffer.
  557. out.Write("o.pos.z = o.pos.w * " I_PIXELCENTERCORRECTION ".w - "
  558. "o.pos.z * " I_PIXELCENTERCORRECTION ".z;\n");
  559. if (!host_config.backend_clip_control)
  560. {
  561. // If the graphics API doesn't support a depth range of 0..1, then we need to map z to
  562. // the -1..1 range. Unfortunately we have to use a substraction, which is a lossy floating-point
  563. // operation that can introduce a round-trip error.
  564. out.Write("o.pos.z = o.pos.z * 2.0 - o.pos.w;\n");
  565. }
  566. // Correct for negative viewports by mirroring all vertices. We need to negate the height here,
  567. // since the viewport height is already negated by the render backend.
  568. out.Write("o.pos.xy *= sign(" I_PIXELCENTERCORRECTION ".xy * float2(1.0, -1.0));\n");
  569. // The console GPU places the pixel center at 7/12 in screen space unless
  570. // antialiasing is enabled, while D3D and OpenGL place it at 0.5. This results
  571. // in some primitives being placed one pixel too far to the bottom-right,
  572. // which in turn can be critical if it happens for clear quads.
  573. // Hence, we compensate for this pixel center difference so that primitives
  574. // get rasterized correctly.
  575. out.Write("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION ".xy;\n");
  576. if (vertex_rounding)
  577. {
  578. // By now our position is in clip space
  579. // however, higher resolutions than the Wii outputs
  580. // cause an additional pixel offset
  581. // due to a higher pixel density
  582. // we need to correct this by converting our
  583. // clip-space position into the Wii's screen-space
  584. // acquire the right pixel and then convert it back
  585. out.Write("if (o.pos.w == 1.0f)\n"
  586. "{{\n"
  587. "\tfloat ss_pixel_x = ((o.pos.x + 1.0f) * (" I_VIEWPORT_SIZE ".x * 0.5f));\n"
  588. "\tfloat ss_pixel_y = ((o.pos.y + 1.0f) * (" I_VIEWPORT_SIZE ".y * 0.5f));\n"
  589. "\tss_pixel_x = round(ss_pixel_x);\n"
  590. "\tss_pixel_y = round(ss_pixel_y);\n"
  591. "\to.pos.x = ((ss_pixel_x / (" I_VIEWPORT_SIZE ".x * 0.5f)) - 1.0f);\n"
  592. "\to.pos.y = ((ss_pixel_y / (" I_VIEWPORT_SIZE ".y * 0.5f)) - 1.0f);\n"
  593. "}}\n");
  594. }
  595. if (host_config.backend_geometry_shaders)
  596. {
  597. AssignVSOutputMembers(out, "vs", "o", uid_data->numTexGens, host_config);
  598. }
  599. else
  600. {
  601. // TODO: Pass interface blocks between shader stages even if geometry shaders
  602. // are not supported, however that will require at least OpenGL 3.2 support.
  603. for (u32 i = 0; i < uid_data->numTexGens; ++i)
  604. out.Write("tex{}.xyz = o.tex{};\n", i, i);
  605. if (!host_config.fast_depth_calc)
  606. out.Write("clipPos = o.clipPos;\n");
  607. if (per_pixel_lighting)
  608. {
  609. out.Write("Normal = o.Normal;\n"
  610. "WorldPos = o.WorldPos;\n");
  611. }
  612. out.Write("colors_0 = o.colors_0;\n"
  613. "colors_1 = o.colors_1;\n");
  614. }
  615. if (host_config.backend_depth_clamp)
  616. {
  617. out.Write("gl_ClipDistance[0] = clipDist0;\n"
  618. "gl_ClipDistance[1] = clipDist1;\n");
  619. }
  620. // Vulkan NDC space has Y pointing down (right-handed NDC space).
  621. if (api_type == APIType::Vulkan)
  622. out.Write("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n");
  623. else
  624. out.Write("gl_Position = o.pos;\n");
  625. out.Write("}}\n");
  626. return out;
  627. }