UberShaderPixel.cpp 74 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774
  1. // Copyright 2015 Dolphin Emulator Project
  2. // SPDX-License-Identifier: GPL-2.0-or-later
  3. #include "VideoCommon/UberShaderPixel.h"
  4. #include "Common/Assert.h"
  5. #include "VideoCommon/BPMemory.h"
  6. #include "VideoCommon/DriverDetails.h"
  7. #include "VideoCommon/NativeVertexFormat.h"
  8. #include "VideoCommon/PixelShaderGen.h"
  9. #include "VideoCommon/ShaderGenCommon.h"
  10. #include "VideoCommon/UberShaderCommon.h"
  11. #include "VideoCommon/VideoCommon.h"
  12. #include "VideoCommon/VideoConfig.h"
  13. #include "VideoCommon/XFMemory.h"
  14. namespace UberShader
  15. {
  16. namespace
  17. {
  18. void WriteCustomShaderStructImpl(ShaderCode* out, u32 num_texgen, bool per_pixel_lighting)
  19. {
  20. out->Write("\tCustomShaderData custom_data;\n");
  21. if (per_pixel_lighting)
  22. {
  23. out->Write("\tcustom_data.position = WorldPos;\n");
  24. out->Write("\tcustom_data.normal = Normal;\n");
  25. }
  26. else
  27. {
  28. out->Write("\tcustom_data.position = float3(0, 0, 0);\n");
  29. out->Write("\tcustom_data.normal = float3(0, 0, 0);\n");
  30. }
  31. if (num_texgen == 0) [[unlikely]]
  32. {
  33. out->Write("\tcustom_data.texcoord[0] = float3(0, 0, 0);\n");
  34. }
  35. else
  36. {
  37. for (u32 i = 0; i < num_texgen; ++i)
  38. {
  39. out->Write("\tif (tex{0}.z == 0.0)\n", i);
  40. out->Write("\t{{\n");
  41. out->Write("\t\tcustom_data.texcoord[{0}] = tex{0};\n", i);
  42. out->Write("\t}}\n");
  43. out->Write("\telse {{\n");
  44. out->Write("\t\tcustom_data.texcoord[{0}] = float3(tex{0}.xy / tex{0}.z, 0);\n", i);
  45. out->Write("\t}}\n");
  46. }
  47. }
  48. out->Write("\tcustom_data.texcoord_count = {};\n", num_texgen);
  49. for (u32 i = 0; i < 8; i++)
  50. {
  51. // Shader compilation complains if every index isn't initialized
  52. out->Write("\tcustom_data.texmap_to_texcoord_index[{0}] = {0};\n", i);
  53. }
  54. for (u32 i = 0; i < NUM_XF_COLOR_CHANNELS; i++)
  55. {
  56. out->Write("\tcustom_data.base_material[{}] = vec4(0, 0, 0, 1);\n", i);
  57. out->Write("\tcustom_data.ambient_lighting[{}] = vec4(0, 0, 0, 1);\n", i);
  58. // Shader compilation errors can throw if not everything is initialized
  59. for (u32 light_count_index = 0; light_count_index < 8; light_count_index++)
  60. {
  61. // Color
  62. out->Write("\tcustom_data.lights_chan{}_color[{}].direction = float3(0, 0, 0);\n", i,
  63. light_count_index);
  64. out->Write("\tcustom_data.lights_chan{}_color[{}].position = float3(0, 0, 0);\n", i,
  65. light_count_index);
  66. out->Write("\tcustom_data.lights_chan{}_color[{}].color = float3(0, 0, 0);\n", i,
  67. light_count_index);
  68. out->Write("\tcustom_data.lights_chan{}_color[{}].cosatt = float4(0, 0, 0, 0);\n", i,
  69. light_count_index);
  70. out->Write("\tcustom_data.lights_chan{}_color[{}].distatt = float4(0, 0, 0, 0);\n", i,
  71. light_count_index);
  72. out->Write("\tcustom_data.lights_chan{}_color[{}].attenuation_type = 0;\n", i,
  73. light_count_index);
  74. // Alpha
  75. out->Write("\tcustom_data.lights_chan{}_alpha[{}].direction = float3(0, 0, 0);\n", i,
  76. light_count_index);
  77. out->Write("\tcustom_data.lights_chan{}_alpha[{}].position = float3(0, 0, 0);\n", i,
  78. light_count_index);
  79. out->Write("\tcustom_data.lights_chan{}_alpha[{}].color = float3(0, 0, 0);\n", i,
  80. light_count_index);
  81. out->Write("\tcustom_data.lights_chan{}_alpha[{}].cosatt = float4(0, 0, 0, 0);\n", i,
  82. light_count_index);
  83. out->Write("\tcustom_data.lights_chan{}_alpha[{}].distatt = float4(0, 0, 0, 0);\n", i,
  84. light_count_index);
  85. out->Write("\tcustom_data.lights_chan{}_alpha[{}].attenuation_type = 0;\n", i,
  86. light_count_index);
  87. }
  88. out->Write("\tcustom_data.light_chan{}_color_count = 0;\n", i);
  89. out->Write("\tcustom_data.light_chan{}_alpha_count = 0;\n", i);
  90. }
  91. if (num_texgen > 0) [[likely]]
  92. {
  93. out->Write("\n");
  94. out->Write("\tfor(uint stage = 0u; stage <= num_stages; stage++)\n");
  95. out->Write("\t{{\n");
  96. out->Write("\t\tStageState ss;\n");
  97. out->Write("\t\tss.order = bpmem_tevorder(stage>>1);\n");
  98. out->Write("\t\tif ((stage & 1u) == 1u)\n");
  99. out->Write("\t\t\tss.order = ss.order >> {};\n\n",
  100. int(TwoTevStageOrders().enable_tex_odd.StartBit() -
  101. TwoTevStageOrders().enable_tex_even.StartBit()));
  102. out->Write("\t\tuint texmap = {};\n",
  103. BitfieldExtract<&TwoTevStageOrders::texcoord_even>("ss.order"));
  104. // Shader compilation is weird, shader arrays can't use indexing by variable
  105. // to set values unless the variable is an index in a for loop.
  106. // So instead we have to do this if check nonsense
  107. for (u32 i = 0; i < 8; i++)
  108. {
  109. out->Write("\t\tif (texmap == {})\n", i);
  110. out->Write("\t\t{{\n");
  111. out->Write("\t\t\tcustom_data.texmap_to_texcoord_index[{}] = selectTexCoordIndex(texmap);\n",
  112. i);
  113. out->Write("\t\t}}\n");
  114. }
  115. out->Write("\t}}\n");
  116. }
  117. if (per_pixel_lighting)
  118. {
  119. out->Write("\tuint light_count = 0;\n");
  120. out->Write("\tfor (uint chan = 0u; chan < {}u; chan++)\n", NUM_XF_COLOR_CHANNELS);
  121. out->Write("\t{{\n");
  122. out->Write("\t\tuint colorreg = xfmem_color(chan);\n");
  123. out->Write("\t\tuint alphareg = xfmem_alpha(chan);\n");
  124. for (const auto& color_type : std::array<std::string_view, 2>{"colorreg", "alphareg"})
  125. {
  126. if (color_type == "colorreg")
  127. {
  128. out->Write("\t\tcustom_data.base_material[0] = " I_MATERIALS "[2u] / 255.0; \n");
  129. out->Write("\t\tif ({} != 0u)\n", BitfieldExtract<&LitChannel::enablelighting>(color_type));
  130. out->Write("\t\t\tcustom_data.base_material[0] = colors_0; \n");
  131. }
  132. else
  133. {
  134. out->Write("custom_data.base_material[1].w = " I_MATERIALS "[3u].w / 255.0; \n");
  135. out->Write("\t\tif ({} != 0u)\n", BitfieldExtract<&LitChannel::enablelighting>(color_type));
  136. out->Write("\t\t\tcustom_data.base_material[1].w = colors_1.w; \n");
  137. }
  138. out->Write("\t\tif ({} != 0u)\n", BitfieldExtract<&LitChannel::enablelighting>(color_type));
  139. out->Write("\t\t{{\n");
  140. out->Write("\t\t\tuint light_mask = {} | ({} << 4u);\n",
  141. BitfieldExtract<&LitChannel::lightMask0_3>(color_type),
  142. BitfieldExtract<&LitChannel::lightMask4_7>(color_type));
  143. out->Write("\t\t\tuint attnfunc = {};\n", BitfieldExtract<&LitChannel::attnfunc>(color_type));
  144. out->Write("\t\t\tfor (uint light_index = 0u; light_index < 8u; light_index++)\n");
  145. out->Write("\t\t\t{{\n");
  146. out->Write("\t\t\t\tif ((light_mask & (1u << light_index)) != 0u)\n");
  147. out->Write("\t\t\t\t{{\n");
  148. // Shader compilation is weird, shader arrays can't use indexing by variable
  149. // to set values unless the variable is an index in a for loop.
  150. // So instead we have to do this if check nonsense
  151. for (u32 light_count_index = 0; light_count_index < 8; light_count_index++)
  152. {
  153. out->Write("\t\t\t\t\tif (light_index == {})\n", light_count_index);
  154. out->Write("\t\t\t\t\t{{\n");
  155. if (color_type == "colorreg")
  156. {
  157. for (u32 channel_index = 0; channel_index < NUM_XF_COLOR_CHANNELS; channel_index++)
  158. {
  159. out->Write("\t\t\t\t\t\tif (chan == {})\n", channel_index);
  160. out->Write("\t\t\t\t\t\t{{\n");
  161. out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_color[{}].direction = " I_LIGHTS
  162. "[light_index].dir.xyz;\n",
  163. channel_index, light_count_index);
  164. out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_color[{}].position = " I_LIGHTS
  165. "[light_index].pos.xyz;\n",
  166. channel_index, light_count_index);
  167. out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_color[{}].cosatt = " I_LIGHTS
  168. "[light_index].cosatt;\n",
  169. channel_index, light_count_index);
  170. out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_color[{}].distatt = " I_LIGHTS
  171. "[light_index].distatt;\n",
  172. channel_index, light_count_index);
  173. out->Write(
  174. "\t\t\t\t\t\t\tcustom_data.lights_chan{}_color[{}].attenuation_type = attnfunc;\n",
  175. channel_index, light_count_index);
  176. out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_color[{}].color = " I_LIGHTS
  177. "[light_index].color.rgb / float3(255.0, 255.0, 255.0);\n",
  178. channel_index, light_count_index);
  179. out->Write("\t\t\t\t\t\t\tcustom_data.light_chan{}_color_count += 1;\n", channel_index);
  180. out->Write("\t\t\t\t\t\t}}\n");
  181. }
  182. }
  183. else
  184. {
  185. for (u32 channel_index = 0; channel_index < NUM_XF_COLOR_CHANNELS; channel_index++)
  186. {
  187. out->Write("\t\t\t\t\t\tif (chan == {})\n", channel_index);
  188. out->Write("\t\t\t\t\t\t{{\n");
  189. out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_alpha[{}].direction = " I_LIGHTS
  190. "[light_index].dir.xyz;\n",
  191. channel_index, light_count_index);
  192. out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_alpha[{}].position = " I_LIGHTS
  193. "[light_index].pos.xyz;\n",
  194. channel_index, light_count_index);
  195. out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_alpha[{}].cosatt = " I_LIGHTS
  196. "[light_index].cosatt;\n",
  197. channel_index, light_count_index);
  198. out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_alpha[{}].distatt = " I_LIGHTS
  199. "[light_index].distatt;\n",
  200. channel_index, light_count_index);
  201. out->Write(
  202. "\t\t\t\t\t\t\tcustom_data.lights_chan{}_alpha[{}].attenuation_type = attnfunc;\n",
  203. channel_index, light_count_index);
  204. out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_alpha[{}].color = float3(" I_LIGHTS
  205. "[light_index].color.a) / float3(255.0, 255.0, 255.0);\n",
  206. channel_index, light_count_index);
  207. out->Write("\t\t\t\t\t\t\tcustom_data.light_chan{}_alpha_count += 1;\n", channel_index);
  208. out->Write("\t\t\t\t\t\t}}\n");
  209. }
  210. }
  211. out->Write("\t\t\t\t\t}}\n");
  212. }
  213. out->Write("\t\t\t\t}}\n");
  214. out->Write("\t\t\t}}\n");
  215. out->Write("\t\t}}\n");
  216. }
  217. out->Write("\t}}\n");
  218. }
  219. for (u32 i = 0; i < 16; i++)
  220. {
  221. // Shader compilation complains if every struct isn't initialized
  222. // Color Input
  223. for (u32 j = 0; j < 4; j++)
  224. {
  225. out->Write("\tcustom_data.tev_stages[{}].input_color[{}].input_type = "
  226. "CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_UNUSED;\n",
  227. i, j);
  228. out->Write("\tcustom_data.tev_stages[{}].input_color[{}].value = "
  229. "float3(0, 0, 0);\n",
  230. i, j);
  231. }
  232. // Alpha Input
  233. for (u32 j = 0; j < 4; j++)
  234. {
  235. out->Write("\tcustom_data.tev_stages[{}].input_alpha[{}].input_type = "
  236. "CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_UNUSED;\n",
  237. i, j);
  238. out->Write("\tcustom_data.tev_stages[{}].input_alpha[{}].value = "
  239. "float(0);\n",
  240. i, j);
  241. }
  242. // Texmap
  243. out->Write("\tcustom_data.tev_stages[{}].texmap = 0u;\n", i);
  244. // Output
  245. out->Write("\tcustom_data.tev_stages[{}].output_color = "
  246. "float4(0, 0, 0, 0);\n",
  247. i);
  248. }
  249. // Actual data will be filled out in the tev stage code, just set the
  250. // stage count for now
  251. out->Write("\tcustom_data.tev_stage_count = num_stages;\n");
  252. // Time
  253. out->Write("\tcustom_data.time_ms = time_ms;\n");
  254. }
  255. } // namespace
  256. PixelShaderUid GetPixelShaderUid()
  257. {
  258. PixelShaderUid out;
  259. pixel_ubershader_uid_data* const uid_data = out.GetUidData();
  260. uid_data->num_texgens = xfmem.numTexGen.numTexGens;
  261. uid_data->early_depth = bpmem.GetEmulatedZ() == EmulatedZ::Early &&
  262. (g_ActiveConfig.bFastDepthCalc ||
  263. bpmem.alpha_test.TestResult() == AlphaTestResult::Undetermined) &&
  264. !(bpmem.zmode.testenable && bpmem.genMode.zfreeze);
  265. uid_data->per_pixel_depth =
  266. (bpmem.ztex2.op != ZTexOp::Disabled && bpmem.GetEmulatedZ() == EmulatedZ::Late) ||
  267. (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !uid_data->early_depth) ||
  268. (bpmem.zmode.testenable && bpmem.genMode.zfreeze);
  269. uid_data->uint_output = bpmem.blendmode.UseLogicOp();
  270. return out;
  271. }
  272. void ClearUnusedPixelShaderUidBits(APIType api_type, const ShaderHostConfig& host_config,
  273. PixelShaderUid* uid)
  274. {
  275. pixel_ubershader_uid_data* const uid_data = uid->GetUidData();
  276. // With fbfetch, ubershaders always blend using that and don't use dual src
  277. if (host_config.backend_shader_framebuffer_fetch || !host_config.backend_dual_source_blend)
  278. uid_data->no_dual_src = 1;
  279. // Dual source is always enabled in the shader if this bug is not present
  280. else if (!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING))
  281. uid_data->no_dual_src = 0;
  282. // OpenGL and Vulkan convert implicitly normalized color outputs to their uint representation.
  283. // Therefore, it is not necessary to use a uint output on these backends. We also disable the
  284. // uint output when logic op is not supported (i.e. driver/device does not support D3D11.1).
  285. if (api_type != APIType::D3D || !host_config.backend_logic_op)
  286. uid_data->uint_output = 0;
  287. }
  288. ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
  289. const pixel_ubershader_uid_data* uid_data,
  290. const CustomPixelShaderContents& custom_details)
  291. {
  292. const bool per_pixel_lighting = host_config.per_pixel_lighting;
  293. const bool msaa = host_config.msaa;
  294. const bool ssaa = host_config.ssaa;
  295. const bool stereo = host_config.stereo;
  296. const bool use_framebuffer_fetch = host_config.backend_shader_framebuffer_fetch;
  297. const bool use_dual_source = host_config.backend_dual_source_blend && !uid_data->no_dual_src;
  298. const bool early_depth = uid_data->early_depth != 0;
  299. const bool per_pixel_depth = uid_data->per_pixel_depth != 0;
  300. const bool bounding_box = host_config.bounding_box;
  301. const u32 numTexgen = uid_data->num_texgens;
  302. ShaderCode out;
  303. ASSERT_MSG(VIDEO, !(use_dual_source && use_framebuffer_fetch),
  304. "If you're using framebuffer fetch, you shouldn't need dual source blend!");
  305. out.Write("// {}\n", *uid_data);
  306. WriteBitfieldExtractHeader(out, api_type, host_config);
  307. WritePixelShaderCommonHeader(out, api_type, host_config, bounding_box, custom_details);
  308. WriteCustomShaderStructDef(&out, numTexgen);
  309. for (std::size_t i = 0; i < custom_details.shaders.size(); i++)
  310. {
  311. const auto& shader_details = custom_details.shaders[i];
  312. out.Write(fmt::runtime(shader_details.custom_shader), i);
  313. }
  314. if (per_pixel_lighting)
  315. WriteLightingFunction(out);
  316. #ifdef __APPLE__
  317. // Framebuffer fetch is only supported by Metal, so ensure that we're running Vulkan (MoltenVK)
  318. // if we want to use it.
  319. if (api_type == APIType::Vulkan || api_type == APIType::Metal)
  320. {
  321. if (use_dual_source)
  322. {
  323. out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 ocol0;\n"
  324. "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n");
  325. }
  326. else
  327. {
  328. // Metal doesn't support a single unified variable for both input and output,
  329. // so when using framebuffer fetch, we declare the input separately below.
  330. out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 {};\n",
  331. use_framebuffer_fetch ? "real_ocol0" : "ocol0");
  332. }
  333. if (use_framebuffer_fetch)
  334. {
  335. // Subpass inputs will be converted to framebuffer fetch by SPIRV-Cross.
  336. out.Write("INPUT_ATTACHMENT_BINDING(0, 0, 0) uniform subpassInput in_ocol0;\n");
  337. }
  338. }
  339. else
  340. #endif
  341. {
  342. if (use_framebuffer_fetch)
  343. {
  344. out.Write("FRAGMENT_OUTPUT_LOCATION(0) FRAGMENT_INOUT vec4 real_ocol0;\n");
  345. }
  346. else
  347. {
  348. out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out {} ocol0;\n",
  349. uid_data->uint_output ? "uvec4" : "vec4");
  350. }
  351. if (use_dual_source)
  352. {
  353. out.Write("{} out {} ocol1;\n", "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1)",
  354. uid_data->uint_output ? "uvec4" : "vec4");
  355. }
  356. }
  357. if (per_pixel_depth)
  358. out.Write("#define depth gl_FragDepth\n");
  359. if (host_config.backend_geometry_shaders)
  360. {
  361. out.Write("VARYING_LOCATION(0) in VertexData {{\n");
  362. GenerateVSOutputMembers(out, api_type, numTexgen, host_config,
  363. GetInterpolationQualifier(msaa, ssaa, true, true), ShaderStage::Pixel);
  364. out.Write("}};\n\n");
  365. if (stereo && !host_config.backend_gl_layer_in_fs)
  366. out.Write("flat in int layer;");
  367. }
  368. else
  369. {
  370. // Let's set up attributes
  371. u32 counter = 0;
  372. out.Write("VARYING_LOCATION({}) {} in float4 colors_0;\n", counter++,
  373. GetInterpolationQualifier(msaa, ssaa));
  374. out.Write("VARYING_LOCATION({}) {} in float4 colors_1;\n", counter++,
  375. GetInterpolationQualifier(msaa, ssaa));
  376. for (u32 i = 0; i < numTexgen; ++i)
  377. {
  378. out.Write("VARYING_LOCATION({}) {} in float3 tex{};\n", counter++,
  379. GetInterpolationQualifier(msaa, ssaa), i);
  380. }
  381. if (!host_config.fast_depth_calc)
  382. {
  383. out.Write("VARYING_LOCATION({}) {} in float4 clipPos;\n", counter++,
  384. GetInterpolationQualifier(msaa, ssaa));
  385. }
  386. if (per_pixel_lighting)
  387. {
  388. out.Write("VARYING_LOCATION({}) {} in float3 Normal;\n", counter++,
  389. GetInterpolationQualifier(msaa, ssaa));
  390. out.Write("VARYING_LOCATION({}) {} in float3 WorldPos;\n", counter++,
  391. GetInterpolationQualifier(msaa, ssaa));
  392. }
  393. }
  394. // Uniform index -> texture coordinates
  395. // Quirk: when the tex coord is not less than the number of tex gens (i.e. the tex coord does
  396. // not exist), then tex coord 0 is used (though sometimes glitchy effects happen on console).
  397. // This affects the Mario portrait in Luigi's Mansion, where the developers forgot to set
  398. // the number of tex gens to 2 (bug 11462).
  399. if (numTexgen > 0)
  400. {
  401. out.Write("int2 selectTexCoord(uint index");
  402. for (u32 i = 0; i < numTexgen; i++)
  403. out.Write(", int2 fixpoint_uv{}", i);
  404. out.Write(") {{\n");
  405. if (api_type == APIType::D3D)
  406. {
  407. out.Write(" switch (index) {{\n");
  408. for (u32 i = 0; i < numTexgen; i++)
  409. {
  410. out.Write(" case {}u:\n"
  411. " return fixpoint_uv{};\n",
  412. i, i);
  413. }
  414. out.Write(" default:\n"
  415. " return fixpoint_uv0;\n"
  416. " }}\n");
  417. }
  418. else
  419. {
  420. out.Write(" if (index >= {}u) {{\n", numTexgen);
  421. out.Write(" return fixpoint_uv0;\n"
  422. " }}\n");
  423. if (numTexgen > 4)
  424. out.Write(" if (index < 4u) {{\n");
  425. if (numTexgen > 2)
  426. out.Write(" if (index < 2u) {{\n");
  427. if (numTexgen > 1)
  428. out.Write(" return (index == 0u) ? fixpoint_uv0 : fixpoint_uv1;\n");
  429. else
  430. out.Write(" return fixpoint_uv0;\n");
  431. if (numTexgen > 2)
  432. {
  433. out.Write(" }} else {{\n"); // >= 2 < min(4, numTexgen)
  434. if (numTexgen > 3)
  435. out.Write(" return (index == 2u) ? fixpoint_uv2 : fixpoint_uv3;\n");
  436. else
  437. out.Write(" return fixpoint_uv2;\n");
  438. out.Write(" }}\n");
  439. }
  440. if (numTexgen > 4)
  441. {
  442. out.Write(" }} else {{\n"); // >= 4 < min(8, numTexgen)
  443. if (numTexgen > 6)
  444. out.Write(" if (index < 6u) {{\n");
  445. if (numTexgen > 5)
  446. out.Write(" return (index == 4u) ? fixpoint_uv4 : fixpoint_uv5;\n");
  447. else
  448. out.Write(" return fixpoint_uv4;\n");
  449. if (numTexgen > 6)
  450. {
  451. out.Write(" }} else {{\n"); // >= 6 < min(8, numTexgen)
  452. if (numTexgen > 7)
  453. out.Write(" return (index == 6u) ? fixpoint_uv6 : fixpoint_uv7;\n");
  454. else
  455. out.Write(" return fixpoint_uv6;\n");
  456. out.Write(" }}\n");
  457. }
  458. out.Write(" }}\n");
  459. }
  460. }
  461. out.Write("}}\n\n");
  462. out.Write("uint selectTexCoordIndex(uint texmap)");
  463. out.Write("{{\n");
  464. if (api_type == APIType::D3D)
  465. {
  466. out.Write(" switch (texmap) {{\n");
  467. for (u32 i = 0; i < numTexgen; i++)
  468. {
  469. out.Write(" case {}u:\n"
  470. " return {}u;\n",
  471. i, i);
  472. }
  473. out.Write(" default:\n"
  474. " return 0u;\n"
  475. " }}\n");
  476. }
  477. else
  478. {
  479. out.Write(" if (texmap >= {}u) {{\n", numTexgen);
  480. out.Write(" return 0u;\n"
  481. " }}\n");
  482. if (numTexgen > 4)
  483. out.Write(" if (texmap < 4u) {{\n");
  484. if (numTexgen > 2)
  485. out.Write(" if (texmap < 2u) {{\n");
  486. if (numTexgen > 1)
  487. out.Write(" return (texmap == 0u) ? 0u : 1u;\n");
  488. else
  489. out.Write(" return 0u;\n");
  490. if (numTexgen > 2)
  491. {
  492. out.Write(" }} else {{\n"); // >= 2 < min(4, numTexgen)
  493. if (numTexgen > 3)
  494. out.Write(" return (texmap == 2u) ? 2u : 3u;\n");
  495. else
  496. out.Write(" return 2u;\n");
  497. out.Write(" }}\n");
  498. }
  499. if (numTexgen > 4)
  500. {
  501. out.Write(" }} else {{\n"); // >= 4 < min(8, numTexgen)
  502. if (numTexgen > 6)
  503. out.Write(" if (texmap < 6u) {{\n");
  504. if (numTexgen > 5)
  505. out.Write(" return (texmap == 4u) ? 4u : 5u;\n");
  506. else
  507. out.Write(" return 4u;\n");
  508. if (numTexgen > 6)
  509. {
  510. out.Write(" }} else {{\n"); // >= 6 < min(8, numTexgen)
  511. if (numTexgen > 7)
  512. out.Write(" return (texmap == 6u) ? 6u : 7u;\n");
  513. else
  514. out.Write(" return 6u;\n");
  515. out.Write(" }}\n");
  516. }
  517. out.Write(" }}\n");
  518. }
  519. }
  520. out.Write("}}\n\n");
  521. }
  522. // =====================
  523. // Texture Sampling
  524. // =====================
  525. if (host_config.backend_dynamic_sampler_indexing)
  526. {
  527. // Doesn't look like DirectX supports this. Oh well the code path is here just in case it
  528. // supports this in the future.
  529. out.Write("int4 sampleTextureWrapper(uint texmap, int2 uv, int layer) {{\n");
  530. out.Write(" return sampleTexture(texmap, samp[texmap], uv, layer);\n");
  531. out.Write("}}\n\n");
  532. }
  533. else
  534. {
  535. out.Write("int4 sampleTextureWrapper(uint sampler_num, int2 uv, int layer) {{\n"
  536. " // This is messy, but DirectX, OpenGL 3.3, and OpenGL ES 3.0 don't support "
  537. "dynamic indexing of the sampler array\n"
  538. " // With any luck the shader compiler will optimise this if the hardware supports "
  539. "dynamic indexing.\n"
  540. " switch(sampler_num) {{\n");
  541. for (int i = 0; i < 8; i++)
  542. {
  543. out.Write(" case {0}u: return sampleTexture({0}u, samp[{0}u], uv, layer);\n", i);
  544. }
  545. out.Write(" }}\n"
  546. "}}\n\n");
  547. }
  548. // ======================
  549. // Arbitrary Swizzling
  550. // ======================
  551. out.Write("int4 Swizzle(uint s, int4 color) {{\n"
  552. " // AKA: Color Channel Swapping\n"
  553. "\n"
  554. " int4 ret;\n");
  555. out.Write(" ret.r = color[{}];\n", BitfieldExtract<&TevKSel::swap_rb>("bpmem_tevksel(s * 2u)"));
  556. out.Write(" ret.g = color[{}];\n", BitfieldExtract<&TevKSel::swap_ga>("bpmem_tevksel(s * 2u)"));
  557. out.Write(" ret.b = color[{}];\n",
  558. BitfieldExtract<&TevKSel::swap_rb>("bpmem_tevksel(s * 2u + 1u)"));
  559. out.Write(" ret.a = color[{}];\n",
  560. BitfieldExtract<&TevKSel::swap_ga>("bpmem_tevksel(s * 2u + 1u)"));
  561. out.Write(" return ret;\n"
  562. "}}\n\n");
  563. // ======================
  564. // Indirect Wrapping
  565. // ======================
  566. out.Write("int Wrap(int coord, uint mode) {{\n"
  567. " if (mode == 0u) // ITW_OFF\n"
  568. " return coord;\n"
  569. " else if (mode < 6u) // ITW_256 to ITW_16\n"
  570. " return coord & (0xfffe >> mode);\n"
  571. " else // ITW_0\n"
  572. " return 0;\n"
  573. "}}\n\n");
  574. // ======================
  575. // Indirect Lookup
  576. // ======================
  577. const auto LookupIndirectTexture = [&out](std::string_view out_var_name,
  578. std::string_view in_index_name) {
  579. // in_index_name is the indirect stage, not the tev stage
  580. // bpmem_iref is packed differently from RAS1_IREF
  581. // This function assumes bpmem_iref is nonzero (i.e. matrix is not off, and the
  582. // indirect texture stage is enabled).
  583. out.Write("{{\n"
  584. " uint iref = bpmem_iref({});\n"
  585. " uint texcoord = bitfieldExtract(iref, 0, 3);\n"
  586. " uint texmap = bitfieldExtract(iref, 8, 3);\n"
  587. " int2 fixedPoint_uv = getTexCoord(texcoord);\n"
  588. "\n"
  589. " if (({} & 1u) == 0u)\n"
  590. " fixedPoint_uv = fixedPoint_uv >> " I_INDTEXSCALE "[{} >> 1].xy;\n"
  591. " else\n"
  592. " fixedPoint_uv = fixedPoint_uv >> " I_INDTEXSCALE "[{} >> 1].zw;\n"
  593. "\n"
  594. " {} = sampleTextureWrapper(texmap, fixedPoint_uv, layer).abg;\n"
  595. "}}\n",
  596. in_index_name, in_index_name, in_index_name, in_index_name, out_var_name);
  597. };
  598. // ======================
  599. // TEV's Special Lerp
  600. // ======================
  601. const auto WriteTevLerp = [&out](std::string_view components) {
  602. out.Write("// TEV's Linear Interpolate, plus bias, add/subtract and scale\n"
  603. "int{0} tevLerp{0}(int{0} A, int{0} B, int{0} C, int{0} D, uint bias, bool op, "
  604. "uint scale) {{\n"
  605. " // Scale C from 0..255 to 0..256\n"
  606. " C += C >> 7;\n"
  607. "\n"
  608. " // Add bias to D\n"
  609. " if (bias == 1u) D += 128;\n"
  610. " else if (bias == 2u) D -= 128;\n"
  611. "\n"
  612. " int{0} lerp = (A << 8) + (B - A)*C;\n"
  613. " if (scale != 3u) {{\n"
  614. " lerp = lerp << scale;\n"
  615. " D = D << scale;\n"
  616. " }}\n"
  617. "\n"
  618. " // TODO: Is this rounding bias still added when the scale is divide by 2? "
  619. "Currently we "
  620. "do not apply it.\n"
  621. " if (scale != 3u)\n"
  622. " lerp = lerp + (op ? 127 : 128);\n"
  623. "\n"
  624. " int{0} result = lerp >> 8;\n"
  625. "\n"
  626. " // Add/Subtract D\n"
  627. " if (op) // Subtract\n"
  628. " result = D - result;\n"
  629. " else // Add\n"
  630. " result = D + result;\n"
  631. "\n"
  632. " // Most of the Scale was moved inside the lerp for improved precision\n"
  633. " // But we still do the divide by 2 here\n"
  634. " if (scale == 3u)\n"
  635. " result = result >> 1;\n"
  636. " return result;\n"
  637. "}}\n\n",
  638. components);
  639. };
  640. WriteTevLerp(""); // int
  641. WriteTevLerp("3"); // int3
  642. // =======================
  643. // TEV's Color Compare
  644. // =======================
  645. out.Write(
  646. "// Implements operations 0-5 of TEV's compare mode,\n"
  647. "// which are common to both color and alpha channels\n"
  648. "bool tevCompare(uint op, int3 color_A, int3 color_B) {{\n"
  649. " switch (op) {{\n"
  650. " case 0u: // TevCompareMode::R8, TevComparison::GT\n"
  651. " return (color_A.r > color_B.r);\n"
  652. " case 1u: // TevCompareMode::R8, TevComparison::EQ\n"
  653. " return (color_A.r == color_B.r);\n"
  654. " case 2u: // TevCompareMode::GR16, TevComparison::GT\n"
  655. " int A_16 = (color_A.r | (color_A.g << 8));\n"
  656. " int B_16 = (color_B.r | (color_B.g << 8));\n"
  657. " return A_16 > B_16;\n"
  658. " case 3u: // TevCompareMode::GR16, TevComparison::EQ\n"
  659. " return (color_A.r == color_B.r && color_A.g == color_B.g);\n"
  660. " case 4u: // TevCompareMode::BGR24, TevComparison::GT\n"
  661. " int A_24 = (color_A.r | (color_A.g << 8) | (color_A.b << 16));\n"
  662. " int B_24 = (color_B.r | (color_B.g << 8) | (color_B.b << 16));\n"
  663. " return A_24 > B_24;\n"
  664. " case 5u: // TevCompareMode::BGR24, TevComparison::EQ\n"
  665. " return (color_A.r == color_B.r && color_A.g == color_B.g && color_A.b == color_B.b);\n"
  666. " default:\n"
  667. " return false;\n"
  668. " }}\n"
  669. "}}\n\n");
  670. // =================
  671. // Input Selects
  672. // =================
  673. out.Write("struct State {{\n"
  674. " int4 Reg[4];\n"
  675. " int4 TexColor;\n"
  676. " int AlphaBump;\n"
  677. "}};\n"
  678. "struct StageState {{\n"
  679. " uint stage;\n"
  680. " uint order;\n"
  681. " uint cc;\n"
  682. " uint ac;\n"
  683. "}};\n"
  684. "\n"
  685. "int4 getRasColor(State s, StageState ss, float4 colors_0, float4 colors_1);\n"
  686. "int4 getKonstColor(State s, StageState ss);\n"
  687. "\n");
  688. static constexpr Common::EnumMap<std::string_view, CompareMode::Always> tev_alpha_funcs_table{
  689. "return false;", // CompareMode::Never
  690. "return a < b;", // CompareMode::Less
  691. "return a == b;", // CompareMode::Equal
  692. "return a <= b;", // CompareMode::LEqual
  693. "return a > b;", // CompareMode::Greater
  694. "return a != b;", // CompareMode::NEqual
  695. "return a >= b;", // CompareMode::GEqual
  696. "return true;" // CompareMode::Always
  697. };
  698. static constexpr Common::EnumMap<std::string_view, TevColorArg::Zero> tev_c_input_table{
  699. "return s.Reg[0].rgb;", // CPREV,
  700. "return s.Reg[0].aaa;", // APREV,
  701. "return s.Reg[1].rgb;", // C0,
  702. "return s.Reg[1].aaa;", // A0,
  703. "return s.Reg[2].rgb;", // C1,
  704. "return s.Reg[2].aaa;", // A1,
  705. "return s.Reg[3].rgb;", // C2,
  706. "return s.Reg[3].aaa;", // A2,
  707. "return s.TexColor.rgb;", // TEXC,
  708. "return s.TexColor.aaa;", // TEXA,
  709. "return getRasColor(s, ss, colors_0, colors_1).rgb;", // RASC,
  710. "return getRasColor(s, ss, colors_0, colors_1).aaa;", // RASA,
  711. "return int3(255, 255, 255);", // ONE
  712. "return int3(128, 128, 128);", // HALF
  713. "return getKonstColor(s, ss).rgb;", // KONST
  714. "return int3(0, 0, 0);", // ZERO
  715. };
  // Shader `return` statements keyed by TevColorArg; they become the switch cases of the
  // generated getColorInputType() helper. Each entry classifies the corresponding color input
  // selector as one of the CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_* categories.
  // The per-entry labels below follow the entry order of tev_c_input_table, which is keyed by
  // the same enum.
  716. static constexpr Common::EnumMap<std::string_view, TevColorArg::Zero> tev_c_input_type{
  717. "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_PREV;",     // CPREV
  718. "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_PREV;",     // APREV
  719. "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;",    // C0
  720. "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;",    // A0
  721. "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;",    // C1
  722. "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;",    // A1
  723. "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;",    // C2
  724. "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;",    // A2
  725. "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_TEX;",      // TEXC
  726. "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_TEX;",      // TEXA
  727. "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_RAS;",      // RASC
  728. "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_RAS;",      // RASA
  729. "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_NUMERIC;",  // ONE
  730. "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_NUMERIC;",  // HALF
  731. "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_KONST;",    // KONST
  732. "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_NUMERIC;",  // ZERO
  733. };
  734. static constexpr Common::EnumMap<std::string_view, TevAlphaArg::Zero> tev_a_input_table{
  735. "return s.Reg[0].a;", // APREV,
  736. "return s.Reg[1].a;", // A0,
  737. "return s.Reg[2].a;", // A1,
  738. "return s.Reg[3].a;", // A2,
  739. "return s.TexColor.a;", // TEXA,
  740. "return getRasColor(s, ss, colors_0, colors_1).a;", // RASA,
  741. "return getKonstColor(s, ss).a;", // KONST, (hw1 had quarter)
  742. "return 0;", // ZERO
  743. };
  // Shader `return` statements keyed by TevAlphaArg; they become the switch cases of the
  // generated getAlphaInputType() helper. Each entry classifies the corresponding alpha input
  // selector as one of the CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_* categories.
  // The per-entry labels below follow the entry order of tev_a_input_table, which is keyed by
  // the same enum.
  744. static constexpr Common::EnumMap<std::string_view, TevAlphaArg::Zero> tev_a_input_type{
  745. "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_PREV;",     // APREV
  746. "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;",    // A0
  747. "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;",    // A1
  748. "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;",    // A2
  749. "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_TEX;",      // TEXA
  750. "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_RAS;",      // RASA
  751. "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_KONST;",    // KONST
  752. "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_NUMERIC;",  // ZERO
  753. };
  754. static constexpr Common::EnumMap<std::string_view, TevOutput::Color2> tev_regs_lookup_table{
  755. "return s.Reg[0];",
  756. "return s.Reg[1];",
  757. "return s.Reg[2];",
  758. "return s.Reg[3];",
  759. };
  760. static constexpr Common::EnumMap<std::string_view, TevOutput::Color2> tev_c_set_table{
  761. "s.Reg[0].rgb = color;",
  762. "s.Reg[1].rgb = color;",
  763. "s.Reg[2].rgb = color;",
  764. "s.Reg[3].rgb = color;",
  765. };
  766. static constexpr Common::EnumMap<std::string_view, TevOutput::Color2> tev_a_set_table{
  767. "s.Reg[0].a = alpha;",
  768. "s.Reg[1].a = alpha;",
  769. "s.Reg[2].a = alpha;",
  770. "s.Reg[3].a = alpha;",
  771. };
  772. out.Write("// Helper function for Alpha Test\n"
  773. "bool alphaCompare(int a, int b, uint compare) {{\n");
  774. WriteSwitch(out, api_type, "compare", tev_alpha_funcs_table, 2, false);
  775. out.Write("}}\n"
  776. "\n"
  777. "int3 selectColorInput(State s, StageState ss, float4 colors_0, float4 colors_1, "
  778. "uint index) {{\n");
  779. WriteSwitch(out, api_type, "index", tev_c_input_table, 2, false);
  780. out.Write("}}\n"
  781. "\n"
  782. "int selectAlphaInput(State s, StageState ss, float4 colors_0, float4 colors_1, "
  783. "uint index) {{\n");
  784. WriteSwitch(out, api_type, "index", tev_a_input_table, 2, false);
  785. out.Write("}}\n"
  786. "\n"
  787. "int4 getTevReg(in State s, uint index) {{\n");
  788. WriteSwitch(out, api_type, "index", tev_regs_lookup_table, 2, false);
  789. out.Write("}}\n"
  790. "\n");
  791. out.Write("// Helper function for Custom Shader Input Type\n"
  792. "uint getColorInputType(uint index) {{\n");
  793. WriteSwitch(out, api_type, "index", tev_c_input_type, 2, false);
  794. out.Write("}}\n"
  795. "\n"
  796. "uint getAlphaInputType(uint index) {{\n");
  797. WriteSwitch(out, api_type, "index", tev_a_input_type, 2, false);
  798. out.Write("}}\n"
  799. "\n");
  800. // Since the fixed-point texture coordinate variables aren't global, we need to pass
  801. // them to the select function. This applies to all backends.
  802. if (numTexgen > 0)
  803. {
  804. out.Write("#define getTexCoord(index) selectTexCoord((index)");
  805. for (u32 i = 0; i < numTexgen; i++)
  806. out.Write(", fixpoint_uv{}", i);
  807. out.Write(")\n\n");
  808. }
  809. if (early_depth && host_config.backend_early_z)
  810. out.Write("FORCE_EARLY_Z;\n");
  811. out.Write("void main()\n{{\n");
  812. out.Write(" float4 rawpos = gl_FragCoord;\n");
  813. out.Write(" uint num_stages = {};\n\n",
  814. BitfieldExtract<&GenMode::numtevstages>("bpmem_genmode"));
  815. bool has_custom_shader_details = false;
  816. if (std::any_of(custom_details.shaders.begin(), custom_details.shaders.end(),
  817. [](const std::optional<CustomPixelShader>& ps) { return ps.has_value(); }))
  818. {
  819. WriteCustomShaderStructImpl(&out, numTexgen, per_pixel_lighting);
  820. has_custom_shader_details = true;
  821. }
  822. if (use_framebuffer_fetch)
  823. {
  824. // Store off a copy of the initial framebuffer value.
  825. //
  826. // If FB_FETCH_VALUE isn't defined (i.e. no special keyword for fetching from the
  827. // framebuffer), we read from real_ocol0.
  828. out.Write("#ifdef FB_FETCH_VALUE\n"
  829. " float4 initial_ocol0 = FB_FETCH_VALUE;\n"
  830. "#else\n"
  831. " float4 initial_ocol0 = real_ocol0;\n"
  832. "#endif\n");
  833. // QComm's Adreno driver doesn't seem to like using the framebuffer_fetch value as an
  834. // intermediate value with multiple reads & modifications, so we pull out the "real" output
  835. // value above and use a temporary for calculations, then set the output value once at the
  836. // end of the shader.
  837. out.Write(" float4 ocol0;\n"
  838. " float4 ocol1;\n");
  839. }
  840. if (host_config.backend_geometry_shaders && stereo)
  841. {
  842. if (host_config.backend_gl_layer_in_fs)
  843. out.Write("\tint layer = gl_Layer;\n");
  844. }
  845. else
  846. {
  847. out.Write("\tint layer = 0;\n");
  848. }
  849. out.Write(" int3 tevcoord = int3(0, 0, 0);\n"
  850. " State s;\n"
  851. " s.TexColor = int4(0, 0, 0, 0);\n"
  852. " s.AlphaBump = 0;\n"
  853. "\n");
  854. for (int i = 0; i < 4; i++)
  855. out.Write(" s.Reg[{}] = " I_COLORS "[{}];\n", i, i);
  856. const char* color_input_prefix = "";
  857. if (per_pixel_lighting)
  858. {
  859. out.Write(" float4 lit_colors_0 = colors_0;\n"
  860. " float4 lit_colors_1 = colors_1;\n"
  861. " float3 lit_normal = normalize(Normal.xyz);\n"
  862. " float3 lit_pos = WorldPos.xyz;\n");
  863. WriteVertexLighting(out, api_type, "lit_pos", "lit_normal", "colors_0", "colors_1",
  864. "lit_colors_0", "lit_colors_1");
  865. color_input_prefix = "lit_";
  866. out.Write(" // The number of colors available to TEV is determined by numColorChans.\n"
  867. " // Normally this is performed in the vertex shader after lighting,\n"
  868. " // but with per-pixel lighting, we need to perform it here.\n"
  869. " // TODO: Actually implement this for ubershaders\n"
  870. " // if (xfmem_numColorChans == 0u)\n"
  871. " // o.colors_0 = float4(0.0, 0.0, 0.0, 0.0);\n"
  872. " // if (xfmem_numColorChans <= 1u)\n"
  873. " // o.colors_1 = float4(0.0, 0.0, 0.0, 0.0);\n");
  874. }
  875. out.Write(" // Main tev loop\n");
  876. out.Write(" for(uint stage = 0u; stage <= num_stages; stage++)\n"
  877. " {{\n"
  878. " StageState ss;\n"
  879. " ss.stage = stage;\n"
  880. " ss.cc = bpmem_combiners(stage).x;\n"
  881. " ss.ac = bpmem_combiners(stage).y;\n"
  882. " ss.order = bpmem_tevorder(stage>>1);\n"
  883. " if ((stage & 1u) == 1u)\n"
  884. " ss.order = ss.order >> {};\n\n",
  885. int(TwoTevStageOrders().enable_tex_odd.StartBit() -
  886. TwoTevStageOrders().enable_tex_even.StartBit()));
  887. // Disable texturing when there are no texgens (for now)
  888. if (numTexgen != 0)
  889. {
  890. for (u32 i = 0; i < numTexgen; i++)
  891. {
  892. out.Write(" int2 fixpoint_uv{} = int2(", i);
  893. out.Write("(tex{}.z == 0.0 ? tex{}.xy : tex{}.xy / tex{}.z)", i, i, i, i);
  894. out.Write(" * float2(" I_TEXDIMS "[{}].zw * 128));\n", i);
  895. // TODO: S24 overflows here?
  896. }
  897. out.Write("\n"
  898. " uint tex_coord = {};\n",
  899. BitfieldExtract<&TwoTevStageOrders::texcoord_even>("ss.order"));
  900. out.Write(" int2 fixedPoint_uv = getTexCoord(tex_coord);\n"
  901. "\n"
  902. " bool texture_enabled = (ss.order & {}u) != 0u;\n",
  903. 1 << TwoTevStageOrders().enable_tex_even.StartBit());
  904. out.Write("\n"
  905. " // Indirect textures\n"
  906. " uint tevind = bpmem_tevind(stage);\n"
  907. " if (tevind != 0u)\n"
  908. " {{\n"
  909. " uint bs = {};\n",
  910. BitfieldExtract<&TevStageIndirect::bs>("tevind"));
  911. out.Write(" uint fmt = {};\n", BitfieldExtract<&TevStageIndirect::fmt>("tevind"));
  912. out.Write(" uint bias = {};\n", BitfieldExtract<&TevStageIndirect::bias>("tevind"));
  913. out.Write(" uint bt = {};\n", BitfieldExtract<&TevStageIndirect::bt>("tevind"));
  914. out.Write(" uint matrix_index = {};\n",
  915. BitfieldExtract<&TevStageIndirect::matrix_index>("tevind"));
  916. out.Write(" uint matrix_id = {};\n",
  917. BitfieldExtract<&TevStageIndirect::matrix_id>("tevind"));
  918. out.Write(" int2 indtevtrans = int2(0, 0);\n"
  919. "\n");
  920. // There is always a bit set in bpmem_iref if the data is valid (matrix is not off, and the
  921. // indirect texture stage is enabled). If the matrix is off, the result doesn't matter; if the
  922. // indirect texture stage is disabled, the result is undefined (and produces a glitchy pattern
  923. // on hardware, different from this).
  924. // For the undefined case, we just skip applying the indirect operation, which is close
  925. // enough. Viewtiful Joe hits the undefined case (bug 12525). Wrapping and add to previous
  926. // still apply in this case (and when the stage is disabled).
  927. out.Write(" if (bpmem_iref(bt) != 0u) {{\n");
  928. out.Write(" int3 indcoord;\n");
  929. LookupIndirectTexture("indcoord", "bt");
  930. out.Write(" if (bs != 0u)\n"
  931. " s.AlphaBump = indcoord[bs - 1u];\n"
  932. " switch(fmt)\n"
  933. " {{\n"
  934. " case {:s}:\n",
  935. IndTexFormat::ITF_8);
  936. out.Write(" indcoord.x = indcoord.x + ((bias & 1u) != 0u ? -128 : 0);\n"
  937. " indcoord.y = indcoord.y + ((bias & 2u) != 0u ? -128 : 0);\n"
  938. " indcoord.z = indcoord.z + ((bias & 4u) != 0u ? -128 : 0);\n"
  939. " s.AlphaBump = s.AlphaBump & 0xf8;\n"
  940. " break;\n"
  941. " case {:s}:\n",
  942. IndTexFormat::ITF_5);
  943. out.Write(" indcoord.x = (indcoord.x >> 3) + ((bias & 1u) != 0u ? 1 : 0);\n"
  944. " indcoord.y = (indcoord.y >> 3) + ((bias & 2u) != 0u ? 1 : 0);\n"
  945. " indcoord.z = (indcoord.z >> 3) + ((bias & 4u) != 0u ? 1 : 0);\n"
  946. " s.AlphaBump = s.AlphaBump << 5;\n"
  947. " break;\n"
  948. " case {:s}:\n",
  949. IndTexFormat::ITF_4);
  950. out.Write(" indcoord.x = (indcoord.x >> 4) + ((bias & 1u) != 0u ? 1 : 0);\n"
  951. " indcoord.y = (indcoord.y >> 4) + ((bias & 2u) != 0u ? 1 : 0);\n"
  952. " indcoord.z = (indcoord.z >> 4) + ((bias & 4u) != 0u ? 1 : 0);\n"
  953. " s.AlphaBump = s.AlphaBump << 4;\n"
  954. " break;\n"
  955. " case {:s}:\n",
  956. IndTexFormat::ITF_3);
  957. out.Write(" indcoord.x = (indcoord.x >> 5) + ((bias & 1u) != 0u ? 1 : 0);\n"
  958. " indcoord.y = (indcoord.y >> 5) + ((bias & 2u) != 0u ? 1 : 0);\n"
  959. " indcoord.z = (indcoord.z >> 5) + ((bias & 4u) != 0u ? 1 : 0);\n"
  960. " s.AlphaBump = s.AlphaBump << 3;\n"
  961. " break;\n"
  962. " }}\n"
  963. "\n"
  964. " // Matrix multiply\n"
  965. " if (matrix_index != 0u)\n"
  966. " {{\n"
  967. " uint mtxidx = 2u * (matrix_index - 1u);\n"
  968. " int shift = " I_INDTEXMTX "[mtxidx].w;\n"
  969. "\n"
  970. " switch (matrix_id)\n"
  971. " {{\n"
  972. " case 0u: // 3x2 S0.10 matrix\n"
  973. " indtevtrans = int2(idot(" I_INDTEXMTX
  974. "[mtxidx].xyz, indcoord), idot(" I_INDTEXMTX "[mtxidx + 1u].xyz, indcoord)) >> 3;\n"
  975. " break;\n"
  976. " case 1u: // S matrix, S17.7 format\n"
  977. " indtevtrans = (fixedPoint_uv * indcoord.xx) >> 8;\n"
  978. " break;\n"
  979. " case 2u: // T matrix, S17.7 format\n"
  980. " indtevtrans = (fixedPoint_uv * indcoord.yy) >> 8;\n"
  981. " break;\n"
  982. " }}\n"
  983. "\n"
  984. " if (shift >= 0)\n"
  985. " indtevtrans = indtevtrans >> shift;\n"
  986. " else\n"
  987. " indtevtrans = indtevtrans << ((-shift) & 31);\n"
  988. " }}\n"
  989. " }}\n"
  990. "\n"
  991. " // Wrapping\n"
  992. " uint sw = {};\n",
  993. BitfieldExtract<&TevStageIndirect::sw>("tevind"));
  994. out.Write(" uint tw = {}; \n", BitfieldExtract<&TevStageIndirect::tw>("tevind"));
  995. out.Write(
  996. " int2 wrapped_coord = int2(Wrap(fixedPoint_uv.x, sw), Wrap(fixedPoint_uv.y, tw));\n"
  997. "\n"
  998. " if ((tevind & {}u) != 0u) // add previous tevcoord\n",
  999. 1 << TevStageIndirect().fb_addprev.StartBit());
  1000. out.Write(" tevcoord.xy += wrapped_coord + indtevtrans;\n"
  1001. " else\n"
  1002. " tevcoord.xy = wrapped_coord + indtevtrans;\n"
  1003. "\n"
  1004. " // Emulate s24 overflows\n"
  1005. " tevcoord.xy = (tevcoord.xy << 8) >> 8;\n"
  1006. " }}\n"
  1007. " else\n"
  1008. " {{\n"
  1009. " tevcoord.xy = fixedPoint_uv;\n"
  1010. " }}\n"
  1011. "\n"
  1012. " // Sample texture for stage\n"
  1013. " if (texture_enabled) {{\n"
  1014. " uint sampler_num = {};\n",
  1015. BitfieldExtract<&TwoTevStageOrders::texmap_even>("ss.order"));
  1016. out.Write("\n"
  1017. " int4 color = sampleTextureWrapper(sampler_num, tevcoord.xy, layer);\n"
  1018. " uint swap = {};\n",
  1019. BitfieldExtract<&TevStageCombiner::AlphaCombiner::tswap>("ss.ac"));
  1020. out.Write(" s.TexColor = Swizzle(swap, color);\n");
  1021. out.Write(" }} else {{\n"
  1022. " // Texture is disabled\n"
  1023. " s.TexColor = int4(255, 255, 255, 255);\n"
  1024. " }}\n"
  1025. "\n");
  1026. }
  1027. out.Write(" // This is the Meat of TEV\n"
  1028. " {{\n"
  1029. " // Color Combiner\n");
  1030. out.Write(" uint color_a = {};\n",
  1031. BitfieldExtract<&TevStageCombiner::ColorCombiner::a>("ss.cc"));
  1032. out.Write(" uint color_b = {};\n",
  1033. BitfieldExtract<&TevStageCombiner::ColorCombiner::b>("ss.cc"));
  1034. out.Write(" uint color_c = {};\n",
  1035. BitfieldExtract<&TevStageCombiner::ColorCombiner::c>("ss.cc"));
  1036. out.Write(" uint color_d = {};\n",
  1037. BitfieldExtract<&TevStageCombiner::ColorCombiner::d>("ss.cc"));
  1038. out.Write(" uint color_bias = {};\n",
  1039. BitfieldExtract<&TevStageCombiner::ColorCombiner::bias>("ss.cc"));
  1040. out.Write(" bool color_op = bool({});\n",
  1041. BitfieldExtract<&TevStageCombiner::ColorCombiner::op>("ss.cc"));
  1042. out.Write(" bool color_clamp = bool({});\n",
  1043. BitfieldExtract<&TevStageCombiner::ColorCombiner::clamp>("ss.cc"));
  1044. out.Write(" uint color_scale = {};\n",
  1045. BitfieldExtract<&TevStageCombiner::ColorCombiner::scale>("ss.cc"));
  1046. out.Write(" uint color_dest = {};\n",
  1047. BitfieldExtract<&TevStageCombiner::ColorCombiner::dest>("ss.cc"));
  1048. out.Write(
  1049. " uint color_compare_op = color_scale << 1 | uint(color_op);\n"
  1050. "\n"
  1051. " int3 color_A = selectColorInput(s, ss, {0}colors_0, {0}colors_1, color_a) & "
  1052. "int3(255, 255, 255);\n"
  1053. " int3 color_B = selectColorInput(s, ss, {0}colors_0, {0}colors_1, color_b) & "
  1054. "int3(255, 255, 255);\n"
  1055. " int3 color_C = selectColorInput(s, ss, {0}colors_0, {0}colors_1, color_c) & "
  1056. "int3(255, 255, 255);\n"
  1057. " int3 color_D = selectColorInput(s, ss, {0}colors_0, {0}colors_1, color_d); // 10 "
  1058. "bits + sign\n"
  1059. "\n", // TODO: do we need to sign extend?
  1060. color_input_prefix);
  1061. out.Write(
  1062. " int3 color;\n"
  1063. " if (color_bias != 3u) {{ // Normal mode\n"
  1064. " color = tevLerp3(color_A, color_B, color_C, color_D, color_bias, color_op, "
  1065. "color_scale);\n"
  1066. " }} else {{ // Compare mode\n"
  1067. " // op 6 and 7 do a select per color channel\n"
  1068. " if (color_compare_op == 6u) {{\n"
  1069. " // TevCompareMode::RGB8, TevComparison::GT\n"
  1070. " color.r = (color_A.r > color_B.r) ? color_C.r : 0;\n"
  1071. " color.g = (color_A.g > color_B.g) ? color_C.g : 0;\n"
  1072. " color.b = (color_A.b > color_B.b) ? color_C.b : 0;\n"
  1073. " }} else if (color_compare_op == 7u) {{\n"
  1074. " // TevCompareMode::RGB8, TevComparison::EQ\n"
  1075. " color.r = (color_A.r == color_B.r) ? color_C.r : 0;\n"
  1076. " color.g = (color_A.g == color_B.g) ? color_C.g : 0;\n"
  1077. " color.b = (color_A.b == color_B.b) ? color_C.b : 0;\n"
  1078. " }} else {{\n"
  1079. " // The remaining ops do one compare which selects all 3 channels\n"
  1080. " color = tevCompare(color_compare_op, color_A, color_B) ? color_C : int3(0, 0, "
  1081. "0);\n"
  1082. " }}\n"
  1083. " color = color_D + color;\n"
  1084. " }}\n"
  1085. "\n"
  1086. " // Clamp result\n"
  1087. " if (color_clamp)\n"
  1088. " color = clamp(color, 0, 255);\n"
  1089. " else\n"
  1090. " color = clamp(color, -1024, 1023);\n"
  1091. "\n"
  1092. " // Write result to the correct input register of the next stage\n");
  1093. WriteSwitch(out, api_type, "color_dest", tev_c_set_table, 6, true);
  1094. out.Write("\n");
  1095. // Alpha combiner
  1096. out.Write(" // Alpha Combiner\n");
  1097. out.Write(" uint alpha_a = {};\n",
  1098. BitfieldExtract<&TevStageCombiner::AlphaCombiner::a>("ss.ac"));
  1099. out.Write(" uint alpha_b = {};\n",
  1100. BitfieldExtract<&TevStageCombiner::AlphaCombiner::b>("ss.ac"));
  1101. out.Write(" uint alpha_c = {};\n",
  1102. BitfieldExtract<&TevStageCombiner::AlphaCombiner::c>("ss.ac"));
  1103. out.Write(" uint alpha_d = {};\n",
  1104. BitfieldExtract<&TevStageCombiner::AlphaCombiner::d>("ss.ac"));
  1105. out.Write(" uint alpha_bias = {};\n",
  1106. BitfieldExtract<&TevStageCombiner::AlphaCombiner::bias>("ss.ac"));
  1107. out.Write(" bool alpha_op = bool({});\n",
  1108. BitfieldExtract<&TevStageCombiner::AlphaCombiner::op>("ss.ac"));
  1109. out.Write(" bool alpha_clamp = bool({});\n",
  1110. BitfieldExtract<&TevStageCombiner::AlphaCombiner::clamp>("ss.ac"));
  1111. out.Write(" uint alpha_scale = {};\n",
  1112. BitfieldExtract<&TevStageCombiner::AlphaCombiner::scale>("ss.ac"));
  1113. out.Write(" uint alpha_dest = {};\n",
  1114. BitfieldExtract<&TevStageCombiner::AlphaCombiner::dest>("ss.ac"));
  1115. out.Write(
  1116. " uint alpha_compare_op = alpha_scale << 1 | uint(alpha_op);\n"
  1117. "\n"
  1118. " int alpha_A = 0;\n"
  1119. " int alpha_B = 0;\n"
  1120. " if (alpha_bias != 3u || alpha_compare_op > 5u) {{\n"
  1121. " // Small optimisation here: alpha_A and alpha_B are unused by compare ops 0-5\n"
  1122. " alpha_A = selectAlphaInput(s, ss, {0}colors_0, {0}colors_1, alpha_a) & 255;\n"
  1123. " alpha_B = selectAlphaInput(s, ss, {0}colors_0, {0}colors_1, alpha_b) & 255;\n"
  1124. " }};\n"
  1125. " int alpha_C = selectAlphaInput(s, ss, {0}colors_0, {0}colors_1, alpha_c) & 255;\n"
  1126. " int alpha_D = selectAlphaInput(s, ss, {0}colors_0, {0}colors_1, alpha_d); // 10 "
  1127. "bits "
  1128. "+ sign\n"
  1129. "\n", // TODO: do we need to sign extend?
  1130. color_input_prefix);
  1131. out.Write("\n"
  1132. " int alpha;\n"
  1133. " if (alpha_bias != 3u) {{ // Normal mode\n"
  1134. " alpha = tevLerp(alpha_A, alpha_B, alpha_C, alpha_D, alpha_bias, alpha_op, "
  1135. "alpha_scale);\n"
  1136. " }} else {{ // Compare mode\n"
  1137. " if (alpha_compare_op == 6u) {{\n"
  1138. " // TevCompareMode::A8, TevComparison::GT\n"
  1139. " alpha = (alpha_A > alpha_B) ? alpha_C : 0;\n"
  1140. " }} else if (alpha_compare_op == 7u) {{\n"
  1141. " // TevCompareMode::A8, TevComparison::EQ\n"
  1142. " alpha = (alpha_A == alpha_B) ? alpha_C : 0;\n"
  1143. " }} else {{\n"
  1144. " // All remaining alpha compare ops actually compare the color channels\n"
  1145. " alpha = tevCompare(alpha_compare_op, color_A, color_B) ? alpha_C : 0;\n"
  1146. " }}\n"
  1147. " alpha = alpha_D + alpha;\n"
  1148. " }}\n"
  1149. "\n"
  1150. " // Clamp result\n"
  1151. " if (alpha_clamp)\n"
  1152. " alpha = clamp(alpha, 0, 255);\n"
  1153. " else\n"
  1154. " alpha = clamp(alpha, -1024, 1023);\n"
  1155. "\n"
  1156. " // Write result to the correct input register of the next stage\n");
  1157. WriteSwitch(out, api_type, "alpha_dest", tev_a_set_table, 6, true);
  1158. if (has_custom_shader_details)
  1159. {
  1160. for (u32 stage_index = 0; stage_index < 16; stage_index++)
  1161. {
  1162. out.Write("\tif (stage == {}u) {{\n", stage_index);
  1163. // Color input
  1164. out.Write("\t\tcustom_data.tev_stages[{}].input_color[0].value = color_A / float3(255.0, "
  1165. "255.0, 255.0);\n",
  1166. stage_index);
  1167. out.Write("\t\tcustom_data.tev_stages[{}].input_color[0].input_type = "
  1168. "getColorInputType(color_a);\n",
  1169. stage_index);
  1170. out.Write("\t\tcustom_data.tev_stages[{}].input_color[1].value = color_B / float3(255.0, "
  1171. "255.0, 255.0);\n",
  1172. stage_index);
  1173. out.Write("\t\tcustom_data.tev_stages[{}].input_color[1].input_type = "
  1174. "getColorInputType(color_b);\n",
  1175. stage_index);
  1176. out.Write("\t\tcustom_data.tev_stages[{}].input_color[2].value = color_C / float3(255.0, "
  1177. "255.0, 255.0);\n",
  1178. stage_index);
  1179. out.Write("\t\tcustom_data.tev_stages[{}].input_color[2].input_type = "
  1180. "getColorInputType(color_c);\n",
  1181. stage_index);
  1182. out.Write("\t\tcustom_data.tev_stages[{}].input_color[3].value = color_D / float3(255.0, "
  1183. "255.0, 255.0);\n",
  1184. stage_index);
  1185. out.Write("\t\tcustom_data.tev_stages[{}].input_color[3].input_type = "
  1186. "getColorInputType(color_c);\n",
  1187. stage_index);
  1188. // Alpha input
  1189. out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[0].value = alpha_A / float(255.0);\n",
  1190. stage_index);
  1191. out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[0].input_type = "
  1192. "getAlphaInputType(alpha_a);\n",
  1193. stage_index);
  1194. out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[1].value = alpha_B / float(255.0);\n",
  1195. stage_index);
  1196. out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[1].input_type = "
  1197. "getAlphaInputType(alpha_b);\n",
  1198. stage_index);
  1199. out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[2].value = alpha_C / float(255.0);\n",
  1200. stage_index);
  1201. out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[2].input_type = "
  1202. "getAlphaInputType(alpha_c);\n",
  1203. stage_index);
  1204. out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[3].value = alpha_D / float(255.0);\n",
  1205. stage_index);
  1206. out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[3].input_type = "
  1207. "getAlphaInputType(alpha_d);\n",
  1208. stage_index);
  1209. if (numTexgen != 0)
  1210. {
  1211. // Texmap
  1212. out.Write("\t\tif (texture_enabled) {{\n");
  1213. out.Write("\t\t\tuint sampler_num = {};\n",
  1214. BitfieldExtract<&TwoTevStageOrders::texmap_even>("ss.order"));
  1215. out.Write("\t\tcustom_data.tev_stages[{}].texmap = sampler_num;\n", stage_index);
  1216. out.Write("\t\t}}\n");
  1217. }
  1218. // Output
  1219. out.Write("\t\tcustom_data.tev_stages[{}].output_color.rgb = color / float3(255.0, 255.0, "
  1220. "255.0);\n",
  1221. stage_index);
  1222. out.Write("\t\tcustom_data.tev_stages[{}].output_color.a = alpha / float(255.0);\n",
  1223. stage_index);
  1224. out.Write("\t}}\n");
  1225. }
  1226. }
  1227. out.Write(" }}\n");
  1228. out.Write(" }} // Main TEV loop\n");
  1229. out.Write("\n");
  1230. // Select the output color and alpha registers from the last stage.
  1231. out.Write(" int4 TevResult;\n");
  1232. out.Write(
  1233. " TevResult.xyz = getTevReg(s, {}).xyz;\n",
  1234. BitfieldExtract<&TevStageCombiner::ColorCombiner::dest>("bpmem_combiners(num_stages).x"));
  1235. out.Write(
  1236. " TevResult.w = getTevReg(s, {}).w;\n",
  1237. BitfieldExtract<&TevStageCombiner::AlphaCombiner::dest>("bpmem_combiners(num_stages).y"));
  1238. out.Write(" TevResult &= 255;\n\n");
  1239. if (host_config.fast_depth_calc)
  1240. {
  1241. if (!host_config.backend_reversed_depth_range)
  1242. out.Write(" int zCoord = int((1.0 - rawpos.z) * 16777216.0);\n");
  1243. else
  1244. out.Write(" int zCoord = int(rawpos.z * 16777216.0);\n");
  1245. out.Write(" zCoord = clamp(zCoord, 0, 0xFFFFFF);\n"
  1246. "\n");
  1247. }
  1248. else
  1249. {
  1250. out.Write("\tint zCoord = " I_ZBIAS "[1].x + int((clipPos.z / clipPos.w) * float(" I_ZBIAS
  1251. "[1].y));\n");
  1252. }
  1253. // ===========
  1254. // ZFreeze
  1255. // ===========
  1256. if (per_pixel_depth)
  1257. {
  1258. // Zfreeze forces early depth off
  1259. out.Write(" // ZFreeze\n"
  1260. " if ((bpmem_genmode & {}u) != 0u) {{\n",
  1261. 1 << GenMode().zfreeze.StartBit());
  1262. out.Write(" float2 screenpos = rawpos.xy * " I_EFBSCALE ".xy;\n");
  1263. if (api_type == APIType::OpenGL)
  1264. {
  1265. out.Write(" // OpenGL has reversed vertical screenspace coordinates\n"
  1266. " screenpos.y = 528.0 - screenpos.y;\n");
  1267. }
  1268. out.Write(" zCoord = int(" I_ZSLOPE ".z + " I_ZSLOPE ".x * screenpos.x + " I_ZSLOPE
  1269. ".y * screenpos.y);\n"
  1270. " }}\n"
  1271. "\n");
  1272. }
  1273. // =================
  1274. // Depth Texture
  1275. // =================
  1276. out.Write(" // Depth Texture\n"
  1277. " int early_zCoord = zCoord;\n"
  1278. " if (bpmem_ztex_op != 0u) {{\n"
  1279. " int ztex = int(" I_ZBIAS "[1].w); // fixed bias\n"
  1280. "\n"
  1281. " // Whatever texture was in our last stage, it's now our depth texture\n"
  1282. " ztex += idot(s.TexColor.xyzw, " I_ZBIAS "[0].xyzw);\n"
  1283. " ztex += (bpmem_ztex_op == 1u) ? zCoord : 0;\n"
  1284. " zCoord = ztex & 0xFFFFFF;\n"
  1285. " }}\n"
  1286. "\n");
  1287. if (per_pixel_depth)
  1288. {
  1289. out.Write(" // If early depth is enabled, write to zbuffer before depth textures\n"
  1290. " // If early depth isn't enabled, we write to the zbuffer here\n"
  1291. " int zbuffer_zCoord = bpmem_late_ztest ? zCoord : early_zCoord;\n");
  1292. if (!host_config.backend_reversed_depth_range)
  1293. out.Write(" depth = 1.0 - float(zbuffer_zCoord) / 16777216.0;\n");
  1294. else
  1295. out.Write(" depth = float(zbuffer_zCoord) / 16777216.0;\n");
  1296. }
  1297. out.Write(" // Alpha Test\n");
  1298. if (early_depth && DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z))
  1299. {
  1300. // Instead of using discard, fetch the framebuffer's color value and use it as the output
  1301. // for this fragment.
  1302. out.Write(" #define discard_fragment {{ real_ocol0 = float4(initial_ocol0.xyz, 1.0); "
  1303. "return; }}\n");
  1304. }
  1305. else
  1306. {
  1307. out.Write(" #define discard_fragment discard\n");
  1308. }
  1309. out.Write(" if (bpmem_alphaTest != 0u) {{\n"
  1310. " bool comp0 = alphaCompare(TevResult.a, " I_ALPHA ".r, {});\n",
  1311. BitfieldExtract<&AlphaTest::comp0>("bpmem_alphaTest"));
  1312. out.Write(" bool comp1 = alphaCompare(TevResult.a, " I_ALPHA ".g, {});\n",
  1313. BitfieldExtract<&AlphaTest::comp1>("bpmem_alphaTest"));
  1314. out.Write("\n"
  1315. " // These if statements are written weirdly to work around intel and Qualcomm "
  1316. "bugs with handling booleans.\n"
  1317. " switch ({}) {{\n",
  1318. BitfieldExtract<&AlphaTest::logic>("bpmem_alphaTest"));
  1319. out.Write(" case 0u: // AND\n"
  1320. " if (comp0 && comp1) break; else discard_fragment; break;\n"
  1321. " case 1u: // OR\n"
  1322. " if (comp0 || comp1) break; else discard_fragment; break;\n"
  1323. " case 2u: // XOR\n"
  1324. " if (comp0 != comp1) break; else discard_fragment; break;\n"
  1325. " case 3u: // XNOR\n"
  1326. " if (comp0 == comp1) break; else discard_fragment; break;\n"
  1327. " }}\n"
  1328. " }}\n"
  1329. "\n");
  1330. out.Write(" // Hardware testing indicates that an alpha of 1 can pass an alpha test,\n"
  1331. " // but doesn't do anything in blending\n"
  1332. " if (TevResult.a == 1) TevResult.a = 0;\n");
  1333. // =========
  1334. // Dithering
  1335. // =========
  1336. out.Write(" if (bpmem_dither) {{\n"
  1337. " // Flipper uses a standard 2x2 Bayer Matrix for 6 bit dithering\n"
  1338. " // Here the matrix is encoded into the two factor constants\n"
  1339. " int2 dither = int2(rawpos.xy) & 1;\n"
  1340. " TevResult.rgb = (TevResult.rgb - (TevResult.rgb >> 6)) + abs(dither.y * 3 - "
  1341. "dither.x * 2);\n"
  1342. " }}\n\n");
  1343. // =========
  1344. // Fog
  1345. // =========
  1346. // FIXME: Fog is implemented the same as ShaderGen, but ShaderGen's fog is all hacks.
  1347. // Should be fixed point, and should not make guesses about Range-Based adjustments.
  1348. out.Write(" // Fog\n"
  1349. " uint fog_function = {};\n",
  1350. BitfieldExtract<&FogParam3::fsel>("bpmem_fogParam3"));
  1351. out.Write(" if (fog_function != {:s}) {{\n", FogType::Off);
  1352. out.Write(" // TODO: This all needs to be converted from float to fixed point\n"
  1353. " float ze;\n"
  1354. " if ({} == 0u) {{\n",
  1355. BitfieldExtract<&FogParam3::proj>("bpmem_fogParam3"));
  1356. out.Write(" // perspective\n"
  1357. " // ze = A/(B - (Zs >> B_SHF)\n"
  1358. " ze = (" I_FOGF ".x * 16777216.0) / float(" I_FOGI ".y - (zCoord >> " I_FOGI
  1359. ".w));\n"
  1360. " }} else {{\n"
  1361. " // orthographic\n"
  1362. " // ze = a*Zs (here, no B_SHF)\n"
  1363. " ze = " I_FOGF ".x * float(zCoord) / 16777216.0;\n"
  1364. " }}\n"
  1365. "\n"
  1366. " if (bool({})) {{\n",
  1367. BitfieldExtract<&FogRangeParams::RangeBase::Enabled>("bpmem_fogRangeBase"));
  1368. out.Write(" // x_adjust = sqrt((x-center)^2 + k^2)/k\n"
  1369. " // ze *= x_adjust\n"
  1370. " float offset = (2.0 * (rawpos.x / " I_FOGF ".w)) - 1.0 - " I_FOGF ".z;\n"
  1371. " float floatindex = clamp(9.0 - abs(offset) * 9.0, 0.0, 9.0);\n"
  1372. " uint indexlower = uint(floatindex);\n"
  1373. " uint indexupper = indexlower + 1u;\n"
  1374. " float klower = " I_FOGRANGE "[indexlower >> 2u][indexlower & 3u];\n"
  1375. " float kupper = " I_FOGRANGE "[indexupper >> 2u][indexupper & 3u];\n"
  1376. " float k = lerp(klower, kupper, frac(floatindex));\n"
  1377. " float x_adjust = sqrt(offset * offset + k * k) / k;\n"
  1378. " ze *= x_adjust;\n"
  1379. " }}\n"
  1380. "\n"
  1381. " float fog = clamp(ze - " I_FOGF ".y, 0.0, 1.0);\n"
  1382. "\n");
  1383. out.Write(" if (fog_function >= {:s}) {{\n", FogType::Exp);
  1384. out.Write(" switch (fog_function) {{\n"
  1385. " case {:s}:\n"
  1386. " fog = 1.0 - exp2(-8.0 * fog);\n"
  1387. " break;\n",
  1388. FogType::Exp);
  1389. out.Write(" case {:s}:\n"
  1390. " fog = 1.0 - exp2(-8.0 * fog * fog);\n"
  1391. " break;\n",
  1392. FogType::ExpSq);
  1393. out.Write(" case {:s}:\n"
  1394. " fog = exp2(-8.0 * (1.0 - fog));\n"
  1395. " break;\n",
  1396. FogType::BackwardsExp);
  1397. out.Write(" case {:s}:\n"
  1398. " fog = 1.0 - fog;\n"
  1399. " fog = exp2(-8.0 * fog * fog);\n"
  1400. " break;\n",
  1401. FogType::BackwardsExpSq);
  1402. out.Write(" }}\n"
  1403. " }}\n"
  1404. "\n"
  1405. " int ifog = iround(fog * 256.0);\n"
  1406. " TevResult.rgb = (TevResult.rgb * (256 - ifog) + " I_FOGCOLOR ".rgb * ifog) >> 8;\n"
  1407. " }}\n"
  1408. "\n");
  1409. for (std::size_t i = 0; i < custom_details.shaders.size(); i++)
  1410. {
  1411. const auto& shader_details = custom_details.shaders[i];
  1412. if (!shader_details.custom_shader.empty())
  1413. {
  1414. out.Write("\t{{\n");
  1415. out.Write("\t\tcustom_data.final_color = float4(TevResult.r / 255.0, TevResult.g / 255.0, "
  1416. "TevResult.b / 255.0, TevResult.a / 255.0);\n");
  1417. out.Write("\t\tCustomShaderOutput custom_output = {}_{}(custom_data);\n",
  1418. CUSTOM_PIXELSHADER_COLOR_FUNC, i);
  1419. out.Write(
  1420. "\t\tTevResult = int4(custom_output.main_rt.r * 255, custom_output.main_rt.g * 255, "
  1421. "custom_output.main_rt.b * 255, custom_output.main_rt.a * 255);\n");
  1422. out.Write("\t}}\n\n");
  1423. }
  1424. }
  1425. if (use_framebuffer_fetch)
  1426. {
  1427. static constexpr std::array<const char*, 16> logic_op_mode{
  1428. "int4(0, 0, 0, 0)", // CLEAR
  1429. "TevResult & fb_value", // AND
  1430. "TevResult & ~fb_value", // AND_REVERSE
  1431. "TevResult", // COPY
  1432. "~TevResult & fb_value", // AND_INVERTED
  1433. "fb_value", // NOOP
  1434. "TevResult ^ fb_value", // XOR
  1435. "TevResult | fb_value", // OR
  1436. "~(TevResult | fb_value)", // NOR
  1437. "~(TevResult ^ fb_value)", // EQUIV
  1438. "~fb_value", // INVERT
  1439. "TevResult | ~fb_value", // OR_REVERSE
  1440. "~TevResult", // COPY_INVERTED
  1441. "~TevResult | fb_value", // OR_INVERTED
  1442. "~(TevResult & fb_value)", // NAND
  1443. "int4(255, 255, 255, 255)", // SET
  1444. };
  1445. out.Write(" // Logic Ops\n"
  1446. " if (logic_op_enable) {{\n"
  1447. " int4 fb_value = iround(initial_ocol0 * 255.0);"
  1448. " switch (logic_op_mode) {{\n");
  1449. for (size_t i = 0; i < logic_op_mode.size(); i++)
  1450. {
  1451. out.Write(" case {}u: TevResult = {}; break;\n", i, logic_op_mode[i]);
  1452. }
  1453. out.Write(" }}\n"
  1454. " TevResult &= 0xff;\n"
  1455. " }}\n");
  1456. }
  1457. else if (!host_config.backend_logic_op)
  1458. {
  1459. out.Write(" // Helpers for logic op blending approximations\n"
  1460. " if (logic_op_enable) {{\n"
  1461. " switch (logic_op_mode) {{\n");
  1462. out.Write(" case {}: // Clear\n", static_cast<u32>(LogicOp::Clear));
  1463. out.Write(" TevResult = int4(0, 0, 0, 0);\n"
  1464. " break;\n");
  1465. out.Write(" case {}: // Copy Inverted\n", static_cast<u32>(LogicOp::CopyInverted));
  1466. out.Write(" TevResult ^= 0xff;\n"
  1467. " break;\n");
  1468. out.Write(" case {}: // Set\n", static_cast<u32>(LogicOp::Set));
  1469. out.Write(" case {}: // Invert\n", static_cast<u32>(LogicOp::Invert));
  1470. out.Write(" TevResult = int4(255, 255, 255, 255);\n"
  1471. " break;\n");
  1472. out.Write(" default:\n"
  1473. " break;\n"
  1474. " }}\n"
  1475. " }}\n");
  1476. }
  1477. // Some backends require that the shader outputs be uint when writing to a uint render target
  1478. // for logic op.
  1479. if (uid_data->uint_output)
  1480. {
  1481. out.Write(" if (bpmem_rgba6_format)\n"
  1482. " ocol0 = uint4(TevResult & 0xFC);\n"
  1483. " else\n"
  1484. " ocol0 = uint4(TevResult);\n"
  1485. "\n");
  1486. }
  1487. else
  1488. {
  1489. out.Write(" if (bpmem_rgba6_format)\n"
  1490. " ocol0.rgb = float3(TevResult.rgb >> 2) / 63.0;\n"
  1491. " else\n"
  1492. " ocol0.rgb = float3(TevResult.rgb) / 255.0;\n"
  1493. "\n"
  1494. " if (bpmem_dstalpha != 0u)\n");
  1495. out.Write(" ocol0.a = float({} >> 2) / 63.0;\n",
  1496. BitfieldExtract<&ConstantAlpha::alpha>("bpmem_dstalpha"));
  1497. out.Write(" else\n"
  1498. " ocol0.a = float(TevResult.a >> 2) / 63.0;\n"
  1499. " \n");
  1500. if (use_dual_source || use_framebuffer_fetch)
  1501. {
  1502. out.Write(" // Dest alpha override (dual source blending)\n"
  1503. " // Colors will be blended against the alpha from ocol1 and\n"
  1504. " // the alpha from ocol0 will be written to the framebuffer.\n"
  1505. " ocol1 = float4(0.0, 0.0, 0.0, float(TevResult.a) / 255.0);\n");
  1506. }
  1507. }
  1508. if (bounding_box)
  1509. {
  1510. out.Write(" if (bpmem_bounding_box) {{\n"
  1511. " UpdateBoundingBox(rawpos.xy);\n"
  1512. " }}\n");
  1513. }
  1514. if (use_framebuffer_fetch)
  1515. {
  1516. using Common::EnumMap;
  1517. static constexpr EnumMap<std::string_view, SrcBlendFactor::InvDstAlpha> blendSrcFactor{
  1518. "blend_src.rgb = float3(0,0,0);", // ZERO
  1519. "blend_src.rgb = float3(1,1,1);", // ONE
  1520. "blend_src.rgb = initial_ocol0.rgb;", // DSTCLR
  1521. "blend_src.rgb = float3(1,1,1) - initial_ocol0.rgb;", // INVDSTCLR
  1522. "blend_src.rgb = src_color.aaa;", // SRCALPHA
  1523. "blend_src.rgb = float3(1,1,1) - src_color.aaa;", // INVSRCALPHA
  1524. "blend_src.rgb = initial_ocol0.aaa;", // DSTALPHA
  1525. "blend_src.rgb = float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA
  1526. };
  1527. static constexpr EnumMap<std::string_view, SrcBlendFactor::InvDstAlpha> blendSrcFactorAlpha{
  1528. "blend_src.a = 0.0;", // ZERO
  1529. "blend_src.a = 1.0;", // ONE
  1530. "blend_src.a = initial_ocol0.a;", // DSTCLR
  1531. "blend_src.a = 1.0 - initial_ocol0.a;", // INVDSTCLR
  1532. "blend_src.a = src_color.a;", // SRCALPHA
  1533. "blend_src.a = 1.0 - src_color.a;", // INVSRCALPHA
  1534. "blend_src.a = initial_ocol0.a;", // DSTALPHA
  1535. "blend_src.a = 1.0 - initial_ocol0.a;", // INVDSTALPHA
  1536. };
  1537. static constexpr EnumMap<std::string_view, DstBlendFactor::InvDstAlpha> blendDstFactor{
  1538. "blend_dst.rgb = float3(0,0,0);", // ZERO
  1539. "blend_dst.rgb = float3(1,1,1);", // ONE
  1540. "blend_dst.rgb = ocol0.rgb;", // SRCCLR
  1541. "blend_dst.rgb = float3(1,1,1) - ocol0.rgb;", // INVSRCCLR
  1542. "blend_dst.rgb = src_color.aaa;", // SRCALHA
  1543. "blend_dst.rgb = float3(1,1,1) - src_color.aaa;", // INVSRCALPHA
  1544. "blend_dst.rgb = initial_ocol0.aaa;", // DSTALPHA
  1545. "blend_dst.rgb = float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA
  1546. };
  1547. static constexpr EnumMap<std::string_view, DstBlendFactor::InvDstAlpha> blendDstFactorAlpha{
  1548. "blend_dst.a = 0.0;", // ZERO
  1549. "blend_dst.a = 1.0;", // ONE
  1550. "blend_dst.a = ocol0.a;", // SRCCLR
  1551. "blend_dst.a = 1.0 - ocol0.a;", // INVSRCCLR
  1552. "blend_dst.a = src_color.a;", // SRCALPHA
  1553. "blend_dst.a = 1.0 - src_color.a;", // INVSRCALPHA
  1554. "blend_dst.a = initial_ocol0.a;", // DSTALPHA
  1555. "blend_dst.a = 1.0 - initial_ocol0.a;", // INVDSTALPHA
  1556. };
  1557. out.Write(" if (blend_enable) {{\n"
  1558. " float4 src_color;\n"
  1559. " if (bpmem_dstalpha != 0u) {{\n"
  1560. " src_color = ocol1;\n"
  1561. " }} else {{\n"
  1562. " src_color = ocol0;\n"
  1563. " }}"
  1564. " float4 blend_src;\n");
  1565. WriteSwitch(out, api_type, "blend_src_factor", blendSrcFactor, 4, true);
  1566. WriteSwitch(out, api_type, "blend_src_factor_alpha", blendSrcFactorAlpha, 4, true);
  1567. out.Write(" float4 blend_dst;\n");
  1568. WriteSwitch(out, api_type, "blend_dst_factor", blendDstFactor, 4, true);
  1569. WriteSwitch(out, api_type, "blend_dst_factor_alpha", blendDstFactorAlpha, 4, true);
  1570. out.Write(" float4 blend_result;\n"
  1571. " if (blend_subtract)\n"
  1572. " blend_result.rgb = initial_ocol0.rgb * blend_dst.rgb - ocol0.rgb * "
  1573. "blend_src.rgb;\n"
  1574. " else\n"
  1575. " blend_result.rgb = initial_ocol0.rgb * blend_dst.rgb + ocol0.rgb * "
  1576. "blend_src.rgb;\n");
  1577. out.Write(" if (blend_subtract_alpha)\n"
  1578. " blend_result.a = initial_ocol0.a * blend_dst.a - ocol0.a * blend_src.a;\n"
  1579. " else\n"
  1580. " blend_result.a = initial_ocol0.a * blend_dst.a + ocol0.a * blend_src.a;\n");
  1581. out.Write(" real_ocol0 = blend_result;\n");
  1582. out.Write(" }} else {{\n"
  1583. " real_ocol0 = ocol0;\n"
  1584. " }}\n");
  1585. }
  1586. out.Write("}}\n"
  1587. "\n"
  1588. "int4 getRasColor(State s, StageState ss, float4 colors_0, float4 colors_1) {{\n"
  1589. " // Select Ras for stage\n"
  1590. " uint ras = {};\n",
  1591. BitfieldExtract<&TwoTevStageOrders::colorchan_even>("ss.order"));
  1592. out.Write(" if (ras < 2u) {{ // Lighting Channel 0 or 1\n"
  1593. " int4 color = iround(((ras == 0u) ? colors_0 : colors_1) * 255.0);\n"
  1594. " uint swap = {};\n",
  1595. BitfieldExtract<&TevStageCombiner::AlphaCombiner::rswap>("ss.ac"));
  1596. out.Write(" return Swizzle(swap, color);\n");
  1597. out.Write(" }} else if (ras == 5u) {{ // Alpha Bump\n"
  1598. " return int4(s.AlphaBump, s.AlphaBump, s.AlphaBump, s.AlphaBump);\n"
  1599. " }} else if (ras == 6u) {{ // Normalzied Alpha Bump\n"
  1600. " int normalized = s.AlphaBump | s.AlphaBump >> 5;\n"
  1601. " return int4(normalized, normalized, normalized, normalized);\n"
  1602. " }} else {{\n"
  1603. " return int4(0, 0, 0, 0);\n"
  1604. " }}\n"
  1605. "}}\n"
  1606. "\n"
  1607. "int4 getKonstColor(State s, StageState ss) {{\n"
  1608. " // Select Konst for stage\n"
  1609. " // TODO: a switch case might be better here than an dynamically"
  1610. " // indexed uniform lookup\n"
  1611. " uint tevksel = bpmem_tevksel(ss.stage>>1);\n"
  1612. " if ((ss.stage & 1u) == 0u)\n"
  1613. " return int4(konstLookup[{}].rgb, konstLookup[{}].a);\n",
  1614. BitfieldExtract<&TevKSel::kcsel_even>("tevksel"),
  1615. BitfieldExtract<&TevKSel::kasel_even>("tevksel"));
  1616. out.Write(" else\n"
  1617. " return int4(konstLookup[{}].rgb, konstLookup[{}].a);\n",
  1618. BitfieldExtract<&TevKSel::kcsel_odd>("tevksel"),
  1619. BitfieldExtract<&TevKSel::kasel_odd>("tevksel"));
  1620. out.Write("}}\n");
  1621. return out;
  1622. }
  1623. void EnumeratePixelShaderUids(const std::function<void(const PixelShaderUid&)>& callback)
  1624. {
  1625. PixelShaderUid uid;
  1626. for (u32 texgens = 0; texgens <= 8; texgens++)
  1627. {
  1628. pixel_ubershader_uid_data* const puid = uid.GetUidData();
  1629. puid->num_texgens = texgens;
  1630. for (u32 early_depth = 0; early_depth < 2; early_depth++)
  1631. {
  1632. puid->early_depth = early_depth != 0;
  1633. for (u32 per_pixel_depth = 0; per_pixel_depth < 2; per_pixel_depth++)
  1634. {
  1635. // Don't generate shaders where we have early depth tests enabled, and write gl_FragDepth.
  1636. if (early_depth && per_pixel_depth)
  1637. continue;
  1638. puid->per_pixel_depth = per_pixel_depth != 0;
  1639. for (u32 uint_output = 0; uint_output < 2; uint_output++)
  1640. {
  1641. puid->uint_output = uint_output;
  1642. for (u32 no_dual_src = 0; no_dual_src < 2; no_dual_src++)
  1643. {
  1644. puid->no_dual_src = no_dual_src;
  1645. callback(uid);
  1646. }
  1647. }
  1648. }
  1649. }
  1650. }
  1651. }
  1652. } // namespace UberShader