TextureConverterShaderGen.cpp 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263
  1. // Copyright 2017 Dolphin Emulator Project
  2. // SPDX-License-Identifier: GPL-2.0-or-later
  3. #include "VideoCommon/TextureConverterShaderGen.h"
  4. #include "Common/Assert.h"
  5. #include "Common/CommonTypes.h"
  6. #include "VideoCommon/BPMemory.h"
  7. #include "VideoCommon/TextureCacheBase.h"
  8. #include "VideoCommon/VideoCommon.h"
  9. #include "VideoCommon/VideoConfig.h"
  10. namespace TextureConversionShaderGen
  11. {
  12. TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity,
  13. bool scale_by_half, float gamma_rcp,
  14. const std::array<u32, 3>& filter_coefficients)
  15. {
  16. TCShaderUid out;
  17. UidData* const uid_data = out.GetUidData();
  18. if (g_ActiveConfig.bForceTrueColor)
  19. {
  20. // Increase the precision of EFB copies where it's likely to be safe.
  21. switch (dst_format)
  22. {
  23. case EFBCopyFormat::RGB565:
  24. // HACK: XFB is RGB8.
  25. // Don't blindly do this in other places though,
  26. // the enum value is used to identify XFB copies.
  27. // The important thing here is that we need alpha = 1.
  28. dst_format = EFBCopyFormat::XFB;
  29. break;
  30. case EFBCopyFormat::RGB5A3:
  31. dst_format = EFBCopyFormat::RGBA8;
  32. break;
  33. default:
  34. // Let's not touch the other formats for now, seems risky.
  35. break;
  36. }
  37. }
  38. uid_data->dst_format = dst_format;
  39. uid_data->efb_has_alpha = bpmem.zcontrol.pixel_format == PixelFormat::RGBA6_Z24;
  40. uid_data->is_depth_copy = is_depth_copy;
  41. uid_data->is_intensity = is_intensity;
  42. uid_data->scale_by_half = scale_by_half;
  43. uid_data->all_copy_filter_coefs_needed =
  44. TextureCacheBase::AllCopyFilterCoefsNeeded(filter_coefficients);
  45. uid_data->copy_filter_can_overflow = TextureCacheBase::CopyFilterCanOverflow(filter_coefficients);
  46. // If the gamma is needed, then include that too.
  47. uid_data->apply_gamma = gamma_rcp != 1.0f;
  48. return out;
  49. }
  50. static void WriteHeader(APIType api_type, ShaderCode& out)
  51. {
  52. out.Write("UBO_BINDING(std140, 1) uniform PSBlock {{\n"
  53. " float2 src_offset, src_size;\n"
  54. " uint3 filter_coefficients;\n"
  55. " float gamma_rcp;\n"
  56. " float2 clamp_tb;\n"
  57. " float pixel_height;\n"
  58. "}};\n");
  59. }
  60. ShaderCode GenerateVertexShader(APIType api_type)
  61. {
  62. ShaderCode out;
  63. WriteHeader(api_type, out);
  64. if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
  65. {
  66. out.Write("VARYING_LOCATION(0) out VertexData {{\n"
  67. " float3 v_tex0;\n"
  68. "}};\n");
  69. }
  70. else
  71. {
  72. out.Write("VARYING_LOCATION(0) out float3 v_tex0;\n");
  73. }
  74. out.Write("#define id gl_VertexID\n"
  75. "#define opos gl_Position\n"
  76. "void main() {{\n");
  77. out.Write(" v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n");
  78. out.Write(
  79. " opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n");
  80. out.Write(" v_tex0 = float3(src_offset + (src_size * v_tex0.xy), 0.0f);\n");
  81. // NDC space is flipped in Vulkan
  82. if (api_type == APIType::Vulkan)
  83. out.Write(" opos.y = -opos.y;\n");
  84. out.Write("}}\n");
  85. return out;
  86. }
  87. ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data)
  88. {
  89. const bool mono_depth = uid_data->is_depth_copy && g_ActiveConfig.bStereoEFBMonoDepth;
  90. ShaderCode out;
  91. WriteHeader(api_type, out);
  92. out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n");
  93. out.Write("uint4 SampleEFB(float3 uv, float y_offset) {{\n"
  94. " float4 tex_sample = texture(samp0, float3(uv.x, clamp(uv.y + (y_offset * "
  95. "pixel_height), clamp_tb.x, clamp_tb.y), {}));\n",
  96. mono_depth ? "0.0" : "uv.z");
  97. if (uid_data->is_depth_copy)
  98. {
  99. if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange)
  100. out.Write(" tex_sample.x = 1.0 - tex_sample.x;\n");
  101. out.Write(" uint depth = uint(tex_sample.x * 16777216.0);\n"
  102. " return uint4((depth >> 16) & 255u, (depth >> 8) & 255u, depth & 255u, 255u);\n"
  103. "}}\n");
  104. }
  105. else
  106. {
  107. out.Write(" return uint4(tex_sample * 255.0);\n"
  108. "}}\n");
  109. }
  110. if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
  111. {
  112. out.Write("VARYING_LOCATION(0) in VertexData {{\n"
  113. " float3 v_tex0;\n"
  114. "}};\n");
  115. }
  116. else
  117. {
  118. out.Write("VARYING_LOCATION(0) in vec3 v_tex0;\n");
  119. }
  120. out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"
  121. "void main()\n{{\n");
  122. // The copy filter applies to both color and depth copies. This has been verified on hardware.
  123. // The filter is only applied to the RGB channels, the alpha channel is left intact.
  124. if (uid_data->all_copy_filter_coefs_needed)
  125. {
  126. out.Write(" uint4 prev_row = SampleEFB(v_tex0, -1.0f);\n"
  127. " uint4 current_row = SampleEFB(v_tex0, 0.0f);\n"
  128. " uint4 next_row = SampleEFB(v_tex0, 1.0f);\n"
  129. " uint3 combined_rows = prev_row.rgb * filter_coefficients[0] +\n"
  130. " current_row.rgb * filter_coefficients[1] +\n"
  131. " next_row.rgb * filter_coefficients[2];\n");
  132. }
  133. else
  134. {
  135. out.Write(" uint4 current_row = SampleEFB(v_tex0, 0.0f);\n"
  136. " uint3 combined_rows = current_row.rgb * filter_coefficients[1];\n");
  137. }
  138. out.Write(" // Shift right by 6 to divide by 64, as filter coefficients\n"
  139. " // that sum to 64 result in no change in brightness\n"
  140. " uint4 texcol_raw = uint4(combined_rows.rgb >> 6, {});\n",
  141. uid_data->efb_has_alpha ? "current_row.a" : "255");
  142. if (uid_data->copy_filter_can_overflow)
  143. out.Write(" texcol_raw &= 0x1ffu;\n");
  144. // Note that overflow occurs when the sum of values is >= 128, but this max situation can be hit
  145. // on >= 64, so we always include it.
  146. out.Write(" texcol_raw = min(texcol_raw, uint4(255, 255, 255, 255));\n");
  147. if (uid_data->apply_gamma)
  148. {
  149. out.Write(" texcol_raw = uint4(round(pow(abs(float4(texcol_raw) / 255.0),\n"
  150. " float4(gamma_rcp, gamma_rcp, gamma_rcp, 1.0)) * 255.0));\n");
  151. }
  152. if (uid_data->is_intensity)
  153. {
  154. out.Write(" // Intensity/YUV format conversion constants determined by hardware testing\n"
  155. " const float4 y_const = float4( 66, 129, 25, 16);\n"
  156. " const float4 u_const = float4(-38, -74, 112, 128);\n"
  157. " const float4 v_const = float4(112, -94, -18, 128);\n"
  158. " // Intensity/YUV format conversion\n"
  159. " texcol_raw.rgb = uint3(dot(y_const, float4(texcol_raw.rgb, 256)),\n"
  160. " dot(u_const, float4(texcol_raw.rgb, 256)),\n"
  161. " dot(v_const, float4(texcol_raw.rgb, 256)));\n"
  162. " // Divide by 256 and round .5 and higher up\n"
  163. " texcol_raw.rgb = (texcol_raw.rgb >> 8) + ((texcol_raw.rgb >> 7) & 1u);\n");
  164. }
  165. switch (uid_data->dst_format)
  166. {
  167. case EFBCopyFormat::R4: // R4
  168. out.Write(" float red = float(texcol_raw.r & 0xF0u) / 240.0;\n"
  169. " ocol0 = float4(red, red, red, red);\n");
  170. break;
  171. case EFBCopyFormat::R8_0x1: // R8
  172. case EFBCopyFormat::R8: // R8
  173. out.Write(" ocol0 = float4(texcol_raw).rrrr / 255.0;\n");
  174. break;
  175. case EFBCopyFormat::RA4: // RA4
  176. out.Write(" float2 red_alpha = float2(texcol_raw.ra & 0xF0u) / 240.0;\n"
  177. " ocol0 = red_alpha.rrrg;\n");
  178. break;
  179. case EFBCopyFormat::RA8: // RA8
  180. out.Write(" ocol0 = float4(texcol_raw).rrra / 255.0;\n");
  181. break;
  182. case EFBCopyFormat::A8: // A8
  183. out.Write(" ocol0 = float4(texcol_raw).aaaa / 255.0;\n");
  184. break;
  185. case EFBCopyFormat::G8: // G8
  186. out.Write(" ocol0 = float4(texcol_raw).gggg / 255.0;\n");
  187. break;
  188. case EFBCopyFormat::B8: // B8
  189. out.Write(" ocol0 = float4(texcol_raw).bbbb / 255.0;\n");
  190. break;
  191. case EFBCopyFormat::RG8: // RG8
  192. out.Write(" ocol0 = float4(texcol_raw).rrrg / 255.0;\n");
  193. break;
  194. case EFBCopyFormat::GB8: // GB8
  195. out.Write(" ocol0 = float4(texcol_raw).gggb / 255.0;\n");
  196. break;
  197. case EFBCopyFormat::RGB565: // RGB565
  198. out.Write(" float2 red_blue = float2(texcol_raw.rb & 0xF8u) / 248.0;\n"
  199. " float green = float(texcol_raw.g & 0xFCu) / 252.0;\n"
  200. " ocol0 = float4(red_blue.r, green, red_blue.g, 1.0);\n");
  201. break;
  202. case EFBCopyFormat::RGB5A3: // RGB5A3
  203. // TODO: The MSB controls whether we have RGB5 or RGB4A3, this selection
  204. // will need to be implemented once we move away from floats.
  205. out.Write(" float3 color = float3(texcol_raw.rgb & 0xF8u) / 248.0;\n"
  206. " float alpha = float(texcol_raw.a & 0xE0u) / 224.0;\n"
  207. " ocol0 = float4(color, alpha);\n");
  208. break;
  209. case EFBCopyFormat::RGBA8: // RGBA8
  210. out.Write(" ocol0 = float4(texcol_raw.rgba) / 255.0;\n");
  211. break;
  212. case EFBCopyFormat::XFB:
  213. out.Write(" ocol0 = float4(float3(texcol_raw.rgb) / 255.0, 1.0);\n");
  214. break;
  215. default:
  216. ERROR_LOG_FMT(VIDEO, "Unknown copy/intensity color format: {} {}", uid_data->dst_format,
  217. uid_data->is_intensity);
  218. out.Write(" ocol0 = float4(texcol_raw.rgba) / 255.0;\n");
  219. break;
  220. }
  221. out.Write("}}\n");
  222. return out;
  223. }
  224. } // namespace TextureConversionShaderGen