default_pre_post_process.glsl 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409
  /***** COLOR CORRECTION *****/

  // Color space references:
  // https://www.unravel.com.au/understanding-color-spaces

  // SMPTE 170M - BT.601 (NTSC-M) -> BT.709
  // mat3() takes its arguments column by column, so every float3 below is first
  // stored as a column; transpose() then turns each of them into a row of from_NTSCM.
  mat3 from_NTSCM = transpose(mat3(
    float3(0.939497225737661, 0.0502268452914346, 0.0102759289709032),
    float3(0.0177558637510127, 0.965824605885027, 0.0164195303639603),
    float3(-0.00162163209967010, -0.00437400622653655, 1.00599563832621)));
  // ARIB TR-B9 (9300K+27MPCD with chromatic adaptation) (NTSC-J) -> BT.709
  // Each float3 is handed to mat3() as a column and becomes a row of from_NTSCJ
  // once transpose() has been applied.
  mat3 from_NTSCJ = transpose(mat3(
    float3(0.823613036967492, -0.0943227111084757, 0.00799341532931119),
    float3(0.0289258355537324, 1.02310733489462, 0.00243547111576797),
    float3(-0.00569501554980891, 0.0161828357559315, 1.22328453915712)));
  // EBU - BT.470BG/BT.601 (PAL) -> BT.709
  // Each float3 is handed to mat3() as a column and becomes a row of from_PAL
  // once transpose() has been applied.
  mat3 from_PAL = transpose(mat3(
    float3(1.04408168421813, -0.0440816842181253, 0.000000000000000),
    float3(0.000000000000000, 1.00000000000000, 0.000000000000000),
    float3(0.000000000000000, 0.0118044782106489, 0.988195521789351)));
  // Encodes a linear color with the piecewise sRGB transfer function:
  // a linear segment (x * 12.92) up to 0.0031308 and a 1/2.4 power curve above it.
  // Every channel is processed independently; the encoded color is returned.
  float3 LinearTosRGBGamma(float3 color)
  {
    const float curve_offset = 0.055;

    for (int channel = 0; channel < 3; ++channel)
    {
      float value = color[channel];
      color[channel] = (value <= 0.0031308)
        ? value * 12.92
        : (1.0 + curve_offset) * pow(value, 1.0 / 2.4) - curve_offset;
    }

    return color;
  }
  /***** COLOR SAMPLING *****/

  // Non filtered gamma corrected sample (nearest neighbor).
  // Reads samp1 at uvw and raises the rgb channels to the "gamma" power;
  // alpha is returned exactly as sampled.
  float4 QuickSample(float3 uvw, float gamma)
  {
  #if 0 // Test sampling range
    const float threshold = 0.00000001;
    float2 resolution = GetResolution();
    float2 xy = uvw.xy * resolution;

    // Sampling outside the valid range, draw in yellow
    bool outside_x = xy.x < (0.0 - threshold) || xy.x > (resolution.x + threshold);
    bool outside_y = xy.y < (0.0 - threshold) || xy.y > (resolution.y + threshold);
    if (outside_x || outside_y)
      return float4(1.0, 1.0, 0.0, 1);

    // Sampling at the edges, draw in purple
    bool edge_x = xy.x < 1.0 || xy.x > (resolution.x - 1.0);
    bool edge_y = xy.y < 1.0 || xy.y > (resolution.y - 1.0);
    if (edge_x || edge_y)
      return float4(0.5, 0, 0.5, 1);
  #endif

    float4 texel = texture(samp1, uvw);
    return float4(pow(texel.rgb, float3(gamma)), texel.a);
  }
  // Convenience overload: packs the 2D coordinates "uv" and the third
  // coordinate "w" into a float3 and forwards to the float3 variant.
  float4 QuickSample(float2 uv, float w, float gamma)
  {
    float3 coords = float3(uv, w);
    return QuickSample(coords, gamma);
  }
  // Same as QuickSample(), but "xy" is expressed in source pixels:
  // it is scaled by GetInvResolution() to obtain the texture coordinates.
  float4 QuickSampleByPixel(float2 xy, float w, float gamma)
  {
    return QuickSample(float3(xy * GetInvResolution(), w), gamma);
  }
  /***** Bilinear Interpolation *****/

  // Software (bi)linear filter: blends the four texels surrounding uvw.xy,
  // each one fetched through QuickSampleByPixel() with the given gamma.
  float4 BilinearSample(float3 uvw, float gamma)
  {
    // This emulates the (bi)linear filtering done directly from GPUs HW.
    // Note that GPUs might natively filter red green and blue differently, but we don't do it.
    // They might also use different filtering between upscaling and downscaling.

    // Position in source pixels, shifted so that the integer part identifies
    // the top left texel of the 2x2 neighborhood.
    float2 position = (uvw.xy * GetResolution()) - 0.5;
    float2 top_left = floor(position);
    float2 weight = fract(position);

    // Fetch the 2x2 neighborhood at the texel centers.
    float4 tl = QuickSampleByPixel(top_left + float2(0.5, 0.5), uvw.z, gamma);
    float4 tr = QuickSampleByPixel(top_left + float2(1.5, 0.5), uvw.z, gamma);
    float4 bl = QuickSampleByPixel(top_left + float2(0.5, 1.5), uvw.z, gamma);
    float4 br = QuickSampleByPixel(top_left + float2(1.5, 1.5), uvw.z, gamma);

    // Blend horizontally first, then vertically.
    float4 top = lerp(tl, tr, weight.x);
    float4 bottom = lerp(bl, br, weight.x);
    return lerp(top, bottom, weight.y);
  }
  /***** Bicubic Interpolation *****/

  // Formula derived from:
  // https://en.wikipedia.org/wiki/Mitchell%E2%80%93Netravali_filters#Definition
  // Values from:
  // https://guideencodemoe-mkdocs.readthedocs.io/encoding/resampling/#mitchell-netravali-bicubic
  // Other references:
  // https://www.codeproject.com/Articles/236394/Bi-Cubic-and-Bi-Linear-Interpolation-with-GLSL
  // https://github.com/ValveSoftware/gamescope/pull/740
  // https://stackoverflow.com/questions/13501081/efficient-bicubic-filtering-code-in-glsl

  // Builds the mat4 of cubic polynomial coefficients for the (B, C) filter family.
  // Column k holds the t^k coefficients of the four taps, so multiplying the matrix
  // by (1, t, t^2, t^3) yields the four tap weights (see CubicCoeffs()).
  #define CUBIC_COEFF_GEN(B, C) \
    (mat4( \
      /* t^0 */ float4(((B) / 6.0), (-(B) / 3.0 + 1.0), ((B) / 6.0), (0.0)), \
      /* t^1 */ float4((-(B) / 2.0 - (C)), (0.0), ((B) / 2.0 + (C)), (0.0)), \
      /* t^2 */ float4(((B) / 2.0 + 2.0 * (C)), (2.0 * (B) + (C)-3.0), \
                       (-5.0 * (B) / 2.0 - 2.0 * (C) + 3.0), (-(C))), \
      /* t^3 */ float4((-(B) / 6.0 - (C)), (-3.0 * (B) / 2.0 - (C) + 2.0), \
                       (3.0 * (B) / 2.0 + (C)-2.0), ((B) / 6.0 + (C)))))
  // Evaluates the coefficient matrix at "t": returns coeffs * (1, t, t^2, t^3),
  // i.e. one weight per tap.
  float4 CubicCoeffs(float t, mat4 coeffs)
  {
    float t_squared = t * t;
    float4 powers = float4(1.0, t, t_squared, t_squared * t);
    return coeffs * powers;
  }
  // Weighted sum of four colors; component i of "coeffs" is the weight of c<i>.
  float4 CubicMix(float4 c0, float4 c1, float4 c2, float4 c3, float4 coeffs)
  {
    float4 mixed = c0 * coeffs.x + c1 * coeffs.y;
    mixed = mixed + c2 * coeffs.z;
    mixed = mixed + c3 * coeffs.w;
    return mixed;
  }
  // By Sam Belliveau. Public Domain license.
  // Simple 16 tap, gamma correct, implementation of bicubic filtering.
  // "coeffs" is the polynomial coefficient matrix of the cubic filter to use
  // (it is evaluated through CubicCoeffs()).
  float4 BicubicSample(float3 uvw, float gamma, mat4 coeffs)
  {
    // Position in source pixels, split into the reference texel and the
    // fractional distance from it.
    float2 position = (uvw.xy * GetResolution()) - 0.5;
    float2 base = floor(position);
    float2 t = fract(position);

    float4 weights_x = CubicCoeffs(t.x, coeffs);
    float4 weights_y = CubicCoeffs(t.y, coeffs);

    // Filter the 4x4 neighborhood one row at a time (texel centers at
    // -0.5, +0.5, +1.5 and +2.5 from the reference texel on both axes).
    float4 rows[4];
    for (int row = 0; row < 4; ++row)
    {
      float dy = float(row) - 0.5;
      float4 p0 = QuickSampleByPixel(base + float2(-0.5, dy), uvw.z, gamma);
      float4 p1 = QuickSampleByPixel(base + float2(+0.5, dy), uvw.z, gamma);
      float4 p2 = QuickSampleByPixel(base + float2(+1.5, dy), uvw.z, gamma);
      float4 p3 = QuickSampleByPixel(base + float2(+2.5, dy), uvw.z, gamma);
      rows[row] = CubicMix(p0, p1, p2, p3, weights_x);
    }

    // Then filter the four row results vertically.
    return CubicMix(rows[0], rows[1], rows[2], rows[3], weights_y);
  }
  /***** Sharp Bilinear Filtering *****/

  // Based on https://github.com/libretro/slang-shaders/blob/master/interpolation/shaders/sharp-bilinear.slang
  // by Themaister, Public Domain license
  // Does a bilinear stretch, with a preapplied Nx nearest-neighbor scale,
  // giving a sharper image than plain bilinear.
  float4 SharpBilinearSample(float3 uvw, float gamma)
  {
    float2 inv_source_size = GetInvResolution();
    float2 window_size = GetWindowResolution();

    // Split the sampling position (in source texels) into texel index and offset.
    float2 texel_pos = uvw.xy * GetResolution();
    float2 texel_index = floor(texel_pos);
    float2 offset_from_center = fract(texel_pos) - 0.5;

    // Integer prescale factor: the largest of the two axis ratios, floored, at least 1.
    float ratio = max(window_size.x * inv_source_size.x, window_size.y * inv_source_size.y);
    float prescale = max(floor(ratio), 1.0);
    float flat_range = 0.5 - (0.5 / prescale);

    // Figure out where in the texel to sample to get correct pre-scaled bilinear.
    float2 outside_flat = offset_from_center - clamp(offset_from_center, -flat_range, flat_range);
    float2 remapped = (outside_flat * prescale) + 0.5;

    uvw.xy = (texel_index + remapped) * inv_source_size;
    return BilinearSample(uvw, gamma);
  }
  /***** Area Sampling *****/

  // By Sam Belliveau and Filippo Tarpini. Public Domain license.
  // Effectively a more accurate sharp bilinear filter when upscaling,
  // that also works as a mathematically perfect downscale filter.
  // https://entropymine.com/imageworsener/pixelmixing/
  // https://github.com/obsproject/obs-studio/pull/1715
  // https://legacy.imagemagick.org/Usage/filter/
  //
  // Averages every source pixel touched by the footprint of the current target
  // pixel, weighting the border pixels by the fraction that is covered.
  float4 AreaSampling(float3 uvw, float gamma)
  {
    // Sizes of the source and target images.
    float2 src_size = GetResolution();
    float2 dst_size = GetWindowResolution();
    float2 inv_dst_size = GetInvWindowResolution();

    // Top-left and bottom-right corners of the target pixel box.
    float2 dst_min = floor(uvw.xy * dst_size);
    float2 dst_max = dst_min + float2(1.0, 1.0);

    // The same box expressed in source pixels.
    float2 src_min = dst_min * inv_dst_size * src_size;
    float2 src_max = dst_max * inv_dst_size * src_size;

    // Source pixels containing the two corners of the box.
    float2 first = floor(src_min);
    float2 last = floor(src_max);

    // How much of the first and last columns/rows is covered.
    float cover_left = 1.0 - fract(src_min.x);
    float cover_top = 1.0 - fract(src_min.y);
    float cover_right = fract(src_max.x);
    float cover_bottom = fract(src_max.y);

    // Covered areas of the four corner pixels.
    float cover_tl = cover_top * cover_left;
    float cover_tr = cover_top * cover_right;
    float cover_bl = cover_bottom * cover_left;
    float cover_br = cover_bottom * cover_right;

    // Prevents rounding errors due to the coordinates flooring above
    const float2 center = float2(0.5, 0.5);

    // Weighted color accumulator, starting with the corner pixels.
    float4 sum = float4(0.0, 0.0, 0.0, 0.0);
    sum += cover_tl * QuickSampleByPixel(float2(first.x, first.y) + center, uvw.z, gamma);
    sum += cover_tr * QuickSampleByPixel(float2(last.x, first.y) + center, uvw.z, gamma);
    sum += cover_bl * QuickSampleByPixel(float2(first.x, last.y) + center, uvw.z, gamma);
    sum += cover_br * QuickSampleByPixel(float2(last.x, last.y) + center, uvw.z, gamma);

    // Workaround to compile the shader with DX11/12.
    // If this isn't done, it will complain that the loop could have too many iterations.
    // This number should be enough to guarantee downscaling from very high to very small resolutions.
    // Note that this number might be referenced in the UI.
    const int max_iterations = 16;

    // Number of fully covered columns/rows between the first and last ones,
    // limited to the loop bound so that the final normalization stays consistent.
    int inner_cols = min(int(last.x - first.x - 0.5), max_iterations);
    int inner_rows = min(int(last.y - first.y - 0.5), max_iterations);

    // Top and bottom edge pixels.
    for (int col = 0; col < max_iterations; ++col)
    {
      if (col < inner_cols)
      {
        float px = first.x + 1.0 + float(col);
        sum += cover_top * QuickSampleByPixel(float2(px, first.y) + center, uvw.z, gamma);
        sum += cover_bottom * QuickSampleByPixel(float2(px, last.y) + center, uvw.z, gamma);
      }
    }

    // Left and right edge pixels and all the pixels in between.
    for (int row = 0; row < max_iterations; ++row)
    {
      if (row < inner_rows)
      {
        float py = first.y + 1.0 + float(row);
        sum += cover_left * QuickSampleByPixel(float2(first.x, py) + center, uvw.z, gamma);
        sum += cover_right * QuickSampleByPixel(float2(last.x, py) + center, uvw.z, gamma);

        // Fully covered pixels of this row (weight 1).
        for (int col = 0; col < max_iterations; ++col)
        {
          if (col < inner_cols)
          {
            float px = first.x + 1.0 + float(col);
            sum += QuickSampleByPixel(float2(px, py) + center, uvw.z, gamma);
          }
        }
      }
    }

    // Total area that was sampled: corners, edges and fully covered center.
    float total_corners = cover_tl + cover_tr + cover_bl + cover_br;
    float total_edges = float(inner_cols) * (cover_top + cover_bottom) + float(inner_rows) * (cover_left + cover_right);
    float total_center = float(inner_cols) * float(inner_rows);

    // Normalized average color.
    return sum / (total_corners + total_edges + total_center);
  }
  /***** Main Functions *****/

  // Returns an accurate (gamma corrected) sample of a gamma space texture.
  // Outputs in linear space for simplicity.
  // The filter is selected by resampling_method; values that match no filter
  // produce opaque black.
  float4 LinearGammaCorrectedSample(float gamma)
  {
    float3 uvw = v_tex0;

    if (resampling_method <= 1) // Bilinear
      return BilinearSample(uvw, gamma);

    if (resampling_method == 2) // Bicubic: B-Spline
      return BicubicSample(uvw, gamma, CUBIC_COEFF_GEN(1.0, 0.0));

    if (resampling_method == 3) // Bicubic: Mitchell-Netravali
      return BicubicSample(uvw, gamma, CUBIC_COEFF_GEN(1.0 / 3.0, 1.0 / 3.0));

    if (resampling_method == 4) // Bicubic: Catmull-Rom
      return BicubicSample(uvw, gamma, CUBIC_COEFF_GEN(0.0, 0.5));

    if (resampling_method == 5) // Sharp Bilinear
      return SharpBilinearSample(uvw, gamma);

    if (resampling_method == 6) // Area Sampling
      return AreaSampling(uvw, gamma);

    if (resampling_method == 7) // Nearest Neighbor
      return QuickSample(uvw, gamma);

    if (resampling_method == 8) // Bicubic: Hermite
      return BicubicSample(uvw, gamma, CUBIC_COEFF_GEN(0.0, 0.0));

    // No filter matched.
    return float4(0, 0, 0, 1);
  }
  // Shader entry point: samples the source image, brings it to linear space,
  // applies the optional color space and HDR adjustments, then re-encodes the
  // color for the output (unless linear output is requested) and writes it.
  void main()
  {
    // This tries to fall back on GPU HW sampling if it can (it won't be gamma corrected).
    bool raw_resampling = resampling_method <= 0;
    bool needs_rescaling = GetResolution() != GetWindowResolution();
    bool needs_resampling = needs_rescaling && (OptionEnabled(hdr_output) || OptionEnabled(correct_gamma) || !raw_resampling);

    float4 color;
    if (needs_resampling)
    {
      // Doing linear sampling in "gamma space" on linear texture formats isn't correct.
      // If the source and target resolutions don't match, the GPU will return a color
      // that is the average of 4 gamma space colors, but gamma space colors can't be blended together,
      // gamma needs to be de-applied first. This makes a big difference if colors change
      // drastically between two pixels.
      color = LinearGammaCorrectedSample(game_gamma);
    }
    else
    {
      // Default GPU HW sampling. Bilinear is identical to Nearest Neighbor if the input and output resolutions match.
      if (needs_rescaling)
        color = texture(samp0, v_tex0);
      else
        color = texture(samp1, v_tex0);

      // Convert to linear before doing any other of follow up operations.
      color.rgb = pow(color.rgb, float3(game_gamma));
    }

    if (OptionEnabled(correct_color_space))
    {
      // The from_* matrices are built through transpose(), so the rows written in
      // their definitions are the actual matrix rows. The color therefore has to be
      // multiplied as a column vector ("matrix * vector"). In GLSL "vector * matrix"
      // is equivalent to multiplying by the transposed matrix, which would apply the
      // wrong conversion (e.g. the NTSC-M and PAL rows sum to 1 to keep neutral
      // colors neutral, their columns do not).
      if (game_color_space == 0)
        color.rgb = from_NTSCM * color.rgb;
      else if (game_color_space == 1)
        color.rgb = from_NTSCJ * color.rgb;
      else if (game_color_space == 2)
        color.rgb = from_PAL * color.rgb;
    }

    if (OptionEnabled(hdr_output))
    {
      float hdr_paper_white = hdr_paper_white_nits / hdr_sdr_white_nits;
      color.rgb *= hdr_paper_white;
    }

    if (OptionEnabled(linear_space_output))
    {
      // Nothing to do here
    }
    // Correct the SDR gamma for sRGB (PC/Monitor) or ~2.2 (Common TV gamma)
    else if (OptionEnabled(correct_gamma))
    {
      if (OptionEnabled(sdr_display_gamma_sRGB))
      {
        color.rgb = LinearTosRGBGamma(color.rgb);
      }
      else
      {
        // The color space matrices have negative coefficients, so a channel can end up
        // below zero; pow() is undefined for a negative base, hence the clamp.
        color.rgb = pow(max(color.rgb, float3(0.0)), float3(1.0 / sdr_display_custom_gamma));
      }
    }
    // Restore the original gamma without changes
    else
    {
      // Same guard as above: never feed a negative base to pow().
      color.rgb = pow(max(color.rgb, float3(0.0)), float3(1.0 / game_gamma));
    }

    SetOutput(color);
  }