ssao.glsl 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277
  /* clang-format off */
  [vertex]
  layout(location = 0) in highp vec4 vertex_attrib;
  /* clang-format on */
  /**
   * Vertex stage: forwards the incoming attribute as the clip-space position
   * unchanged, except that the z component is forced to 1.0.
   */
  void main() {
      gl_Position = vec4(vertex_attrib.xy, 1.0, vertex_attrib.w);
  }
  9. /* clang-format off */
  10. [fragment]
  // Full turn in radians (2 * pi).
  #define TWO_PI 6.283185307179586476925286766559

  // Number of AO taps taken per pixel (per radius pass).
  // Default of 12 applies when neither quality flag is set; SSAO_QUALITY_HIGH
  // selects 16 and SSAO_QUALITY_LOW selects 8.
  #if !defined(SSAO_QUALITY_LOW) && !defined(SSAO_QUALITY_HIGH)
  #define NUM_SAMPLES (12)
  #endif
  #ifdef SSAO_QUALITY_HIGH
  #define NUM_SAMPLES (16)
  #endif
  #ifdef SSAO_QUALITY_LOW
  #define NUM_SAMPLES (8)
  #endif

  // When sampling the depth mip chain: log2 of the largest pixel offset that is
  // still read from the current mip level before dropping to a coarser one, so
  // that taps stay reasonably local in the texture cache.
  //   - Too small (< 3): many taps land in the same pixel, giving high variance
  //     that shows up as flashing.
  //   - Too large (> 5): the mip levels are not used effectively and performance
  //     suffers.
  #define LOG_MAX_OFFSET (3)

  // Coarsest mip level that will be sampled.
  // This must be less than or equal to the MAX_MIP_LEVEL defined in SSAO.cpp
  #define MAX_MIP_LEVEL (4)
  // Number of turns around the circle made by the spiral tap pattern, looked up
  // by sample count: entry [N - 1] is the turn count used for N samples. The
  // counts are chosen so that taps do not line up with each other.
  // With the NUM_SAMPLES presets in this file the lookup yields
  // 3 turns (8 samples), 5 turns (12 samples) and 7 turns (16 samples).
  const int ROTATIONS[] = int[](
      // 1 .. 9 samples
      1, 1, 2, 3, 2, 5, 2, 3, 2,
      // 10 .. 19 samples
      3, 3, 5, 5, 3, 4, 7, 5, 5, 7,
      // 20 .. 29 samples
      9, 8, 5, 5, 7, 7, 7, 8, 5, 8,
      // 30 .. 39 samples
      11, 12, 7, 10, 13, 8, 11, 8, 7, 14,
      // 40 .. 49 samples
      11, 11, 13, 12, 13, 19, 17, 13, 11, 18,
      // 50 .. 59 samples
      19, 11, 11, 14, 17, 21, 15, 16, 17, 18,
      // 60 .. 69 samples
      13, 17, 11, 17, 19, 18, 25, 18, 19, 19,
      // 70 .. 79 samples
      29, 21, 19, 27, 31, 29, 21, 18, 17, 29,
      // 80 .. 89 samples
      31, 31, 23, 18, 25, 26, 25, 23, 19, 34,
      // 90 .. 98 samples
      19, 27, 21, 25, 39, 29, 17, 21, 27);
  /* clang-format on */

  // Fixed-value alternative, kept for reference:
  //#define NUM_SPIRAL_TURNS (7)

  // Spiral turn count for the configured number of samples.
  const int NUM_SPIRAL_TURNS = ROTATIONS[NUM_SAMPLES - 1];
  // --- Input textures ---
  // NOTE(review): the trailing `//texunit:N` markers look like annotations read
  // by the shader build tooling; keep them on the declaration line - confirm.
  uniform sampler2D source_depth; //texunit:0
  uniform highp usampler2D source_depth_mipmaps; //texunit:1
  uniform sampler2D source_normal; //texunit:2

  // --- Viewport ---
  // Size in pixels of the depth buffer; used to clamp texel coordinates.
  uniform ivec2 screen_size;

  // --- Camera / projection ---
  // Near and far plane distances used to linearize fetched depth.
  uniform float camera_z_near;
  uniform float camera_z_far;
  // Scale (xy) and offset (zw) applied to pixel coordinates when reconstructing
  // a camera-space position.
  uniform vec4 proj_info;
  // Factor converting the AO radius into a screen-space disk radius.
  uniform float proj_scale;

  // --- Ambient occlusion parameters ---
  uniform float radius;
  // Scale applied to the summed occlusion (presumably intensity / radius^6,
  // going by the name - confirm against the caller).
  uniform float intensity_div_r6;
  #ifdef ENABLE_RADIUS2
  // Same pair of parameters for the optional second sampling radius.
  uniform float radius2;
  uniform float intensity_div_r62;
  #endif
  // Subtracted from the normal-projected offset of each tap before it counts
  // as occlusion.
  uniform float bias;

  // --- Output ---
  // Resulting visibility term written by the fragment stage.
  layout(location = 0) out float visibility;
  /**
   * Reconstructs a camera-space position from a pixel coordinate \a S and a
   * camera-space depth \a z, using the scale/offset packed in proj_info.
   * With a perspective projection the xy term is additionally scaled by z;
   * with USE_ORTHOGONAL_PROJECTION it is used as is.
   */
  vec3 reconstructCSPosition(vec2 S, float z) {
      vec2 xy = S.xy * proj_info.xy + proj_info.zw;
  #ifdef USE_ORTHOGONAL_PROJECTION
      return vec3(xy, z);
  #else
      return vec3(xy * z, z);
  #endif
  }
  /**
   * Returns the camera-space position of the pixel at integer coordinate \a ssP,
   * read from mip 0 of source_depth.
   */
  vec3 getPosition(ivec2 ssP) {
      // Fetched depth remapped with d * 2 - 1 before linearization.
      float ndcDepth = texelFetch(source_depth, ssP, 0).r;
      ndcDepth = ndcDepth * 2.0 - 1.0;

      // Convert to a distance using the near/far planes.
  #ifdef USE_ORTHOGONAL_PROJECTION
      float linearDepth = ((ndcDepth + (camera_z_far + camera_z_near) / (camera_z_far - camera_z_near)) * (camera_z_far - camera_z_near)) / 2.0;
  #else
      float linearDepth = 2.0 * camera_z_near * camera_z_far / (camera_z_far + camera_z_near - ndcDepth * (camera_z_far - camera_z_near));
  #endif

      // The camera-space z passed on is the negated distance; the +0.5 moves
      // the sample point to the pixel center.
      return reconstructCSPosition(vec2(ssP) + vec2(0.5), -linearDepth);
  }
  /**
   * Reconstructs a unit face normal from the camera-space position \a C using
   * its screen-space derivatives: normalize(cross(dC/dy, dC/dx)).
   */
  vec3 reconstructCSFaceNormal(vec3 C) {
      vec3 dCdx = dFdx(C);
      vec3 dCdy = dFdy(C);
      return normalize(cross(dCdy, dCdx));
  }
  /**
   * Returns the unit direction of spiral tap \a sampleNumber on the unit disk
   * and writes its relative radius to \a ssR (the caller scales it by the actual
   * disk radius).
   *
   * \param sampleNumber  tap index in [0, NUM_SAMPLES)
   * \param spinAngle     per-pixel rotation added to the spiral angle, in radians
   * \param ssR           out: radius of the tap relative to the disk, in (0, 1)
   */
  vec2 tapLocation(int sampleNumber, float spinAngle, out float ssR) {
      // Radius relative to ssR
      float alpha = (float(sampleNumber) + 0.5) * (1.0 / float(NUM_SAMPLES));
      // NUM_SPIRAL_TURNS whole turns across the disk. Uses TWO_PI rather than a
      // truncated 6.28 so the spiral really completes an integer number of turns.
      float angle = alpha * (float(NUM_SPIRAL_TURNS) * TWO_PI) + spinAngle;
      ssR = alpha;
      return vec2(cos(angle), sin(angle));
  }
  /**
   * Reads the camera-space position of the point at screen-space pixel
   * ssC + unitOffset * ssR. Assumes length(unitOffset) == 1.
   *
   * Far-away taps are read from coarser levels of the depth mip chain:
   * mipLevel = floor(log2(ssR)) - LOG_MAX_OFFSET, clamped to [0, MAX_MIP_LEVEL].
   */
  vec3 getOffsetPosition(ivec2 ssC, vec2 unitOffset, float ssR) {
      int mipLevel = clamp(int(floor(log2(ssR))) - LOG_MAX_OFFSET, 0, MAX_MIP_LEVEL);

      // Full-resolution pixel hit by this tap.
      ivec2 ssP = ssC + ivec2(ssR * unitOffset);

      // Coordinate inside the chosen mip: divide by 2^mipLevel, then clamp by
      // hand because texelFetch bypasses the texture unit's own clamping.
      ivec2 lastTexel = (screen_size >> mipLevel) - ivec2(1);
      ivec2 mipP = clamp(ssP >> mipLevel, ivec2(0), lastTexel);

      float csZ;
      if (mipLevel >= 1) {
          // Coarser levels come from source_depth_mipmaps, whose level 0 holds
          // mip 1. Values are stored as integers scaled so that 65535 maps to
          // the far plane.
          uint packedDepth = texelFetch(source_depth_mipmaps, mipP, mipLevel - 1).r;
          csZ = -(float(packedDepth) / 65535.0) * camera_z_far;
      } else {
          // Level 0 comes straight from the depth buffer and is linearized here.
          float ndcDepth = texelFetch(source_depth, mipP, 0).r;
          ndcDepth = ndcDepth * 2.0 - 1.0;
  #ifdef USE_ORTHOGONAL_PROJECTION
          float linearDepth = ((ndcDepth + (camera_z_far + camera_z_near) / (camera_z_far - camera_z_near)) * (camera_z_far - camera_z_near)) / 2.0;
  #else
          float linearDepth = 2.0 * camera_z_near * camera_z_far / (camera_z_far + camera_z_near - ndcDepth * (camera_z_far - camera_z_near));
  #endif
          csZ = -linearDepth;
      }

      // Offset to pixel center. Note that the unclamped ssP is used here.
      return reconstructCSPosition(vec2(ssP) + vec2(0.5), csZ);
  }
  /**
   * Computes the occlusion contributed by tap \a tapIndex around pixel \a ssC,
   * which corresponds to camera-space point \a C with unit normal \a n_C.
   * \a ssDiskRadius is the maximum screen-space sampling radius, \a p_radius the
   * AO radius, and \a randomPatternRotationAngle the per-pixel spin of the tap
   * pattern.
   *
   * Note that units of H() in the HPG12 paper are meters, not unitless. The
   * whole falloff/sampling function is therefore unitless. In this
   * implementation, we factor out (9 / radius).
   *
   * Falloff variant B is the one in use; A, C and D are listed for reference.
   */
  float sampleAO(in ivec2 ssC, in vec3 C, in vec3 n_C, in float ssDiskRadius, in float p_radius, in int tapIndex, in float randomPatternRotationAngle) {
      const float epsilon = 0.01;

      // Tap direction on the unit disk, spun for this pixel, and its radius
      // scaled to the screen-space disk.
      float tapRadius;
      vec2 tapDirection = tapLocation(tapIndex, randomPatternRotationAngle, tapRadius);
      tapRadius *= ssDiskRadius;

      // Vector from the shaded point to the occluding point in camera space.
      vec3 toOccluder = getOffsetPosition(ssC, tapDirection, tapRadius) - C;
      float distSq = dot(toOccluder, toOccluder);
      float alongNormal = dot(toOccluder, n_C);

      float radiusSq = p_radius * p_radius;

      // A: From the HPG12 paper
      // Note large epsilon to avoid overdarkening within cracks
      //return float(distSq < radiusSq) * max((alongNormal - bias) / (epsilon + distSq), 0.0) * radiusSq * 0.6;

      // B: Smoother transition to zero (lowers contrast, smoothing out corners). [Recommended]
      float falloff = max(radiusSq - distSq, 0.0);
      float occlusion = max((alongNormal - bias) / (epsilon + distSq), 0.0);
      return falloff * falloff * falloff * occlusion;

      // C: Medium contrast (which looks better at high radii), no division. Note that the
      // contribution still falls off with radius^2, but we've adjusted the rate in a way that is
      // more computationally efficient and happens to be aesthetically pleasing.
      // return 4.0 * max(1.0 - distSq * invRadius2, 0.0) * max(alongNormal - bias, 0.0);

      // D: Low contrast, no division operation
      // return 2.0 * float(distSq < radius * radius) * max(alongNormal - bias, 0.0);
  }
  /**
   * Fragment stage: computes the ambient-occlusion visibility term for the
   * current pixel and writes it to `visibility`.
   *
   * Steps: reconstruct the camera-space position and face normal, pick a
   * per-pixel rotation for the tap pattern, accumulate NUM_SAMPLES taps over a
   * disk whose screen-space size follows `radius`, optionally repeat with
   * `radius2` (ENABLE_RADIUS2) keeping the darker result, then smooth the value
   * across the 2x2 pixel quad where depth is continuous.
   */
  void main() {
      // Pixel being shaded
      ivec2 ssC = ivec2(gl_FragCoord.xy);

      // Camera-space point being shaded
      vec3 C = getPosition(ssC);

      /*
      if (C.z <= -camera_z_far * 0.999) {
          // We're on the skybox
          visibility=1.0;
          return;
      }
      */

      //visibility = -C.z / camera_z_far;
      //return;

  #if 0
      vec3 n_C = texelFetch(source_normal, ssC, 0).rgb * 2.0 - 1.0;
  #else
      // Reconstruct normals from positions. These will lead to 1-pixel black lines
      // at depth discontinuities, however the blur will wipe those out so they are not visible
      // in the final image.
      vec3 n_C = reconstructCSFaceNormal(C);
      n_C = -n_C;
  #endif

      // Hash function used in the HPG12 AlchemyAO paper. The parentheses only
      // spell out the existing operator precedence (* and + bind tighter than ^).
      float randomPatternRotationAngle = mod(float(((3 * ssC.x) ^ (ssC.y + ssC.x * ssC.y)) * 10), TWO_PI);

      // Choose the screen-space sample radius
      // proportional to the projected area of the sphere
  #ifdef USE_ORTHOGONAL_PROJECTION
      float ssDiskRadius = -proj_scale * radius;
  #else
      float ssDiskRadius = -proj_scale * radius / C.z;
  #endif

      float sum = 0.0;
      for (int i = 0; i < NUM_SAMPLES; ++i) {
          sum += sampleAO(ssC, C, n_C, ssDiskRadius, radius, i, randomPatternRotationAngle);
      }

      float A = max(0.0, 1.0 - sum * intensity_div_r6 * (5.0 / float(NUM_SAMPLES)));

  #ifdef ENABLE_RADIUS2
      // Go again for radius2, with a different hash so the two tap patterns
      // are rotated independently.
      randomPatternRotationAngle = mod(float(((5 * ssC.x) ^ (ssC.y + ssC.x * ssC.y)) * 11), TWO_PI);

      // Same screen-space radius rule as the first pass. An orthogonal
      // projection has no perspective divide, so the radius must not be
      // divided by depth there.
  #ifdef USE_ORTHOGONAL_PROJECTION
      ssDiskRadius = -proj_scale * radius2;
  #else
      ssDiskRadius = -proj_scale * radius2 / C.z;
  #endif

      sum = 0.0;
      for (int i = 0; i < NUM_SAMPLES; ++i) {
          sum += sampleAO(ssC, C, n_C, ssDiskRadius, radius2, i, randomPatternRotationAngle);
      }

      // Keep the darker of the two passes.
      A = min(A, max(0.0, 1.0 - sum * intensity_div_r62 * (5.0 / float(NUM_SAMPLES))));
  #endif

      // Bilateral box-filter over a quad for free, respecting depth edges
      // (the difference that this makes is subtle)
      if (abs(dFdx(C.z)) < 0.02) {
          A -= dFdx(A) * (float(ssC.x & 1) - 0.5);
      }
      if (abs(dFdy(C.z)) < 0.02) {
          A -= dFdy(A) * (float(ssC.y & 1) - 0.5);
      }

      visibility = A;
  }