lm_compute.glsl 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066
  1. #[versions]
  2. primary = "#define MODE_DIRECT_LIGHT";
  3. secondary = "#define MODE_BOUNCE_LIGHT";
  4. dilate = "#define MODE_DILATE";
  5. unocclude = "#define MODE_UNOCCLUDE";
  6. light_probes = "#define MODE_LIGHT_PROBES";
  7. denoise = "#define MODE_DENOISE";
  8. pack_coeffs = "#define MODE_PACK_L1_COEFFS";
  9. #[compute]
  10. #version 450
  11. #VERSION_DEFINES
  12. #extension GL_EXT_samplerless_texture_functions : enable
// Each 2D local group works on a single layer at a time, while all layers still
// run in parallel (no barriers). This makes more sense than a 3D local group
// because it can take more advantage of the cache for each group.
  16. #ifdef MODE_LIGHT_PROBES
  17. layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
  18. #else
  19. layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
  20. #endif
  21. #include "lm_common_inc.glsl"
// Descriptor set 1 holds the per-mode resources. Each mode declares its own
// layout, so the same binding index refers to different resources depending on
// which MODE_* define is active.

#ifdef MODE_LIGHT_PROBES
// Resources for the light probe mode.
layout(set = 1, binding = 0, std430) restrict buffer LightProbeData {
	vec4 data[];
}
light_probes;
layout(set = 1, binding = 1) uniform texture2DArray source_light;
// Sampled by trace_environment_color().
layout(set = 1, binding = 2) uniform texture2D environment;
#endif
#ifdef MODE_UNOCCLUDE
// main() reads position.xyz/.a from `position` and a face normal (xyz) plus a
// texel size (w) from `unocclude`.
layout(rgba32f, set = 1, binding = 0) uniform restrict image2DArray position;
layout(rgba32f, set = 1, binding = 1) uniform restrict readonly image2DArray unocclude;
#endif
#if defined(MODE_DIRECT_LIGHT) || defined(MODE_BOUNCE_LIGHT)
// Shared by the direct and bounce passes: `dest_light` is write-only output,
// `accum_light` is read-modify-written by the bounce pass.
layout(rgba16f, set = 1, binding = 0) uniform restrict writeonly image2DArray dest_light;
layout(set = 1, binding = 1) uniform texture2DArray source_light;
layout(set = 1, binding = 2) uniform texture2DArray source_position;
layout(set = 1, binding = 3) uniform texture2DArray source_normal;
layout(rgba16f, set = 1, binding = 4) uniform restrict image2DArray accum_light;
#endif
#ifdef MODE_BOUNCE_LIGHT
// Sampled by trace_environment_color() when a bounce ray misses all geometry.
layout(set = 1, binding = 5) uniform texture2D environment;
#endif
#if defined(MODE_DILATE) || defined(MODE_DENOISE) || defined(MODE_PACK_L1_COEFFS)
// Post-processing modes read `source_light` and write `dest_light`.
layout(rgba16f, set = 1, binding = 0) uniform restrict writeonly image2DArray dest_light;
layout(set = 1, binding = 1) uniform texture2DArray source_light;
#endif
#ifdef MODE_DENOISE
layout(set = 1, binding = 2) uniform texture2DArray source_normal;
// Denoiser tuning values. NOTE(review): the code consuming these fields is not
// part of this view; confirm units and ranges against the denoise pass.
layout(set = 1, binding = 3) uniform DenoiseParams {
	float spatial_bandwidth;
	float light_bandwidth;
	float albedo_bandwidth;
	float normal_bandwidth;
	int half_search_window;
	float filter_strength;
}
denoise_params;
#endif
// Per-dispatch push constants.
layout(push_constant, std430) uniform Params {
	// Atlas layer addressed by this dispatch (used as the array layer for texel fetches and image stores).
	uint atlas_slice;
	// Total ray budget: divisor when averaging bounce light and the ray budget for soft shadows.
	uint ray_count;
	// Range of ray indices [ray_from, ray_to) traced by one bounce dispatch.
	uint ray_from;
	uint ray_to;
	// Offset added to the global invocation ID to obtain the atlas texel position.
	ivec2 region_ofs;
	// Number of probes; invocations whose index is >= this value return early in MODE_LIGHT_PROBES.
	uint probe_count;
}
params;
  69. //check it, but also return distance and barycentric coords (for uv lookup)
  70. bool ray_hits_triangle(vec3 from, vec3 dir, float max_dist, vec3 p0, vec3 p1, vec3 p2, out float r_distance, out vec3 r_barycentric) {
  71. const float EPSILON = 0.00001;
  72. const vec3 e0 = p1 - p0;
  73. const vec3 e1 = p0 - p2;
  74. vec3 triangle_normal = cross(e1, e0);
  75. float n_dot_dir = dot(triangle_normal, dir);
  76. if (abs(n_dot_dir) < EPSILON) {
  77. return false;
  78. }
  79. const vec3 e2 = (p0 - from) / n_dot_dir;
  80. const vec3 i = cross(dir, e2);
  81. r_barycentric.y = dot(i, e1);
  82. r_barycentric.z = dot(i, e0);
  83. r_barycentric.x = 1.0 - (r_barycentric.z + r_barycentric.y);
  84. r_distance = dot(triangle_normal, e2);
  85. return (r_distance > bake_params.bias) && (r_distance < max_dist) && all(greaterThanEqual(r_barycentric, vec3(0.0)));
  86. }
// Result codes returned by trace_ray() and its wrappers.
const uint RAY_MISS = 0; // Nothing was hit.
const uint RAY_FRONT = 1; // Closest hit is on a front face.
const uint RAY_BACK = 2; // Closest hit is on a back face.
const uint RAY_ANY = 3; // Some hit exists (any-hit query; no hit details are produced).
  91. bool ray_box_test(vec3 p_from, vec3 p_inv_dir, vec3 p_box_min, vec3 p_box_max) {
  92. vec3 t0 = (p_box_min - p_from) * p_inv_dir;
  93. vec3 t1 = (p_box_max - p_from) * p_inv_dir;
  94. vec3 tmin = min(t0, t1), tmax = max(t0, t1);
  95. return max(tmin.x, max(tmin.y, tmin.z)) <= min(tmax.x, min(tmax.y, tmax.z));
  96. }
// trace_ray() records per-triangle box-test results in a 32-bit mask. When a
// cluster can hold more than 32 triangles, this define enables an extra loop in
// trace_ray() that steps through the triangles in several passes.
#if CLUSTER_SIZE > 32
#define CLUSTER_TRIANGLE_ITERATION
#endif
  100. uint trace_ray(vec3 p_from, vec3 p_to, bool p_any_hit, out float r_distance, out vec3 r_normal, out uint r_triangle, out vec3 r_barycentric) {
  101. // World coordinates.
  102. vec3 rel = p_to - p_from;
  103. float rel_len = length(rel);
  104. vec3 dir = normalize(rel);
  105. vec3 inv_dir = 1.0 / dir;
  106. // Cell coordinates.
  107. vec3 from_cell = (p_from - bake_params.to_cell_offset) * bake_params.to_cell_size;
  108. vec3 to_cell = (p_to - bake_params.to_cell_offset) * bake_params.to_cell_size;
  109. // Prepare DDA.
  110. vec3 rel_cell = to_cell - from_cell;
  111. ivec3 icell = ivec3(from_cell);
  112. ivec3 iendcell = ivec3(to_cell);
  113. vec3 dir_cell = normalize(rel_cell);
  114. vec3 delta = min(abs(1.0 / dir_cell), bake_params.grid_size); // Use bake_params.grid_size as max to prevent infinity values.
  115. ivec3 step = ivec3(sign(rel_cell));
  116. vec3 side = (sign(rel_cell) * (vec3(icell) - from_cell) + (sign(rel_cell) * 0.5) + 0.5) * delta;
  117. uint iters = 0;
  118. while (all(greaterThanEqual(icell, ivec3(0))) && all(lessThan(icell, ivec3(bake_params.grid_size))) && (iters < 1000)) {
  119. uvec2 cell_data = texelFetch(grid, icell, 0).xy;
  120. uint triangle_count = cell_data.x;
  121. if (triangle_count > 0) {
  122. uint hit = RAY_MISS;
  123. float best_distance = 1e20;
  124. uint cluster_start = cluster_indices.data[cell_data.y * 2];
  125. uint cell_triangle_start = cluster_indices.data[cell_data.y * 2 + 1];
  126. uint cluster_count = (triangle_count + CLUSTER_SIZE - 1) / CLUSTER_SIZE;
  127. uint cluster_base_index = 0;
  128. while (cluster_base_index < cluster_count) {
  129. // To minimize divergence, all Ray-AABB tests on the clusters contained in the cell are performed
  130. // before checking against the triangles. We do this 32 clusters at a time and store the intersected
  131. // clusters on each bit of the 32-bit integer.
  132. uint cluster_test_count = min(32, cluster_count - cluster_base_index);
  133. uint cluster_hits = 0;
  134. for (uint i = 0; i < cluster_test_count; i++) {
  135. uint cluster_index = cluster_start + cluster_base_index + i;
  136. ClusterAABB cluster_aabb = cluster_aabbs.data[cluster_index];
  137. if (ray_box_test(p_from, inv_dir, cluster_aabb.min_bounds, cluster_aabb.max_bounds)) {
  138. cluster_hits |= (1 << i);
  139. }
  140. }
  141. // Check the triangles in any of the clusters that were intersected by toggling off the bits in the
  142. // 32-bit integer counter until no bits are left.
  143. while (cluster_hits > 0) {
  144. uint cluster_index = findLSB(cluster_hits);
  145. cluster_hits &= ~(1 << cluster_index);
  146. cluster_index += cluster_base_index;
  147. // Do the same divergence execution trick with triangles as well.
  148. uint triangle_base_index = 0;
  149. #ifdef CLUSTER_TRIANGLE_ITERATION
  150. while (triangle_base_index < triangle_count)
  151. #endif
  152. {
  153. uint triangle_start_index = cell_triangle_start + cluster_index * CLUSTER_SIZE + triangle_base_index;
  154. uint triangle_test_count = min(CLUSTER_SIZE, triangle_count - triangle_base_index);
  155. uint triangle_hits = 0;
  156. for (uint i = 0; i < triangle_test_count; i++) {
  157. uint triangle_index = triangle_indices.data[triangle_start_index + i];
  158. if (ray_box_test(p_from, inv_dir, triangles.data[triangle_index].min_bounds, triangles.data[triangle_index].max_bounds)) {
  159. triangle_hits |= (1 << i);
  160. }
  161. }
  162. while (triangle_hits > 0) {
  163. uint cluster_triangle_index = findLSB(triangle_hits);
  164. triangle_hits &= ~(1 << cluster_triangle_index);
  165. cluster_triangle_index += triangle_start_index;
  166. uint triangle_index = triangle_indices.data[cluster_triangle_index];
  167. Triangle triangle = triangles.data[triangle_index];
  168. // Gather the triangle vertex positions.
  169. vec3 vtx0 = vertices.data[triangle.indices.x].position;
  170. vec3 vtx1 = vertices.data[triangle.indices.y].position;
  171. vec3 vtx2 = vertices.data[triangle.indices.z].position;
  172. vec3 normal = -normalize(cross((vtx0 - vtx1), (vtx0 - vtx2)));
  173. bool backface = dot(normal, dir) >= 0.0;
  174. float distance;
  175. vec3 barycentric;
  176. if (ray_hits_triangle(p_from, dir, rel_len, vtx0, vtx1, vtx2, distance, barycentric)) {
  177. if (p_any_hit) {
  178. // Return early if any hit was requested.
  179. return RAY_ANY;
  180. }
  181. vec3 position = p_from + dir * distance;
  182. vec3 hit_cell = (position - bake_params.to_cell_offset) * bake_params.to_cell_size;
  183. if (icell != ivec3(hit_cell)) {
  184. // It's possible for the ray to hit a triangle in a position outside the bounds of the cell
  185. // if it's large enough to cover multiple ones. The hit must be ignored if this is the case.
  186. continue;
  187. }
  188. if (!backface) {
  189. // The case of meshes having both a front and back face in the same plane is more common than
  190. // expected, so if this is a front-face, bias it closer to the ray origin, so it always wins
  191. // over the back-face.
  192. distance = max(bake_params.bias, distance - bake_params.bias);
  193. }
  194. if (distance < best_distance) {
  195. hit = backface ? RAY_BACK : RAY_FRONT;
  196. best_distance = distance;
  197. r_distance = distance;
  198. r_normal = normal;
  199. r_triangle = triangle_index;
  200. r_barycentric = barycentric;
  201. }
  202. }
  203. }
  204. #ifdef CLUSTER_TRIANGLE_ITERATION
  205. triangle_base_index += CLUSTER_SIZE;
  206. #endif
  207. }
  208. }
  209. cluster_base_index += 32;
  210. }
  211. if (hit != RAY_MISS) {
  212. return hit;
  213. }
  214. }
  215. if (icell == iendcell) {
  216. break;
  217. }
  218. // There should be only one axis updated at a time for DDA to work properly.
  219. bvec3 mask = bvec3(true, false, false);
  220. float m = side.x;
  221. if (side.y < m) {
  222. m = side.y;
  223. mask = bvec3(false, true, false);
  224. }
  225. if (side.z < m) {
  226. mask = bvec3(false, false, true);
  227. }
  228. side += vec3(mask) * delta;
  229. icell += ivec3(vec3(mask)) * step;
  230. iters++;
  231. }
  232. return RAY_MISS;
  233. }
  234. uint trace_ray_closest_hit_triangle(vec3 p_from, vec3 p_to, out uint r_triangle, out vec3 r_barycentric) {
  235. float distance;
  236. vec3 normal;
  237. return trace_ray(p_from, p_to, false, distance, normal, r_triangle, r_barycentric);
  238. }
  239. uint trace_ray_closest_hit_distance(vec3 p_from, vec3 p_to, out float r_distance, out vec3 r_normal) {
  240. uint triangle;
  241. vec3 barycentric;
  242. return trace_ray(p_from, p_to, false, r_distance, r_normal, triangle, barycentric);
  243. }
  244. uint trace_ray_any_hit(vec3 p_from, vec3 p_to) {
  245. float distance;
  246. vec3 normal;
  247. uint triangle;
  248. vec3 barycentric;
  249. return trace_ray(p_from, p_to, true, distance, normal, triangle, barycentric);
  250. }
  251. // https://www.reedbeta.com/blog/hash-functions-for-gpu-rendering/
  252. uint hash(uint value) {
  253. uint state = value * 747796405u + 2891336453u;
  254. uint word = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u;
  255. return (word >> 22u) ^ word;
  256. }
  257. uint random_seed(ivec3 seed) {
  258. return hash(seed.x ^ hash(seed.y ^ hash(seed.z)));
  259. }
  260. // generates a random value in range [0.0, 1.0)
  261. float randomize(inout uint value) {
  262. value = hash(value);
  263. return float(value / 4294967296.0);
  264. }
  265. const float PI = 3.14159265f;
  266. // http://www.realtimerendering.com/raytracinggems/unofficial_RayTracingGems_v1.4.pdf (chapter 15)
  267. vec3 generate_hemisphere_cosine_weighted_direction(inout uint noise) {
  268. float noise1 = randomize(noise);
  269. float noise2 = randomize(noise) * 2.0 * PI;
  270. return vec3(sqrt(noise1) * cos(noise2), sqrt(noise1) * sin(noise2), sqrt(1.0 - noise1));
  271. }
  272. // Distribution generation adapted from "Generating uniformly distributed numbers on a sphere"
  273. // <http://corysimon.github.io/articles/uniformdistn-on-sphere/>
  274. vec3 generate_sphere_uniform_direction(inout uint noise) {
  275. float theta = 2.0 * PI * randomize(noise);
  276. float phi = acos(1.0 - 2.0 * randomize(noise));
  277. return vec3(sin(phi) * cos(theta), sin(phi) * sin(theta), cos(phi));
  278. }
  279. vec3 generate_ray_dir_from_normal(vec3 normal, inout uint noise) {
  280. vec3 v0 = abs(normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0);
  281. vec3 tangent = normalize(cross(v0, normal));
  282. vec3 bitangent = normalize(cross(tangent, normal));
  283. mat3 normal_mat = mat3(tangent, bitangent, normal);
  284. return normal_mat * generate_hemisphere_cosine_weighted_direction(noise);
  285. }
  286. #if defined(MODE_DIRECT_LIGHT) || defined(MODE_BOUNCE_LIGHT) || defined(MODE_LIGHT_PROBES)
  287. float get_omni_attenuation(float distance, float inv_range, float decay) {
  288. float nd = distance * inv_range;
  289. nd *= nd;
  290. nd *= nd; // nd^4
  291. nd = max(1.0 - nd, 0.0);
  292. nd *= nd; // nd^2
  293. return nd * pow(max(distance, 0.0001), -decay);
  294. }
// Number of sample positions per texel used by trace_direct_light() when soft
// shadowing is requested.
const int AA_SAMPLES = 16;
// Precomputed 2D sample positions in [0, 1): the x components are the base-2
// and the y components the base-3 radical-inverse sequence (Halton points).
// trace_direct_light() recentres them around zero before use.
const vec2 halton_map[AA_SAMPLES] = vec2[](
		vec2(0.5, 0.33333333),
		vec2(0.25, 0.66666667),
		vec2(0.75, 0.11111111),
		vec2(0.125, 0.44444444),
		vec2(0.625, 0.77777778),
		vec2(0.375, 0.22222222),
		vec2(0.875, 0.55555556),
		vec2(0.0625, 0.88888889),
		vec2(0.5625, 0.03703704),
		vec2(0.3125, 0.37037037),
		vec2(0.8125, 0.7037037),
		vec2(0.1875, 0.14814815),
		vec2(0.6875, 0.48148148),
		vec2(0.4375, 0.81481481),
		vec2(0.9375, 0.25925926),
		vec2(0.03125, 0.59259259));
  313. vec2 get_vogel_disk(float p_i, float p_rotation, float p_sample_count_sqrt) {
  314. const float golden_angle = 2.4;
  315. float r = sqrt(p_i + 0.5) / p_sample_count_sqrt;
  316. float theta = p_i * golden_angle + p_rotation;
  317. return vec2(cos(theta), sin(theta)) * r;
  318. }
  319. void trace_direct_light(vec3 p_position, vec3 p_normal, uint p_light_index, bool p_soft_shadowing, out vec3 r_light, out vec3 r_light_dir, inout uint r_noise, float p_texel_size) {
  320. r_light = vec3(0.0f);
  321. vec3 light_pos;
  322. float dist;
  323. float attenuation;
  324. float soft_shadowing_disk_size;
  325. Light light_data = lights.data[p_light_index];
  326. if (light_data.type == LIGHT_TYPE_DIRECTIONAL) {
  327. vec3 light_vec = light_data.direction;
  328. light_pos = p_position - light_vec * length(bake_params.world_size);
  329. r_light_dir = normalize(light_pos - p_position);
  330. dist = length(bake_params.world_size);
  331. attenuation = 1.0;
  332. soft_shadowing_disk_size = light_data.size;
  333. } else {
  334. light_pos = light_data.position;
  335. r_light_dir = normalize(light_pos - p_position);
  336. dist = distance(p_position, light_pos);
  337. if (dist > light_data.range) {
  338. return;
  339. }
  340. soft_shadowing_disk_size = light_data.size / dist;
  341. attenuation = get_omni_attenuation(dist, 1.0 / light_data.range, light_data.attenuation);
  342. if (light_data.type == LIGHT_TYPE_SPOT) {
  343. vec3 rel = normalize(p_position - light_pos);
  344. float cos_spot_angle = light_data.cos_spot_angle;
  345. float cos_angle = dot(rel, light_data.direction);
  346. if (cos_angle < cos_spot_angle) {
  347. return;
  348. }
  349. float scos = max(cos_angle, cos_spot_angle);
  350. float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - cos_spot_angle));
  351. attenuation *= 1.0 - pow(spot_rim, light_data.inv_spot_attenuation);
  352. }
  353. }
  354. attenuation *= max(0.0, dot(p_normal, r_light_dir));
  355. if (attenuation <= 0.0001) {
  356. return;
  357. }
  358. float penumbra = 0.0;
  359. if (p_soft_shadowing) {
  360. const bool use_soft_shadows = (light_data.size > 0.0);
  361. const uint ray_count = AA_SAMPLES;
  362. const uint total_ray_count = use_soft_shadows ? params.ray_count : ray_count;
  363. const uint shadowing_rays_check_penumbra_denom = 2;
  364. const uint shadowing_ray_count = max(1, params.ray_count / ray_count);
  365. const float shadowing_ray_count_sqrt = sqrt(float(total_ray_count));
  366. // Setup tangent pass to calculate AA samples over the current texel.
  367. vec3 aux = p_normal.y < 0.777 ? vec3(0.0, 1.0, 0.0) : vec3(1.0, 0.0, 0.0);
  368. vec3 tangent = normalize(cross(p_normal, aux));
  369. vec3 bitan = normalize(cross(p_normal, tangent));
  370. // Setup light tangent pass to calculate samples over disk aligned towards the light
  371. vec3 light_to_point = -r_light_dir;
  372. vec3 light_aux = light_to_point.y < 0.777 ? vec3(0.0, 1.0, 0.0) : vec3(1.0, 0.0, 0.0);
  373. vec3 light_to_point_tan = normalize(cross(light_to_point, light_aux));
  374. vec3 light_to_point_bitan = normalize(cross(light_to_point, light_to_point_tan));
  375. uint hits = 0;
  376. for (uint i = 0; i < ray_count; i++) {
  377. // Create a random sample within the texel.
  378. vec2 disk_sample = (halton_map[i] - vec2(0.5)) * p_texel_size * light_data.shadow_blur;
  379. // Align the sample to world space.
  380. vec3 disk_aligned = (disk_sample.x * tangent + disk_sample.y * bitan);
  381. vec3 origin = p_position - disk_aligned;
  382. vec3 light_dir = normalize(light_pos - origin);
  383. if (use_soft_shadows) {
  384. uint soft_shadow_hits = 0;
  385. for (uint j = 0; j < shadowing_ray_count; j++) {
  386. // Optimization:
  387. // Once already traced an important proportion of rays, if all are hits or misses,
  388. // assume we're not in the penumbra so we can infer the rest would have the same result.
  389. if (j == shadowing_ray_count / shadowing_rays_check_penumbra_denom) {
  390. if (soft_shadow_hits == j) {
  391. // Assume totally lit
  392. soft_shadow_hits = shadowing_ray_count;
  393. break;
  394. } else if (soft_shadow_hits == 0) {
  395. // Assume totally dark
  396. soft_shadow_hits = 0;
  397. break;
  398. }
  399. }
  400. float a = randomize(r_noise) * 2.0 * PI;
  401. float vogel_index = float(total_ray_count - 1 - (i * shadowing_ray_count + j)); // Start from (total_ray_count - 1) so we check the outer points first.
  402. vec2 light_disk_sample = get_vogel_disk(vogel_index, a, shadowing_ray_count_sqrt) * soft_shadowing_disk_size * light_data.shadow_blur;
  403. vec3 light_disk_to_point = normalize(light_to_point + light_disk_sample.x * light_to_point_tan + light_disk_sample.y * light_to_point_bitan);
  404. // Offset the ray origin for AA, offset the light position for soft shadows.
  405. if (trace_ray_any_hit(origin - light_disk_to_point * (bake_params.bias + length(disk_sample)), p_position - light_disk_to_point * dist) == RAY_MISS) {
  406. soft_shadow_hits++;
  407. }
  408. }
  409. hits += soft_shadow_hits;
  410. } else {
  411. // Offset the ray origin based on the disk. Also increase the bias for further samples to avoid bleeding.
  412. if (trace_ray_any_hit(origin + light_dir * (bake_params.bias + length(disk_sample)), light_pos) == RAY_MISS) {
  413. hits++;
  414. }
  415. }
  416. }
  417. penumbra = float(hits) / float(total_ray_count);
  418. } else {
  419. if (trace_ray_any_hit(p_position + r_light_dir * bake_params.bias, light_pos) == RAY_MISS) {
  420. penumbra = 1.0;
  421. }
  422. }
  423. r_light = light_data.color * light_data.energy * attenuation * penumbra;
  424. }
  425. #endif
  426. #if defined(MODE_BOUNCE_LIGHT) || defined(MODE_LIGHT_PROBES)
  427. vec3 trace_environment_color(vec3 ray_dir) {
  428. vec3 sky_dir = normalize(mat3(bake_params.env_transform) * ray_dir);
  429. vec2 st = vec2(atan(sky_dir.x, sky_dir.z), acos(sky_dir.y));
  430. if (st.x < 0.0) {
  431. st.x += PI * 2.0;
  432. }
  433. return textureLod(sampler2D(environment, linear_sampler), st / vec2(PI * 2.0, PI), 0.0).rgb;
  434. }
  435. vec3 trace_indirect_light(vec3 p_position, vec3 p_ray_dir, inout uint r_noise, float p_texel_size) {
  436. // The lower limit considers the case where the lightmapper might have bounces disabled but light probes are requested.
  437. vec3 position = p_position;
  438. vec3 ray_dir = p_ray_dir;
  439. uint max_depth = max(bake_params.bounces, 1);
  440. vec3 throughput = vec3(1.0);
  441. vec3 light = vec3(0.0);
  442. for (uint depth = 0; depth < max_depth; depth++) {
  443. uint tidx;
  444. vec3 barycentric;
  445. uint trace_result = trace_ray_closest_hit_triangle(position + ray_dir * bake_params.bias, position + ray_dir * length(bake_params.world_size), tidx, barycentric);
  446. if (trace_result == RAY_FRONT) {
  447. Vertex vert0 = vertices.data[triangles.data[tidx].indices.x];
  448. Vertex vert1 = vertices.data[triangles.data[tidx].indices.y];
  449. Vertex vert2 = vertices.data[triangles.data[tidx].indices.z];
  450. vec3 uvw = vec3(barycentric.x * vert0.uv + barycentric.y * vert1.uv + barycentric.z * vert2.uv, float(triangles.data[tidx].slice));
  451. position = barycentric.x * vert0.position + barycentric.y * vert1.position + barycentric.z * vert2.position;
  452. vec3 norm0 = vec3(vert0.normal_xy, vert0.normal_z);
  453. vec3 norm1 = vec3(vert1.normal_xy, vert1.normal_z);
  454. vec3 norm2 = vec3(vert2.normal_xy, vert2.normal_z);
  455. vec3 normal = barycentric.x * norm0 + barycentric.y * norm1 + barycentric.z * norm2;
  456. vec3 direct_light = vec3(0.0f);
  457. #ifdef USE_LIGHT_TEXTURE_FOR_BOUNCES
  458. direct_light += textureLod(sampler2DArray(source_light, linear_sampler), uvw, 0.0).rgb;
  459. #else
  460. // Trace the lights directly. Significantly more expensive but more accurate in scenarios
  461. // where the lightmap texture isn't reliable.
  462. for (uint i = 0; i < bake_params.light_count; i++) {
  463. vec3 light;
  464. vec3 light_dir;
  465. trace_direct_light(position, normal, i, false, light, light_dir, r_noise, p_texel_size);
  466. direct_light += light * lights.data[i].indirect_energy;
  467. }
  468. direct_light *= bake_params.exposure_normalization;
  469. #endif
  470. vec3 albedo = textureLod(sampler2DArray(albedo_tex, linear_sampler), uvw, 0).rgb;
  471. vec3 emissive = textureLod(sampler2DArray(emission_tex, linear_sampler), uvw, 0).rgb;
  472. emissive *= bake_params.exposure_normalization;
  473. light += throughput * emissive;
  474. throughput *= albedo;
  475. light += throughput * direct_light * bake_params.bounce_indirect_energy;
  476. // Use Russian Roulette to determine a probability to terminate the bounce earlier as an optimization.
  477. // <https://computergraphics.stackexchange.com/questions/2316/is-russian-roulette-really-the-answer>
  478. float p = max(max(throughput.x, throughput.y), throughput.z);
  479. if (randomize(r_noise) > p) {
  480. break;
  481. }
  482. // Boost the throughput from the probability of the ray being terminated early.
  483. throughput *= 1.0 / p;
  484. // Generate a new ray direction for the next bounce from this surface's normal.
  485. ray_dir = generate_ray_dir_from_normal(normal, r_noise);
  486. } else if (trace_result == RAY_MISS) {
  487. // Look for the environment color and stop bouncing.
  488. light += throughput * trace_environment_color(ray_dir);
  489. break;
  490. } else {
  491. // Ignore any other trace results.
  492. break;
  493. }
  494. }
  495. return light;
  496. }
  497. #endif
  498. void main() {
  499. // Check if invocation is out of bounds.
  500. #ifdef MODE_LIGHT_PROBES
  501. int probe_index = int(gl_GlobalInvocationID.x);
  502. if (probe_index >= params.probe_count) {
  503. return;
  504. }
  505. #else
  506. ivec2 atlas_pos = ivec2(gl_GlobalInvocationID.xy) + params.region_ofs;
  507. if (any(greaterThanEqual(atlas_pos, bake_params.atlas_size))) {
  508. return;
  509. }
  510. #endif
  511. #ifdef MODE_DIRECT_LIGHT
  512. vec3 normal = texelFetch(sampler2DArray(source_normal, linear_sampler), ivec3(atlas_pos, params.atlas_slice), 0).xyz;
  513. if (length(normal) < 0.5) {
  514. return; //empty texel, no process
  515. }
  516. vec3 position = texelFetch(sampler2DArray(source_position, linear_sampler), ivec3(atlas_pos, params.atlas_slice), 0).xyz;
  517. vec4 neighbor_position = texelFetch(sampler2DArray(source_position, linear_sampler), ivec3(atlas_pos + ivec2(1, 0), params.atlas_slice), 0).xyzw;
  518. if (neighbor_position.w < 0.001) {
  519. // Empty texel, try again.
  520. neighbor_position.xyz = texelFetch(sampler2DArray(source_position, linear_sampler), ivec3(atlas_pos + ivec2(-1, 0), params.atlas_slice), 0).xyz;
  521. }
  522. float texel_size_world_space = distance(position, neighbor_position.xyz);
  523. vec3 light_for_texture = vec3(0.0);
  524. vec3 light_for_bounces = vec3(0.0);
  525. #ifdef USE_SH_LIGHTMAPS
  526. vec4 sh_accum[4] = vec4[](
  527. vec4(0.0, 0.0, 0.0, 1.0),
  528. vec4(0.0, 0.0, 0.0, 1.0),
  529. vec4(0.0, 0.0, 0.0, 1.0),
  530. vec4(0.0, 0.0, 0.0, 1.0));
  531. #endif
  532. // Use atlas position and a prime number as the seed.
  533. uint noise = random_seed(ivec3(atlas_pos, 43573547));
  534. for (uint i = 0; i < bake_params.light_count; i++) {
  535. vec3 light;
  536. vec3 light_dir;
  537. trace_direct_light(position, normal, i, true, light, light_dir, noise, texel_size_world_space);
  538. if (lights.data[i].static_bake) {
  539. light_for_texture += light;
  540. #ifdef USE_SH_LIGHTMAPS
  541. // These coefficients include the factored out SH evaluation, diffuse convolution, and final application, as well as the BRDF 1/PI and the spherical monte carlo factor.
  542. // LO: 1/(2*sqrtPI) * 1/(2*sqrtPI) * PI * PI * 1/PI = 0.25
  543. // L1: sqrt(3/(4*pi)) * sqrt(3/(4*pi)) * (PI*2/3) * (2 * PI) * 1/PI = 1.0
  544. // Note: This only works because we aren't scaling, rotating, or combing harmonics, we are just directing applying them in the shader.
  545. float c[4] = float[](
  546. 0.25, //l0
  547. light_dir.y, //l1n1
  548. light_dir.z, //l1n0
  549. light_dir.x //l1p1
  550. );
  551. for (uint j = 0; j < 4; j++) {
  552. sh_accum[j].rgb += light * c[j] * bake_params.exposure_normalization;
  553. }
  554. #endif
  555. }
  556. light_for_bounces += light * lights.data[i].indirect_energy;
  557. }
  558. light_for_bounces *= bake_params.exposure_normalization;
  559. imageStore(dest_light, ivec3(atlas_pos, params.atlas_slice), vec4(light_for_bounces, 1.0));
  560. #ifdef USE_SH_LIGHTMAPS
  561. // Keep for adding at the end.
  562. imageStore(accum_light, ivec3(atlas_pos, params.atlas_slice * 4 + 0), sh_accum[0]);
  563. imageStore(accum_light, ivec3(atlas_pos, params.atlas_slice * 4 + 1), sh_accum[1]);
  564. imageStore(accum_light, ivec3(atlas_pos, params.atlas_slice * 4 + 2), sh_accum[2]);
  565. imageStore(accum_light, ivec3(atlas_pos, params.atlas_slice * 4 + 3), sh_accum[3]);
  566. #else
  567. light_for_texture *= bake_params.exposure_normalization;
  568. imageStore(accum_light, ivec3(atlas_pos, params.atlas_slice), vec4(light_for_texture, 1.0));
  569. #endif
  570. #endif
  571. #ifdef MODE_BOUNCE_LIGHT
  572. #ifdef USE_SH_LIGHTMAPS
  573. vec4 sh_accum[4] = vec4[](
  574. vec4(0.0, 0.0, 0.0, 1.0),
  575. vec4(0.0, 0.0, 0.0, 1.0),
  576. vec4(0.0, 0.0, 0.0, 1.0),
  577. vec4(0.0, 0.0, 0.0, 1.0));
  578. #else
  579. vec3 light_accum = vec3(0.0);
  580. #endif
  581. // Retrieve starting normal and position.
  582. vec3 normal = texelFetch(sampler2DArray(source_normal, linear_sampler), ivec3(atlas_pos, params.atlas_slice), 0).xyz;
  583. if (length(normal) < 0.5) {
  584. // The pixel is empty, skip processing it.
  585. return;
  586. }
  587. vec3 position = texelFetch(sampler2DArray(source_position, linear_sampler), ivec3(atlas_pos, params.atlas_slice), 0).xyz;
  588. int neighbor_offset = atlas_pos.x < bake_params.atlas_size.x - 1 ? 1 : -1;
  589. vec3 neighbor_position = texelFetch(sampler2DArray(source_position, linear_sampler), ivec3(atlas_pos + ivec2(neighbor_offset, 0), params.atlas_slice), 0).xyz;
  590. float texel_size_world_space = distance(position, neighbor_position);
  591. uint noise = random_seed(ivec3(params.ray_from, atlas_pos));
  592. for (uint i = params.ray_from; i < params.ray_to; i++) {
  593. vec3 ray_dir = generate_ray_dir_from_normal(normal, noise);
  594. vec3 light = trace_indirect_light(position, ray_dir, noise, texel_size_world_space);
  595. #ifdef USE_SH_LIGHTMAPS
  596. // These coefficients include the factored out SH evaluation, diffuse convolution, and final application, as well as the BRDF 1/PI and the spherical monte carlo factor.
  597. // LO: 1/(2*sqrtPI) * 1/(2*sqrtPI) * PI * PI * 1/PI = 0.25
  598. // L1: sqrt(3/(4*pi)) * sqrt(3/(4*pi)) * (PI*2/3) * (2 * PI) * 1/PI = 1.0
  599. // Note: This only works because we aren't scaling, rotating, or combing harmonics, we are just directing applying them in the shader.
  600. float c[4] = float[](
  601. 0.25, //l0
  602. ray_dir.y, //l1n1
  603. ray_dir.z, //l1n0
  604. ray_dir.x //l1p1
  605. );
  606. for (uint j = 0; j < 4; j++) {
  607. sh_accum[j].rgb += light * c[j];
  608. }
  609. #else
  610. light_accum += light;
  611. #endif
  612. }
  613. // Add the averaged result to the accumulated light texture.
  614. #ifdef USE_SH_LIGHTMAPS
  615. for (int i = 0; i < 4; i++) {
  616. vec4 accum = imageLoad(accum_light, ivec3(atlas_pos, params.atlas_slice * 4 + i));
  617. accum.rgb += sh_accum[i].rgb / float(params.ray_count);
  618. imageStore(accum_light, ivec3(atlas_pos, params.atlas_slice * 4 + i), accum);
  619. }
  620. #else
  621. vec4 accum = imageLoad(accum_light, ivec3(atlas_pos, params.atlas_slice));
  622. accum.rgb += light_accum / float(params.ray_count);
  623. imageStore(accum_light, ivec3(atlas_pos, params.atlas_slice), accum);
  624. #endif
  625. #endif
  626. #ifdef MODE_UNOCCLUDE
  627. //texel_size = 0.5;
  628. //compute tangents
  629. vec4 position_alpha = imageLoad(position, ivec3(atlas_pos, params.atlas_slice));
  630. if (position_alpha.a < 0.5) {
  631. return;
  632. }
  633. vec3 vertex_pos = position_alpha.xyz;
  634. vec4 normal_tsize = imageLoad(unocclude, ivec3(atlas_pos, params.atlas_slice));
  635. vec3 face_normal = normal_tsize.xyz;
  636. float texel_size = normal_tsize.w;
  637. vec3 v0 = abs(face_normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0);
  638. vec3 tangent = normalize(cross(v0, face_normal));
  639. vec3 bitangent = normalize(cross(tangent, face_normal));
  640. vec3 base_pos = vertex_pos + face_normal * bake_params.bias; // Raise a bit.
  641. vec3 rays[4] = vec3[](tangent, bitangent, -tangent, -bitangent);
  642. float min_d = 1e20;
  643. for (int i = 0; i < 4; i++) {
  644. vec3 ray_to = base_pos + rays[i] * texel_size;
  645. float d;
  646. vec3 norm;
  647. if (trace_ray_closest_hit_distance(base_pos, ray_to, d, norm) == RAY_BACK) {
  648. if (d < min_d) {
  649. // This bias needs to be greater than the regular bias, because otherwise later, rays will go the other side when pointing back.
  650. vertex_pos = base_pos + rays[i] * d + norm * bake_params.bias * 10.0;
  651. min_d = d;
  652. }
  653. }
  654. }
  655. position_alpha.xyz = vertex_pos;
  656. imageStore(position, ivec3(atlas_pos, params.atlas_slice), position_alpha);
  657. #endif
  658. #ifdef MODE_LIGHT_PROBES
  659. vec3 position = probe_positions.data[probe_index].xyz;
  660. vec4 probe_sh_accum[9] = vec4[](
  661. vec4(0.0),
  662. vec4(0.0),
  663. vec4(0.0),
  664. vec4(0.0),
  665. vec4(0.0),
  666. vec4(0.0),
  667. vec4(0.0),
  668. vec4(0.0),
  669. vec4(0.0));
  670. uint noise = random_seed(ivec3(params.ray_from, probe_index, 49502741 /* some prime */));
  671. for (uint i = params.ray_from; i < params.ray_to; i++) {
  672. vec3 ray_dir = generate_sphere_uniform_direction(noise);
  673. vec3 light = trace_indirect_light(position, ray_dir, noise, 0.0);
  674. float c[9] = float[](
  675. 0.282095, //l0
  676. 0.488603 * ray_dir.y, //l1n1
  677. 0.488603 * ray_dir.z, //l1n0
  678. 0.488603 * ray_dir.x, //l1p1
  679. 1.092548 * ray_dir.x * ray_dir.y, //l2n2
  680. 1.092548 * ray_dir.y * ray_dir.z, //l2n1
  681. //0.315392 * (ray_dir.x * ray_dir.x + ray_dir.y * ray_dir.y + 2.0 * ray_dir.z * ray_dir.z), //l20
  682. 0.315392 * (3.0 * ray_dir.z * ray_dir.z - 1.0), //l20
  683. 1.092548 * ray_dir.x * ray_dir.z, //l2p1
  684. 0.546274 * (ray_dir.x * ray_dir.x - ray_dir.y * ray_dir.y) //l2p2
  685. );
  686. for (uint j = 0; j < 9; j++) {
  687. probe_sh_accum[j].rgb += light * c[j];
  688. }
  689. }
  690. if (params.ray_from > 0) {
  691. for (uint j = 0; j < 9; j++) { //accum from existing
  692. probe_sh_accum[j] += light_probes.data[probe_index * 9 + j];
  693. }
  694. }
  695. if (params.ray_to == params.ray_count) {
  696. for (uint j = 0; j < 9; j++) { //accum from existing
  697. probe_sh_accum[j] *= 4.0 / float(params.ray_count);
  698. }
  699. }
  700. for (uint j = 0; j < 9; j++) { //accum from existing
  701. light_probes.data[probe_index * 9 + j] = probe_sh_accum[j];
  702. }
  703. #endif
  704. #ifdef MODE_DILATE
  705. vec4 c = texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos, params.atlas_slice), 0);
  706. //sides first, as they are closer
  707. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-1, 0), params.atlas_slice), 0);
  708. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(0, 1), params.atlas_slice), 0);
  709. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(1, 0), params.atlas_slice), 0);
  710. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(0, -1), params.atlas_slice), 0);
  711. //endpoints second
  712. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-1, -1), params.atlas_slice), 0);
  713. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-1, 1), params.atlas_slice), 0);
  714. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(1, -1), params.atlas_slice), 0);
  715. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(1, 1), params.atlas_slice), 0);
  716. //far sides third
  717. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-2, 0), params.atlas_slice), 0);
  718. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(0, 2), params.atlas_slice), 0);
  719. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(2, 0), params.atlas_slice), 0);
  720. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(0, -2), params.atlas_slice), 0);
  721. //far-mid endpoints
  722. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-2, -1), params.atlas_slice), 0);
  723. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-2, 1), params.atlas_slice), 0);
  724. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(2, -1), params.atlas_slice), 0);
  725. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(2, 1), params.atlas_slice), 0);
  726. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-1, -2), params.atlas_slice), 0);
  727. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-1, 2), params.atlas_slice), 0);
  728. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(1, -2), params.atlas_slice), 0);
  729. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(1, 2), params.atlas_slice), 0);
  730. //far endpoints
  731. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-2, -2), params.atlas_slice), 0);
  732. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-2, 2), params.atlas_slice), 0);
  733. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(2, -2), params.atlas_slice), 0);
  734. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(2, 2), params.atlas_slice), 0);
  735. imageStore(dest_light, ivec3(atlas_pos, params.atlas_slice), c);
  736. #endif
  737. #ifdef MODE_DENOISE
  738. // Joint Non-local means (JNLM) denoiser.
  739. //
  740. // Based on YoctoImageDenoiser's JNLM implementation with corrections from "Nonlinearly Weighted First-order Regression for Denoising Monte Carlo Renderings".
  741. //
  742. // <https://github.com/ManuelPrandini/YoctoImageDenoiser/blob/06e19489dd64e47792acffde536393802ba48607/libs/yocto_extension/yocto_extension.cpp#L207>
  743. // <https://benedikt-bitterli.me/nfor/nfor.pdf>
  744. //
  745. // MIT License
  746. //
  747. // Copyright (c) 2020 ManuelPrandini
  748. //
  749. // Permission is hereby granted, free of charge, to any person obtaining a copy
  750. // of this software and associated documentation files (the "Software"), to deal
  751. // in the Software without restriction, including without limitation the rights
  752. // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  753. // copies of the Software, and to permit persons to whom the Software is
  754. // furnished to do so, subject to the following conditions:
  755. //
  756. // The above copyright notice and this permission notice shall be included in all
  757. // copies or substantial portions of the Software.
  758. //
  759. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  760. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  761. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  762. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  763. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  764. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  765. // SOFTWARE.
  766. //
  767. // Most of the constants below have been hand-picked to fit the common scenarios lightmaps
  768. // are generated with, but they can be altered freely to experiment and achieve better results.
  769. // Half the size of the patch window around each pixel that is weighted to compute the denoised pixel.
  770. // A value of 1 represents a 3x3 window, a value of 2 a 5x5 window, etc.
  771. const int HALF_PATCH_WINDOW = 3;
  772. // Half the size of the search window around each pixel that is denoised and weighted to compute the denoised pixel.
  773. const int HALF_SEARCH_WINDOW = denoise_params.half_search_window;
  774. // For all of the following sigma values, smaller values will give less weight to pixels that have a bigger distance
  775. // in the feature being evaluated. Therefore, smaller values are likely to cause more noise to appear, but will also
  776. // cause less features to be erased in the process.
  777. // Controls how much the spatial distance of the pixels influences the denoising weight.
  778. const float SIGMA_SPATIAL = denoise_params.spatial_bandwidth;
  779. // Controls how much the light color distance of the pixels influences the denoising weight.
  780. const float SIGMA_LIGHT = denoise_params.light_bandwidth;
  781. // Controls how much the albedo color distance of the pixels influences the denoising weight.
  782. const float SIGMA_ALBEDO = denoise_params.albedo_bandwidth;
  783. // Controls how much the normal vector distance of the pixels influences the denoising weight.
  784. const float SIGMA_NORMAL = denoise_params.normal_bandwidth;
  785. // Strength of the filter. The original paper recommends values around 10 to 15 times the Sigma parameter.
  786. const float FILTER_VALUE = denoise_params.filter_strength * SIGMA_LIGHT;
  787. // Formula constants.
  788. const int PATCH_WINDOW_DIMENSION = (HALF_PATCH_WINDOW * 2 + 1);
  789. const int PATCH_WINDOW_DIMENSION_SQUARE = (PATCH_WINDOW_DIMENSION * PATCH_WINDOW_DIMENSION);
  790. const float TWO_SIGMA_SPATIAL_SQUARE = 2.0f * SIGMA_SPATIAL * SIGMA_SPATIAL;
  791. const float TWO_SIGMA_LIGHT_SQUARE = 2.0f * SIGMA_LIGHT * SIGMA_LIGHT;
  792. const float TWO_SIGMA_ALBEDO_SQUARE = 2.0f * SIGMA_ALBEDO * SIGMA_ALBEDO;
  793. const float TWO_SIGMA_NORMAL_SQUARE = 2.0f * SIGMA_NORMAL * SIGMA_NORMAL;
  794. const float FILTER_SQUARE_TWO_SIGMA_LIGHT_SQUARE = FILTER_VALUE * FILTER_VALUE * TWO_SIGMA_LIGHT_SQUARE;
  795. const float EPSILON = 1e-6f;
  796. #ifdef USE_SH_LIGHTMAPS
  797. const uint slice_count = 4;
  798. const uint slice_base = params.atlas_slice * slice_count;
  799. #else
  800. const uint slice_count = 1;
  801. const uint slice_base = params.atlas_slice;
  802. #endif
  803. for (uint i = 0; i < slice_count; i++) {
  804. uint lightmap_slice = slice_base + i;
  805. vec3 denoised_rgb = vec3(0.0f);
  806. vec4 input_light = texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos, lightmap_slice), 0);
  807. vec3 input_albedo = texelFetch(sampler2DArray(albedo_tex, linear_sampler), ivec3(atlas_pos, params.atlas_slice), 0).rgb;
  808. vec3 input_normal = texelFetch(sampler2DArray(source_normal, linear_sampler), ivec3(atlas_pos, params.atlas_slice), 0).xyz;
  809. if (length(input_normal) > EPSILON) {
  810. // Compute the denoised pixel if the normal is valid.
  811. float sum_weights = 0.0f;
  812. vec3 input_rgb = input_light.rgb;
  813. for (int search_y = -HALF_SEARCH_WINDOW; search_y <= HALF_SEARCH_WINDOW; search_y++) {
  814. for (int search_x = -HALF_SEARCH_WINDOW; search_x <= HALF_SEARCH_WINDOW; search_x++) {
  815. ivec2 search_pos = atlas_pos + ivec2(search_x, search_y);
  816. vec3 search_rgb = texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(search_pos, lightmap_slice), 0).rgb;
  817. vec3 search_albedo = texelFetch(sampler2DArray(albedo_tex, linear_sampler), ivec3(search_pos, params.atlas_slice), 0).rgb;
  818. vec3 search_normal = texelFetch(sampler2DArray(source_normal, linear_sampler), ivec3(search_pos, params.atlas_slice), 0).xyz;
  819. float patch_square_dist = 0.0f;
  820. for (int offset_y = -HALF_PATCH_WINDOW; offset_y <= HALF_PATCH_WINDOW; offset_y++) {
  821. for (int offset_x = -HALF_PATCH_WINDOW; offset_x <= HALF_PATCH_WINDOW; offset_x++) {
  822. ivec2 offset_input_pos = atlas_pos + ivec2(offset_x, offset_y);
  823. ivec2 offset_search_pos = search_pos + ivec2(offset_x, offset_y);
  824. vec3 offset_input_rgb = texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(offset_input_pos, lightmap_slice), 0).rgb;
  825. vec3 offset_search_rgb = texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(offset_search_pos, lightmap_slice), 0).rgb;
  826. vec3 offset_delta_rgb = offset_input_rgb - offset_search_rgb;
  827. patch_square_dist += dot(offset_delta_rgb, offset_delta_rgb) - TWO_SIGMA_LIGHT_SQUARE;
  828. }
  829. }
  830. patch_square_dist = max(0.0f, patch_square_dist / (3.0f * PATCH_WINDOW_DIMENSION_SQUARE));
  831. float weight = 1.0f;
  832. // Ignore weight if search position is out of bounds.
  833. weight *= step(0, search_pos.x) * step(search_pos.x, bake_params.atlas_size.x - 1);
  834. weight *= step(0, search_pos.y) * step(search_pos.y, bake_params.atlas_size.y - 1);
  835. // Ignore weight if normal is zero length.
  836. weight *= step(EPSILON, length(search_normal));
  837. // Weight with pixel distance.
  838. vec2 pixel_delta = vec2(search_x, search_y);
  839. float pixel_square_dist = dot(pixel_delta, pixel_delta);
  840. weight *= exp(-pixel_square_dist / TWO_SIGMA_SPATIAL_SQUARE);
  841. // Weight with patch.
  842. weight *= exp(-patch_square_dist / FILTER_SQUARE_TWO_SIGMA_LIGHT_SQUARE);
  843. // Weight with albedo.
  844. vec3 albedo_delta = input_albedo - search_albedo;
  845. float albedo_square_dist = dot(albedo_delta, albedo_delta);
  846. weight *= exp(-albedo_square_dist / TWO_SIGMA_ALBEDO_SQUARE);
  847. // Weight with normal.
  848. vec3 normal_delta = input_normal - search_normal;
  849. float normal_square_dist = dot(normal_delta, normal_delta);
  850. weight *= exp(-normal_square_dist / TWO_SIGMA_NORMAL_SQUARE);
  851. denoised_rgb += weight * search_rgb;
  852. sum_weights += weight;
  853. }
  854. }
  855. denoised_rgb /= sum_weights;
  856. } else {
  857. // Ignore pixels where the normal is empty, just copy the light color.
  858. denoised_rgb = input_light.rgb;
  859. }
  860. imageStore(dest_light, ivec3(atlas_pos, lightmap_slice), vec4(denoised_rgb, input_light.a));
  861. }
  862. #endif
  863. #ifdef MODE_PACK_L1_COEFFS
  864. vec4 base_coeff = texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos, params.atlas_slice * 4), 0);
  865. for (int i = 1; i < 4; i++) {
  866. vec4 c = texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos, params.atlas_slice * 4 + i), 0);
  867. if (abs(base_coeff.r) > 0.0) {
  868. c.r /= (base_coeff.r * 8);
  869. }
  870. if (abs(base_coeff.g) > 0.0) {
  871. c.g /= (base_coeff.g * 8);
  872. }
  873. if (abs(base_coeff.b) > 0.0) {
  874. c.b /= (base_coeff.b * 8);
  875. }
  876. c.rgb += vec3(0.5);
  877. c.rgb = clamp(c.rgb, vec3(0.0), vec3(1.0));
  878. imageStore(dest_light, ivec3(atlas_pos, params.atlas_slice * 4 + i), c);
  879. }
  880. #endif
  881. }