lm_compute.glsl 46 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279
  1. #[versions]
  2. primary = "#define MODE_DIRECT_LIGHT";
  3. secondary = "#define MODE_BOUNCE_LIGHT";
  4. dilate = "#define MODE_DILATE";
  5. unocclude = "#define MODE_UNOCCLUDE";
  6. light_probes = "#define MODE_LIGHT_PROBES";
  7. denoise = "#define MODE_DENOISE";
  8. pack_coeffs = "#define MODE_PACK_L1_COEFFS";
  9. #[compute]
  10. #version 450
  11. #VERSION_DEFINES
  12. #extension GL_EXT_samplerless_texture_functions : enable
  // Workgroup shape. Each group is flat (2D) and works on a single layer at a
  // time, while layers still run in parallel with no barriers. This is preferred
  // over a 3D local group because each group can make better use of the cache.
  #ifdef MODE_LIGHT_PROBES
  layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
  #else
  layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
  #endif

  #include "lm_common_inc.glsl"

  // Set 1 resources for the light probe pass.
  #ifdef MODE_LIGHT_PROBES
  layout(set = 1, binding = 0, std430) restrict buffer LightProbeData {
      vec4 data[];
  } light_probes;

  layout(set = 1, binding = 1) uniform texture2DArray source_light;
  layout(set = 1, binding = 2) uniform texture2D environment;
  #endif

  // Set 1 resources for the unocclude pass.
  #ifdef MODE_UNOCCLUDE
  layout(rgba32f, set = 1, binding = 0) uniform restrict image2DArray position;
  layout(rgba32f, set = 1, binding = 1) uniform restrict readonly image2DArray unocclude;
  #endif

  // Set 1 resources shared by the direct and bounce light passes.
  #if defined(MODE_DIRECT_LIGHT) || defined(MODE_BOUNCE_LIGHT)
  layout(rgba16f, set = 1, binding = 0) uniform restrict writeonly image2DArray dest_light;
  layout(set = 1, binding = 1) uniform texture2DArray source_light;
  layout(set = 1, binding = 2) uniform texture2DArray source_position;
  layout(set = 1, binding = 3) uniform texture2DArray source_normal;
  layout(rgba16f, set = 1, binding = 4) uniform restrict image2DArray accum_light;
  #endif

  // Binding 5 is the shadowmask output for direct light (when enabled), or the
  // environment texture for bounce light.
  #if defined(MODE_DIRECT_LIGHT) && defined(USE_SHADOWMASK)
  layout(rgba8, set = 1, binding = 5) uniform restrict writeonly image2DArray shadowmask;
  #elif defined(MODE_BOUNCE_LIGHT)
  layout(set = 1, binding = 5) uniform texture2D environment;
  #endif

  // Set 1 resources shared by the dilate, denoise and L1 coefficient packing passes.
  #if defined(MODE_DILATE) || defined(MODE_DENOISE) || defined(MODE_PACK_L1_COEFFS)
  layout(rgba16f, set = 1, binding = 0) uniform restrict writeonly image2DArray dest_light;
  layout(set = 1, binding = 1) uniform texture2DArray source_light;
  #endif

  // Extra inputs used only by the denoise pass.
  #ifdef MODE_DENOISE
  layout(set = 1, binding = 2) uniform texture2DArray source_normal;
  layout(set = 1, binding = 3) uniform DenoiseParams {
      float spatial_bandwidth;
      float light_bandwidth;
      float albedo_bandwidth;
      float normal_bandwidth;

      int half_search_window;
      float filter_strength;
  } denoise_params;
  #endif

  // Per-dispatch push constants.
  layout(push_constant, std430) uniform Params {
      uint atlas_slice;
      uint ray_count;
      uint ray_from;
      uint ray_to;
      ivec2 region_ofs;
      uint probe_count;
  } params;
  // Tests a ray against the triangle (p0, p1, p2).
  //
  // On top of the boolean result, this writes the distance along `dir` to the
  // triangle's plane into `r_distance` and the barycentric weights of the
  // intersection point into `r_barycentric` (used by callers for UV lookups).
  // Both outputs are left unwritten when the ray is (nearly) parallel to the
  // triangle's plane.
  //
  // A hit is reported only when the distance lies strictly between
  // `bake_params.bias` and `max_dist` and no barycentric weight is negative.
  bool ray_hits_triangle(vec3 from, vec3 dir, float max_dist, vec3 p0, vec3 p1, vec3 p2, out float r_distance, out vec3 r_barycentric) {
      const float EPSILON = 0.00001;

      const vec3 edge_01 = p1 - p0;
      const vec3 edge_20 = p0 - p2;
      vec3 plane_normal = cross(edge_20, edge_01);

      // Reject rays that run (almost) parallel to the plane.
      float facing = dot(plane_normal, dir);
      if (abs(facing) < EPSILON) {
          return false;
      }

      const vec3 scaled_offset = (p0 - from) / facing;
      const vec3 perpendicular = cross(dir, scaled_offset);

      r_barycentric.y = dot(perpendicular, edge_20);
      r_barycentric.z = dot(perpendicular, edge_01);
      r_barycentric.x = 1.0 - (r_barycentric.z + r_barycentric.y);
      r_distance = dot(plane_normal, scaled_offset);

      bool past_bias = r_distance > bake_params.bias;
      bool within_reach = r_distance < max_dist;
      bool inside_triangle = all(greaterThanEqual(r_barycentric, vec3(0.0)));
      return past_bias && within_reach && inside_triangle;
  }
  // Result codes returned by trace_ray() and its wrappers.
  const uint RAY_MISS = 0; // Nothing was hit.
  const uint RAY_FRONT = 1; // The closest hit is classified as a front face.
  const uint RAY_BACK = 2; // The closest hit is classified as a back face.
  const uint RAY_ANY = 3; // Something was hit (returned for any-hit queries).
  // Slab test between a ray and an axis-aligned box.
  //
  // `p_inv_dir` is the component-wise reciprocal of the ray direction. The test
  // succeeds when the latest entry across the three slabs does not come after
  // the earliest exit.
  bool ray_box_test(vec3 p_from, vec3 p_inv_dir, vec3 p_box_min, vec3 p_box_max) {
      vec3 t_to_min = (p_box_min - p_from) * p_inv_dir;
      vec3 t_to_max = (p_box_max - p_from) * p_inv_dir;

      vec3 t_enter = min(t_to_min, t_to_max);
      vec3 t_leave = max(t_to_min, t_to_max);

      float last_enter = max(t_enter.x, max(t_enter.y, t_enter.z));
      float first_leave = min(t_leave.x, min(t_leave.y, t_leave.z));
      return last_enter <= first_leave;
  }
  // Clusters larger than 32 triangles need several passes over the triangle list.
  #if CLUSTER_SIZE > 32
  #define CLUSTER_TRIANGLE_ITERATION
  #endif

  // Traces the segment from `p_from` to `p_to` through the acceleration grid.
  //
  // The grid is walked cell by cell with a DDA. In every visited cell that
  // contains triangles, the cluster bounding boxes are tested first, then the
  // bounding boxes of the triangles in the intersected clusters, and finally the
  // triangles themselves.
  //
  // When `p_any_hit` is true, RAY_ANY is returned on the first triangle hit and
  // the output parameters are not written. Otherwise the closest hit found in
  // the first cell that produces one is returned as RAY_FRONT or RAY_BACK
  // (adjusted by the triangle's cull mode), and `r_distance`, `r_normal`,
  // `r_triangle` and `r_barycentric` describe it. RAY_MISS is returned when the
  // walk leaves the grid, reaches the end cell, or runs out of iterations.
  uint trace_ray(vec3 p_from, vec3 p_to, bool p_any_hit, out float r_distance, out vec3 r_normal, out uint r_triangle, out vec3 r_barycentric) {
      // Ray in world coordinates.
      vec3 ray_vec = p_to - p_from;
      float ray_len = length(ray_vec);
      vec3 dir = normalize(ray_vec);
      vec3 inv_dir = 1.0 / dir;

      // Segment end points in cell coordinates.
      vec3 from_cell = (p_from - bake_params.to_cell_offset) * bake_params.to_cell_size;
      vec3 to_cell = (p_to - bake_params.to_cell_offset) * bake_params.to_cell_size;

      // DDA setup.
      vec3 rel_cell = to_cell - from_cell;
      ivec3 icell = ivec3(from_cell);
      ivec3 iendcell = ivec3(to_cell);
      vec3 dir_cell = normalize(rel_cell);
      // bake_params.grid_size acts as an upper bound so no infinite values appear.
      vec3 delta = min(abs(1.0 / dir_cell), bake_params.grid_size);
      ivec3 step = ivec3(sign(rel_cell));
      const vec3 init_next_cell = vec3(icell) + max(vec3(0), sign(step));
      // Distance to the next cell boundary on each axis.
      vec3 t_max = mix(vec3(0), (init_next_cell - from_cell) / dir_cell, notEqual(step, vec3(0)));

      for (uint iters = 0; all(greaterThanEqual(icell, ivec3(0))) && all(lessThan(icell, ivec3(bake_params.grid_size))) && (iters < 1000); iters++) {
          uvec2 cell_data = texelFetch(grid, icell, 0).xy;
          uint triangle_count = cell_data.x;

          if (triangle_count > 0) {
              uint hit = RAY_MISS;
              float best_distance = 1e20;
              uint cluster_start = cluster_indices.data[cell_data.y * 2];
              uint cell_triangle_start = cluster_indices.data[cell_data.y * 2 + 1];
              uint cluster_count = (triangle_count + CLUSTER_SIZE - 1) / CLUSTER_SIZE;

              for (uint cluster_base_index = 0; cluster_base_index < cluster_count; cluster_base_index += 32) {
                  // To keep divergence low, every ray/AABB test for the clusters of this cell
                  // runs before any triangle is looked at. Clusters are processed in batches
                  // of 32, with one bit of a 32-bit mask recording each intersected cluster.
                  uint cluster_test_count = min(32, cluster_count - cluster_base_index);
                  uint cluster_hits = 0;
                  for (uint c = 0; c < cluster_test_count; c++) {
                      uint tested_cluster = cluster_start + cluster_base_index + c;
                      ClusterAABB cluster_aabb = cluster_aabbs.data[tested_cluster];
                      if (ray_box_test(p_from, inv_dir, cluster_aabb.min_bounds, cluster_aabb.max_bounds)) {
                          cluster_hits |= (1 << c);
                      }
                  }

                  // Visit the triangles of every intersected cluster, clearing one bit of the
                  // mask per cluster until the mask is empty.
                  while (cluster_hits > 0) {
                      uint cluster_index = findLSB(cluster_hits);
                      cluster_hits &= ~(1 << cluster_index);
                      cluster_index += cluster_base_index;

                      // The same mask-based trick is applied to the triangles.
                      uint triangle_base_index = 0;
  #ifdef CLUSTER_TRIANGLE_ITERATION
                      while (triangle_base_index < triangle_count)
  #endif
                      {
                          uint triangle_start_index = cell_triangle_start + cluster_index * CLUSTER_SIZE + triangle_base_index;
                          uint triangle_test_count = min(CLUSTER_SIZE, triangle_count - triangle_base_index);
                          uint triangle_hits = 0;
                          for (uint t = 0; t < triangle_test_count; t++) {
                              uint tested_triangle = triangle_indices.data[triangle_start_index + t];
                              if (ray_box_test(p_from, inv_dir, triangles.data[tested_triangle].min_bounds, triangles.data[tested_triangle].max_bounds)) {
                                  triangle_hits |= (1 << t);
                              }
                          }

                          while (triangle_hits > 0) {
                              uint cluster_triangle_index = findLSB(triangle_hits);
                              triangle_hits &= ~(1 << cluster_triangle_index);
                              cluster_triangle_index += triangle_start_index;

                              uint triangle_index = triangle_indices.data[cluster_triangle_index];
                              Triangle triangle = triangles.data[triangle_index];

                              // Fetch the positions of the three vertices.
                              vec3 vtx0 = vertices.data[triangle.indices.x].position;
                              vec3 vtx1 = vertices.data[triangle.indices.y].position;
                              vec3 vtx2 = vertices.data[triangle.indices.z].position;
                              vec3 face_normal = -normalize(cross((vtx0 - vtx1), (vtx0 - vtx2)));
                              bool backface = dot(face_normal, dir) >= 0.0;

                              float hit_distance;
                              vec3 barycentric;
                              if (ray_hits_triangle(p_from, dir, ray_len, vtx0, vtx1, vtx2, hit_distance, barycentric)) {
                                  if (p_any_hit) {
                                      // Any hit is enough; stop right away.
                                      return RAY_ANY;
                                  }

                                  vec3 hit_point = p_from + dir * hit_distance;
                                  vec3 hit_cell = (hit_point - bake_params.to_cell_offset) * bake_params.to_cell_size;
                                  if (icell != ivec3(hit_cell)) {
                                      // A triangle large enough to span several cells can be hit at a
                                      // point outside the current cell. Such hits must be ignored.
                                      continue;
                                  }

                                  if (!backface) {
                                      // Meshes with a front and a back face lying in the same plane are
                                      // more common than expected. Pull front faces towards the ray
                                      // origin so they always win over the back face.
                                      hit_distance = max(bake_params.bias, hit_distance - bake_params.bias);
                                  }

                                  if (hit_distance < best_distance) {
                                      switch (triangle.cull_mode) {
                                          case CULL_DISABLED:
                                              backface = false;
                                              break;
                                          case CULL_FRONT:
                                              backface = !backface;
                                              break;
                                          case CULL_BACK: // Default behavior.
                                              break;
                                      }

                                      hit = backface ? RAY_BACK : RAY_FRONT;
                                      best_distance = hit_distance;
                                      r_distance = hit_distance;
                                      r_normal = face_normal;
                                      r_triangle = triangle_index;
                                      r_barycentric = barycentric;
                                  }
                              }
                          }
  #ifdef CLUSTER_TRIANGLE_ITERATION
                          triangle_base_index += CLUSTER_SIZE;
  #endif
                      }
                  }
              }

              if (hit != RAY_MISS) {
                  return hit;
              }
          }

          if (icell == iendcell) {
              break;
          }

          // Advance along exactly one axis per step, as the DDA requires.
          if (t_max.x < t_max.y && t_max.x < t_max.z) {
              icell.x += step.x;
              t_max.x += delta.x;
          } else if (t_max.y < t_max.z) {
              icell.y += step.y;
              t_max.y += delta.y;
          } else {
              icell.z += step.z;
              t_max.z += delta.z;
          }
      }

      return RAY_MISS;
  }
  // Closest-hit trace that reports only the triangle index and the barycentric
  // weights of the hit. The hit distance and normal are discarded.
  uint trace_ray_closest_hit_triangle(vec3 p_from, vec3 p_to, out uint r_triangle, out vec3 r_barycentric) {
      float unused_distance;
      vec3 unused_normal;
      return trace_ray(p_from, p_to, false, unused_distance, unused_normal, r_triangle, r_barycentric);
  }
  // Closest-hit trace that also samples the albedo texture at the hit.
  //
  // When something is hit, `albedo_alpha` receives the albedo texel found at the
  // interpolated UV of the hit (on the triangle's slice) and `hit_position`
  // receives the interpolated vertex position. Neither output is written on a
  // miss.
  uint trace_ray_closest_hit_triangle_albedo_alpha(vec3 p_from, vec3 p_to, out vec4 albedo_alpha, out vec3 hit_position) {
      float unused_distance;
      vec3 unused_normal;
      uint tidx;
      vec3 weights;

      uint result = trace_ray(p_from, p_to, false, unused_distance, unused_normal, tidx, weights);
      if (result == RAY_MISS) {
          return result;
      }

      Vertex vert0 = vertices.data[triangles.data[tidx].indices.x];
      Vertex vert1 = vertices.data[triangles.data[tidx].indices.y];
      Vertex vert2 = vertices.data[triangles.data[tidx].indices.z];

      vec3 uvw = vec3(weights.x * vert0.uv + weights.y * vert1.uv + weights.z * vert2.uv, float(triangles.data[tidx].slice));
      albedo_alpha = textureLod(sampler2DArray(albedo_tex, linear_sampler), uvw, 0);
      hit_position = weights.x * vert0.position + weights.y * vert1.position + weights.z * vert2.position;

      return result;
  }
  // Closest-hit trace that reports only the hit distance and the hit normal.
  // The triangle index and barycentric weights are discarded.
  uint trace_ray_closest_hit_distance(vec3 p_from, vec3 p_to, out float r_distance, out vec3 r_normal) {
      uint unused_triangle;
      vec3 unused_barycentric;
      return trace_ray(p_from, p_to, false, r_distance, r_normal, unused_triangle, unused_barycentric);
  }
  // Any-hit trace between two points: asks trace_ray() to stop at the first
  // triangle it hits and throws away every hit detail.
  uint trace_ray_any_hit(vec3 p_from, vec3 p_to) {
      float unused_distance;
      vec3 unused_normal;
      uint unused_triangle;
      vec3 unused_barycentric;
      return trace_ray(p_from, p_to, true, unused_distance, unused_normal, unused_triangle, unused_barycentric);
  }
  // Integer hash that scrambles a 32-bit value into another 32-bit value.
  // Source: https://www.reedbeta.com/blog/hash-functions-for-gpu-rendering/
  uint hash(uint value) {
      uint state = value * 747796405u + 2891336453u;
      // The shift amount is derived from the top four bits of the state.
      uint shift = (state >> 28u) + 4u;
      uint word = ((state >> shift) ^ state) * 277803737u;
      return (word >> 22u) ^ word;
  }
  // Folds the three components of `seed` into a single hashed value, starting
  // with z, then mixing in y and finally x.
  uint random_seed(ivec3 seed) {
      uint mixed = hash(seed.z);
      mixed = hash(seed.y ^ mixed);
      return hash(seed.x ^ mixed);
  }
  // Advances the random state in `value` and returns a random number in the
  // range [0.0, 1.0).
  //
  // Only the top 24 bits of the hashed state are used for the result. A 32-bit
  // float represents every 24-bit integer exactly, so the largest possible
  // result is (2^24 - 1) / 2^24, which is strictly below 1.0. Dividing the full
  // 32-bit value by 2^32 instead would first round states close to 2^32 up to
  // 2^32 and could therefore return exactly 1.0.
  float randomize(inout uint value) {
      value = hash(value);
      return float(value >> 8u) * (1.0 / 16777216.0);
  }
  const float PI = 3.14159265f;

  // Draws a cosine-weighted direction on the hemisphere around +Z, consuming
  // two random numbers from `noise`.
  // Source: http://www.realtimerendering.com/raytracinggems/unofficial_RayTracingGems_v1.4.pdf (chapter 15)
  vec3 generate_hemisphere_cosine_weighted_direction(inout uint noise) {
      // The first draw sets the radius on the disk, the second one the angle.
      float radius_sq = randomize(noise);
      float angle = randomize(noise) * 2.0 * PI;

      float x = sqrt(radius_sq) * cos(angle);
      float y = sqrt(radius_sq) * sin(angle);
      float z = sqrt(1.0 - radius_sq);
      return vec3(x, y, z);
  }
  // Draws a direction on the unit sphere, consuming two random numbers from
  // `noise`. The distribution is adapted from "Generating uniformly distributed
  // numbers on a sphere":
  // <http://corysimon.github.io/articles/uniformdistn-on-sphere/>
  vec3 generate_sphere_uniform_direction(inout uint noise) {
      // The azimuth is drawn first, then the polar angle.
      float azimuth = 2.0 * PI * randomize(noise);
      float polar = acos(1.0 - 2.0 * randomize(noise));

      float x = sin(polar) * cos(azimuth);
      float y = sin(polar) * sin(azimuth);
      float z = cos(polar);
      return vec3(x, y, z);
  }
  // Returns a cosine-weighted random direction on the hemisphere around `normal`.
  //
  // A tangent frame is built around `normal`, and the hemisphere sample (which
  // is generated around +Z) is rotated into that frame.
  vec3 generate_ray_dir_from_normal(vec3 normal, inout uint noise) {
      // Pick a helper axis that is not (almost) parallel to the normal.
      vec3 helper_axis;
      if (abs(normal.z) < 0.999) {
          helper_axis = vec3(0.0, 0.0, 1.0);
      } else {
          helper_axis = vec3(0.0, 1.0, 0.0);
      }

      vec3 tangent = normalize(cross(helper_axis, normal));
      vec3 bitangent = normalize(cross(tangent, normal));
      mat3 frame = mat3(tangent, bitangent, normal);

      vec3 local_dir = generate_hemisphere_cosine_weighted_direction(noise);
      return frame * local_dir;
  }
  314. #if defined(MODE_DIRECT_LIGHT) || defined(MODE_BOUNCE_LIGHT) || defined(MODE_LIGHT_PROBES)
  // Attenuation of a positional light at `distance`.
  //
  // The result is the product of a window term, max(1 - (distance * inv_range)^4, 0)^2,
  // which reaches zero once distance * inv_range is 1 or more, and a falloff
  // term, distance^(-decay), with the distance clamped to at least 0.0001.
  float get_omni_attenuation(float distance, float inv_range, float decay) {
      float ratio = distance * inv_range;
      float ratio_pow2 = ratio * ratio;
      float ratio_pow4 = ratio_pow2 * ratio_pow2;

      float window = max(1.0 - ratio_pow4, 0.0);
      float window_sq = window * window;

      return window_sq * pow(max(distance, 0.0001), -decay);
  }
  // Number of anti-aliasing samples taken over a texel by trace_direct_light().
  const int AA_SAMPLES = 16;

  // Precomputed 2D sample offsets in the [0, 1) range, one per AA sample.
  // The x components follow the base-2 radical inverse sequence and the y
  // components the base-3 one (a Halton sequence, as the name states).
  const vec2 halton_map[AA_SAMPLES] = vec2[](
          vec2(0.5, 0.33333333), vec2(0.25, 0.66666667),
          vec2(0.75, 0.11111111), vec2(0.125, 0.44444444),
          vec2(0.625, 0.77777778), vec2(0.375, 0.22222222),
          vec2(0.875, 0.55555556), vec2(0.0625, 0.88888889),
          vec2(0.5625, 0.03703704), vec2(0.3125, 0.37037037),
          vec2(0.8125, 0.7037037), vec2(0.1875, 0.14814815),
          vec2(0.6875, 0.48148148), vec2(0.4375, 0.81481481),
          vec2(0.9375, 0.25925926), vec2(0.03125, 0.59259259));
  // Returns sample number `p_i` of a Vogel disk pattern.
  //
  // Consecutive samples are rotated by a fixed angle (2.4 radians) on top of
  // `p_rotation`, and the radius grows with the square root of the sample index,
  // normalized by `p_sample_count_sqrt`.
  vec2 get_vogel_disk(float p_i, float p_rotation, float p_sample_count_sqrt) {
      const float golden_angle = 2.4;

      float angle = p_i * golden_angle + p_rotation;
      float radius = sqrt(p_i + 0.5) / p_sample_count_sqrt;

      vec2 unit_dir = vec2(cos(angle), sin(angle));
      return unit_dir * radius;
  }
  // Computes the direct contribution of one light at a surface point.
  //
  // p_position       Surface position that receives the light.
  // p_normal         Surface normal at that position.
  // p_light_index    Index of the light in `lights.data`.
  // p_soft_shadowing When true, the texel is supersampled with AA_SAMPLES
  //                  jittered origins and, for lights with a size above zero,
  //                  several rays per origin are traced towards a disk around
  //                  the light. When false, a single shadow path is traced.
  // r_light          Resulting light color. Stays zero when the light does not
  //                  reach the point (out of range, outside the spot cone, or
  //                  negligible attenuation).
  // r_light_dir      Normalized direction from the point towards the light.
  // r_noise          Random state, advanced by the soft shadow sampling.
  // p_texel_size     Scales the AA jitter applied to the ray origins.
  // r_shadow         Resulting visibility factor. Stays zero on the early outs.
  //
  // Hits on surfaces with partial alpha tint and dim the light, and the path
  // continues behind them for at most `bake_params.transparency_rays` traces.
  void trace_direct_light(vec3 p_position, vec3 p_normal, uint p_light_index, bool p_soft_shadowing, out vec3 r_light, out vec3 r_light_dir, inout uint r_noise, float p_texel_size, out float r_shadow) {
      const float EPSILON = 0.00001;

      r_light = vec3(0.0f);
      r_shadow = 0.0f;

      vec3 light_pos;
      float dist;
      float attenuation;
      float soft_shadowing_disk_size;
      Light light_data = lights.data[p_light_index];
      if (light_data.type == LIGHT_TYPE_DIRECTIONAL) {
          // Directional lights are placed one world-size away, against the light direction.
          vec3 light_vec = light_data.direction;
          light_pos = p_position - light_vec * length(bake_params.world_size);
          r_light_dir = normalize(light_pos - p_position);
          dist = length(bake_params.world_size);
          attenuation = 1.0;
          soft_shadowing_disk_size = light_data.size;
      } else {
          light_pos = light_data.position;
          r_light_dir = normalize(light_pos - p_position);
          dist = distance(p_position, light_pos);
          if (dist > light_data.range) {
              return;
          }

          soft_shadowing_disk_size = light_data.size / dist;

          attenuation = get_omni_attenuation(dist, 1.0 / light_data.range, light_data.attenuation);

          if (light_data.type == LIGHT_TYPE_SPOT) {
              vec3 rel = normalize(p_position - light_pos);
              float cos_spot_angle = light_data.cos_spot_angle;
              float cos_angle = dot(rel, light_data.direction);

              if (cos_angle < cos_spot_angle) {
                  return;
              }

              float scos = max(cos_angle, cos_spot_angle);
              float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - cos_spot_angle));
              attenuation *= 1.0 - pow(spot_rim, light_data.inv_spot_attenuation);
          }
      }

      attenuation *= max(0.0, dot(p_normal, r_light_dir));
      if (attenuation <= 0.0001) {
          return;
      }

      float penumbra = 0.0;
      vec3 penumbra_color = vec3(0.0);
      if (p_soft_shadowing) {
          const bool use_soft_shadows = (light_data.size > 0.0);
          const uint ray_count = AA_SAMPLES;
          const uint total_ray_count = use_soft_shadows ? params.ray_count : ray_count;
          const uint shadowing_rays_check_penumbra_denom = 2;
          const uint shadowing_ray_count = max(1, params.ray_count / ray_count);
          const float shadowing_ray_count_sqrt = sqrt(float(total_ray_count));

          // Setup tangent pass to calculate AA samples over the current texel.
          vec3 aux = p_normal.y < 0.777 ? vec3(0.0, 1.0, 0.0) : vec3(1.0, 0.0, 0.0);
          vec3 tangent = normalize(cross(p_normal, aux));
          vec3 bitan = normalize(cross(p_normal, tangent));

          // Setup light tangent pass to calculate samples over disk aligned towards the light.
          vec3 light_to_point = -r_light_dir;
          vec3 light_aux = light_to_point.y < 0.777 ? vec3(0.0, 1.0, 0.0) : vec3(1.0, 0.0, 0.0);
          vec3 light_to_point_tan = normalize(cross(light_to_point, light_aux));
          vec3 light_to_point_bitan = normalize(cross(light_to_point, light_to_point_tan));

          float aa_power = 0.0;
          for (uint i = 0; i < ray_count; i++) {
              // Create a random sample within the texel.
              vec2 disk_sample = (halton_map[i] - vec2(0.5)) * p_texel_size * light_data.shadow_blur;
              // Align the sample to world space.
              vec3 disk_aligned = (disk_sample.x * tangent + disk_sample.y * bitan);
              vec3 origin = p_position - disk_aligned;
              vec3 light_dir = normalize(light_pos - origin);

              float power = 0.0;
              vec3 light_color = vec3(0.0);
              uint power_accm = 0;
              vec3 prev_pos = origin;
              if (use_soft_shadows) {
                  uint soft_shadow_hits = 0;
                  for (uint j = 0; j < shadowing_ray_count; j++) {
                      origin = prev_pos;

                      // Optimization:
                      // Once already traced an important proportion of rays, if all are hits or misses,
                      // assume we're not in the penumbra so we can infer the rest would have the same result.
                      //
                      // The check needs at least one traced ray (j > 0). With a single shadowing ray
                      // the check position is 0, and leaving the loop there would keep power_accm at
                      // zero and turn the averages below into 0 / 0.
                      if (j > 0 && j == shadowing_ray_count / shadowing_rays_check_penumbra_denom) {
                          if (soft_shadow_hits == j) {
                              // Assume totally lit
                              soft_shadow_hits = shadowing_ray_count;
                              break;
                          } else if (soft_shadow_hits == 0) {
                              // Assume totally dark
                              soft_shadow_hits = 0;
                              break;
                          }
                      }

                      float a = randomize(r_noise) * 2.0 * PI;
                      // Start from (total_ray_count - 1) so we check the outer points first.
                      float vogel_index = float(total_ray_count - 1 - (i * shadowing_ray_count + j));
                      vec2 light_disk_sample = get_vogel_disk(vogel_index, a, shadowing_ray_count_sqrt) * soft_shadowing_disk_size * light_data.shadow_blur;
                      vec3 light_disk_to_point = normalize(light_to_point + light_disk_sample.x * light_to_point_tan + light_disk_sample.y * light_to_point_bitan);

                      float sample_penumbra = 0.0;
                      vec3 sample_penumbra_color = light_data.color.rgb;
                      bool sample_did_hit = false;

                      for (uint iter = 0; iter < bake_params.transparency_rays; iter++) {
                          vec4 hit_albedo = vec4(1.0);
                          vec3 hit_position;
                          // Offset the ray origin for AA, offset the light position for soft shadows.
                          uint ret = trace_ray_closest_hit_triangle_albedo_alpha(origin - light_disk_to_point * (bake_params.bias + length(disk_sample)), p_position - light_disk_to_point * dist, hit_albedo, hit_position);
                          if (ret == RAY_MISS) {
                              if (!sample_did_hit) {
                                  sample_penumbra = 1.0;
                              }
                              soft_shadow_hits += 1;
                              break;
                          } else if (ret == RAY_FRONT || ret == RAY_BACK) {
                              bool contribute = ret == RAY_FRONT || !sample_did_hit;
                              if (!sample_did_hit) {
                                  sample_penumbra = 1.0;
                                  sample_did_hit = true;
                              }

                              soft_shadow_hits += 1;

                              if (contribute) {
                                  // Tint and dim the light by the surface it passed through.
                                  sample_penumbra_color = mix(sample_penumbra_color, sample_penumbra_color * hit_albedo.rgb, hit_albedo.a);
                                  sample_penumbra *= 1.0 - hit_albedo.a;
                              }

                              // Continue the path from just behind the surface that was hit.
                              origin = hit_position + r_light_dir * bake_params.bias;

                              if (sample_penumbra - EPSILON <= 0) {
                                  break;
                              }
                          }
                      }

                      power += sample_penumbra;
                      light_color += sample_penumbra_color;
                      power_accm++;
                  }
              } else { // No soft shadows (size == 0).
                  float sample_penumbra = 0.0;
                  vec3 sample_penumbra_color = light_data.color.rgb;
                  bool sample_did_hit = false;

                  for (uint iter = 0; iter < bake_params.transparency_rays; iter++) {
                      vec4 hit_albedo = vec4(1.0);
                      vec3 hit_position;
                      // Offset the ray origin for AA, offset the light position for soft shadows.
                      uint ret = trace_ray_closest_hit_triangle_albedo_alpha(origin + light_dir * (bake_params.bias + length(disk_sample)), light_pos, hit_albedo, hit_position);
                      if (ret == RAY_MISS) {
                          if (!sample_did_hit) {
                              sample_penumbra = 1.0;
                          }
                          break;
                      } else if (ret == RAY_FRONT || ret == RAY_BACK) {
                          bool contribute = ret == RAY_FRONT || !sample_did_hit;
                          if (!sample_did_hit) {
                              sample_penumbra = 1.0;
                              sample_did_hit = true;
                          }

                          if (contribute) {
                              sample_penumbra_color = mix(sample_penumbra_color, sample_penumbra_color * hit_albedo.rgb, hit_albedo.a);
                              sample_penumbra *= 1.0 - hit_albedo.a;
                          }

                          origin = hit_position + r_light_dir * bake_params.bias;

                          if (sample_penumbra - EPSILON <= 0) {
                              break;
                          }
                      }
                  }

                  power = sample_penumbra;
                  light_color = sample_penumbra_color;
                  power_accm = 1;
              }

              // Average over the rays that were actually traced for this AA sample.
              aa_power += power / float(power_accm);
              penumbra_color += light_color / float(power_accm);
          }

          penumbra = aa_power / ray_count;
          penumbra_color /= ray_count;
      } else { // No soft shadows and anti-aliasing (disabled via parameter).
          bool did_hit = false;
          penumbra = 0.0;
          penumbra_color = light_data.color.rgb;

          for (uint iter = 0; iter < bake_params.transparency_rays; iter++) {
              vec4 hit_albedo = vec4(1.0);
              vec3 hit_position;
              uint ret = trace_ray_closest_hit_triangle_albedo_alpha(p_position + r_light_dir * bake_params.bias, light_pos, hit_albedo, hit_position);
              if (ret == RAY_MISS) {
                  if (!did_hit) {
                      penumbra = 1.0;
                  }
                  break;
              } else if (ret == RAY_FRONT || ret == RAY_BACK) {
                  bool contribute = (ret == RAY_FRONT || !did_hit);
                  if (!did_hit) {
                      penumbra = 1.0;
                      did_hit = true;
                  }

                  if (contribute) {
                      penumbra_color = mix(penumbra_color, penumbra_color * hit_albedo.rgb, hit_albedo.a);
                      penumbra *= 1.0 - hit_albedo.a;
                  }

                  // Continue the path from just behind the surface that was hit.
                  p_position = hit_position + r_light_dir * bake_params.bias;

                  if (penumbra - EPSILON <= 0) {
                      break;
                  }
              }
          }

          penumbra = clamp(penumbra, 0.0, 1.0);
      }

      r_shadow = penumbra;
      r_light = light_data.energy * attenuation * penumbra * penumbra_color;
  }
  548. #endif
  549. #if defined(MODE_BOUNCE_LIGHT) || defined(MODE_LIGHT_PROBES)
  // Samples the environment texture in the direction `ray_dir`.
  //
  // The direction is rotated by `bake_params.env_transform` and converted to
  // two angles: the angle around the Y axis, wrapped into [0, 2*PI), and the
  // angle from +Y. Both are scaled to the [0, 1] range to form the lookup
  // coordinates.
  vec3 trace_environment_color(vec3 ray_dir) {
      vec3 sky_dir = normalize(mat3(bake_params.env_transform) * ray_dir);

      float azimuth = atan(sky_dir.x, sky_dir.z);
      if (azimuth < 0.0) {
          azimuth += PI * 2.0;
      }
      float polar = acos(sky_dir.y);

      vec2 st = vec2(azimuth, polar) / vec2(PI * 2.0, PI);
      return textureLod(sampler2D(environment, linear_sampler), st, 0.0).rgb;
  }
  558. vec3 trace_indirect_light(vec3 p_position, vec3 p_ray_dir, inout uint r_noise, float p_texel_size) {
  559. // The lower limit considers the case where the lightmapper might have bounces disabled but light probes are requested.
  560. vec3 position = p_position;
  561. vec3 ray_dir = p_ray_dir;
  562. uint max_depth = max(bake_params.bounces, 1);
  563. uint transparency_rays_left = bake_params.transparency_rays;
  564. vec3 throughput = vec3(1.0);
  565. vec3 light = vec3(0.0);
  566. for (uint depth = 0; depth < max_depth; depth++) {
  567. uint tidx;
  568. vec3 barycentric;
  569. uint trace_result = trace_ray_closest_hit_triangle(position + ray_dir * bake_params.bias, position + ray_dir * length(bake_params.world_size), tidx, barycentric);
  570. if (trace_result == RAY_FRONT) {
  571. Vertex vert0 = vertices.data[triangles.data[tidx].indices.x];
  572. Vertex vert1 = vertices.data[triangles.data[tidx].indices.y];
  573. Vertex vert2 = vertices.data[triangles.data[tidx].indices.z];
  574. vec3 uvw = vec3(barycentric.x * vert0.uv + barycentric.y * vert1.uv + barycentric.z * vert2.uv, float(triangles.data[tidx].slice));
  575. position = barycentric.x * vert0.position + barycentric.y * vert1.position + barycentric.z * vert2.position;
  576. vec3 prev_normal = ray_dir;
  577. vec3 norm0 = vec3(vert0.normal_xy, vert0.normal_z);
  578. vec3 norm1 = vec3(vert1.normal_xy, vert1.normal_z);
  579. vec3 norm2 = vec3(vert2.normal_xy, vert2.normal_z);
  580. vec3 normal = barycentric.x * norm0 + barycentric.y * norm1 + barycentric.z * norm2;
  581. vec3 direct_light = vec3(0.0f);
  582. #ifdef USE_LIGHT_TEXTURE_FOR_BOUNCES
  583. direct_light += textureLod(sampler2DArray(source_light, linear_sampler), uvw, 0.0).rgb;
  584. #else
  585. // Trace the lights directly. Significantly more expensive but more accurate in scenarios
  586. // where the lightmap texture isn't reliable.
  587. for (uint i = 0; i < bake_params.light_count; i++) {
  588. vec3 light;
  589. vec3 light_dir;
  590. float shadow;
  591. trace_direct_light(position, normal, i, false, light, light_dir, r_noise, p_texel_size, shadow);
  592. direct_light += light * lights.data[i].indirect_energy;
  593. }
  594. direct_light *= bake_params.exposure_normalization;
  595. #endif
  596. vec4 albedo_alpha = textureLod(sampler2DArray(albedo_tex, linear_sampler), uvw, 0).rgba;
  597. vec3 emissive = textureLod(sampler2DArray(emission_tex, linear_sampler), uvw, 0).rgb;
  598. emissive *= bake_params.exposure_normalization;
  599. light += throughput * emissive * albedo_alpha.a;
  600. throughput = mix(throughput, throughput * albedo_alpha.rgb, albedo_alpha.a);
  601. light += throughput * direct_light * bake_params.bounce_indirect_energy * albedo_alpha.a;
  602. if (albedo_alpha.a < 1.0) {
  603. transparency_rays_left -= 1;
  604. depth -= 1;
  605. if (transparency_rays_left <= 0) {
  606. break;
  607. }
  608. // Either bounce off the transparent surface or keep going forward.
  609. float pa = albedo_alpha.a * albedo_alpha.a;
  610. if (randomize(r_noise) > pa) {
  611. normal = prev_normal;
  612. }
  613. position += normal * bake_params.bias;
  614. }
  615. // Use Russian Roulette to determine a probability to terminate the bounce earlier as an optimization.
  616. // <https://computergraphics.stackexchange.com/questions/2316/is-russian-roulette-really-the-answer>
  617. float p = max(max(throughput.x, throughput.y), throughput.z);
  618. if (randomize(r_noise) > p) {
  619. break;
  620. }
  621. // Boost the throughput from the probability of the ray being terminated early.
  622. throughput *= 1.0 / p;
  623. // Generate a new ray direction for the next bounce from this surface's normal.
  624. ray_dir = generate_ray_dir_from_normal(normal, r_noise);
  625. } else if (trace_result == RAY_MISS) {
  626. // Look for the environment color and stop bouncing.
  627. light += throughput * trace_environment_color(ray_dir);
  628. break;
  629. } else if (trace_result == RAY_BACK) {
  630. Vertex vert0 = vertices.data[triangles.data[tidx].indices.x];
  631. Vertex vert1 = vertices.data[triangles.data[tidx].indices.y];
  632. Vertex vert2 = vertices.data[triangles.data[tidx].indices.z];
  633. vec3 uvw = vec3(barycentric.x * vert0.uv + barycentric.y * vert1.uv + barycentric.z * vert2.uv, float(triangles.data[tidx].slice));
  634. position = barycentric.x * vert0.position + barycentric.y * vert1.position + barycentric.z * vert2.position;
  635. vec4 albedo_alpha = textureLod(sampler2DArray(albedo_tex, linear_sampler), uvw, 0).rgba;
  636. if (albedo_alpha.a > 1.0) {
  637. break;
  638. }
  639. transparency_rays_left -= 1;
  640. depth -= 1;
  641. if (transparency_rays_left <= 0) {
  642. break;
  643. }
  644. vec3 norm0 = vec3(vert0.normal_xy, vert0.normal_z);
  645. vec3 norm1 = vec3(vert1.normal_xy, vert1.normal_z);
  646. vec3 norm2 = vec3(vert2.normal_xy, vert2.normal_z);
  647. vec3 normal = barycentric.x * norm0 + barycentric.y * norm1 + barycentric.z * norm2;
  648. vec3 direct_light = vec3(0.0f);
  649. #ifdef USE_LIGHT_TEXTURE_FOR_BOUNCES
  650. direct_light += textureLod(sampler2DArray(source_light, linear_sampler), uvw, 0.0).rgb;
  651. #else
  652. // Trace the lights directly. Significantly more expensive but more accurate in scenarios
  653. // where the lightmap texture isn't reliable.
  654. for (uint i = 0; i < bake_params.light_count; i++) {
  655. vec3 light;
  656. vec3 light_dir;
  657. float shadow;
  658. trace_direct_light(position, normal, i, false, light, light_dir, r_noise, p_texel_size, shadow);
  659. direct_light += light * lights.data[i].indirect_energy;
  660. }
  661. direct_light *= bake_params.exposure_normalization;
  662. #endif
  663. vec3 emissive = textureLod(sampler2DArray(emission_tex, linear_sampler), uvw, 0).rgb;
  664. emissive *= bake_params.exposure_normalization;
  665. light += throughput * emissive * albedo_alpha.a;
  666. throughput = mix(mix(throughput, throughput * albedo_alpha.rgb, albedo_alpha.a), vec3(0.0), albedo_alpha.a);
  667. light += throughput * direct_light * bake_params.bounce_indirect_energy * albedo_alpha.a;
  668. position += ray_dir * bake_params.bias;
  669. }
  670. }
  671. return light;
  672. }
  673. #endif
  // Compute shader entry point. Which pass runs is selected at compile time through the MODE_*
  // defines; each invocation handles one atlas texel (or one probe in MODE_LIGHT_PROBES):
  //  - MODE_DIRECT_LIGHT: traces every light for the texel and stores the results in dest_light,
  //    accum_light and, with USE_SHADOWMASK, shadowmask.
  //  - MODE_BOUNCE_LIGHT: traces the rays in [ray_from, ray_to) and adds their averaged
  //    contribution to accum_light.
  //  - MODE_UNOCCLUDE: relocates the stored texel position next to the closest back face found
  //    within texel_size along the tangent and bitangent directions.
  //  - MODE_LIGHT_PROBES: accumulates 9 SH coefficients per probe into light_probes.
  //  - MODE_DILATE: fills empty texels with the color of a nearby valid texel.
  //  - MODE_DENOISE: filters source_light into dest_light with a joint non-local means filter.
  //  - MODE_PACK_L1_COEFFS: rescales the three L1 SH slices relative to the L0 slice and remaps
  //    them into the [0, 1] range.
  void main() {
      // Check if invocation is out of bounds.
  #ifdef MODE_LIGHT_PROBES
      int probe_index = int(gl_GlobalInvocationID.x);
      if (probe_index >= params.probe_count) {
          return;
      }
  #else
      ivec2 atlas_pos = ivec2(gl_GlobalInvocationID.xy) + params.region_ofs;
      if (any(greaterThanEqual(atlas_pos, bake_params.atlas_size))) {
          return;
      }
  #endif

  #ifdef MODE_DIRECT_LIGHT
      vec3 normal = texelFetch(sampler2DArray(source_normal, linear_sampler), ivec3(atlas_pos, params.atlas_slice), 0).xyz;
      if (length(normal) < 0.5) {
          return; // Empty texel, nothing to process.
      }

      vec3 position = texelFetch(sampler2DArray(source_position, linear_sampler), ivec3(atlas_pos, params.atlas_slice), 0).xyz;

      // Estimate the world space size of the texel from the distance to a horizontal neighbor.
      vec4 neighbor_position = texelFetch(sampler2DArray(source_position, linear_sampler), ivec3(atlas_pos + ivec2(1, 0), params.atlas_slice), 0).xyzw;
      if (neighbor_position.w < 0.001) {
          // Empty texel, try again.
          neighbor_position.xyz = texelFetch(sampler2DArray(source_position, linear_sampler), ivec3(atlas_pos + ivec2(-1, 0), params.atlas_slice), 0).xyz;
      }
      float texel_size_world_space = distance(position, neighbor_position.xyz) * bake_params.supersampling_factor;

      // Light from static lights goes to the lightmap, while the light used as the source for
      // bounces is scaled by each light's indirect energy.
      vec3 light_for_texture = vec3(0.0);
      vec3 light_for_bounces = vec3(0.0);

  #ifdef USE_SHADOWMASK
      float shadowmask_value = 0.0f;
  #endif

  #ifdef USE_SH_LIGHTMAPS
      vec4 sh_accum[4] = vec4[](
              vec4(0.0, 0.0, 0.0, 1.0),
              vec4(0.0, 0.0, 0.0, 1.0),
              vec4(0.0, 0.0, 0.0, 1.0),
              vec4(0.0, 0.0, 0.0, 1.0));
  #endif

      // Use atlas position and a prime number as the seed.
      uint noise = random_seed(ivec3(atlas_pos, 43573547));
      for (uint i = 0; i < bake_params.light_count; i++) {
          vec3 light;
          vec3 light_dir;
          float shadow;
          trace_direct_light(position, normal, i, true, light, light_dir, noise, texel_size_world_space, shadow);

          if (lights.data[i].static_bake) {
              light_for_texture += light;

  #ifdef USE_SH_LIGHTMAPS
              // These coefficients include the factored out SH evaluation, diffuse convolution, and final application, as well as the BRDF 1/PI and the spherical monte carlo factor.
              // L0: 1/(2*sqrtPI) * 1/(2*sqrtPI) * PI * PI * 1/PI = 0.25
              // L1: sqrt(3/(4*pi)) * sqrt(3/(4*pi)) * (PI*2/3) * (2 * PI) * 1/PI = 1.0
              // Note: This only works because we aren't scaling, rotating, or combining harmonics, we are just directly applying them in the shader.
              float c[4] = float[](
                      0.25, //l0
                      light_dir.y, //l1n1
                      light_dir.z, //l1n0
                      light_dir.x //l1p1
              );

              for (uint j = 0; j < 4; j++) {
                  sh_accum[j].rgb += light * c[j] * bake_params.exposure_normalization;
              }
  #endif
          }

          light_for_bounces += light * lights.data[i].indirect_energy;

  #ifdef USE_SHADOWMASK
          // Only the directional light selected as the shadowmask light contributes to the mask.
          if (lights.data[i].type == LIGHT_TYPE_DIRECTIONAL && i == bake_params.shadowmask_light_idx) {
              shadowmask_value = max(shadowmask_value, shadow);
          }
  #endif
      }

      light_for_bounces *= bake_params.exposure_normalization;
      imageStore(dest_light, ivec3(atlas_pos, params.atlas_slice), vec4(light_for_bounces, 1.0));

  #ifdef USE_SH_LIGHTMAPS
      // Keep for adding at the end.
      imageStore(accum_light, ivec3(atlas_pos, params.atlas_slice * 4 + 0), sh_accum[0]);
      imageStore(accum_light, ivec3(atlas_pos, params.atlas_slice * 4 + 1), sh_accum[1]);
      imageStore(accum_light, ivec3(atlas_pos, params.atlas_slice * 4 + 2), sh_accum[2]);
      imageStore(accum_light, ivec3(atlas_pos, params.atlas_slice * 4 + 3), sh_accum[3]);
  #else
      light_for_texture *= bake_params.exposure_normalization;
      imageStore(accum_light, ivec3(atlas_pos, params.atlas_slice), vec4(light_for_texture, 1.0));
  #endif

  #ifdef USE_SHADOWMASK
      imageStore(shadowmask, ivec3(atlas_pos, params.atlas_slice), vec4(shadowmask_value, shadowmask_value, shadowmask_value, 1.0));
  #endif

  #endif

  #ifdef MODE_BOUNCE_LIGHT

  #ifdef USE_SH_LIGHTMAPS
      vec4 sh_accum[4] = vec4[](
              vec4(0.0, 0.0, 0.0, 1.0),
              vec4(0.0, 0.0, 0.0, 1.0),
              vec4(0.0, 0.0, 0.0, 1.0),
              vec4(0.0, 0.0, 0.0, 1.0));
  #else
      vec3 light_accum = vec3(0.0);
  #endif

      // Retrieve starting normal and position.
      vec3 normal = texelFetch(sampler2DArray(source_normal, linear_sampler), ivec3(atlas_pos, params.atlas_slice), 0).xyz;
      if (length(normal) < 0.5) {
          // The pixel is empty, skip processing it.
          return;
      }

      vec3 position = texelFetch(sampler2DArray(source_position, linear_sampler), ivec3(atlas_pos, params.atlas_slice), 0).xyz;

      // Pick the neighbor to the right, or the one to the left on the last column of the atlas.
      int neighbor_offset = atlas_pos.x < bake_params.atlas_size.x - 1 ? 1 : -1;
      vec3 neighbor_position = texelFetch(sampler2DArray(source_position, linear_sampler), ivec3(atlas_pos + ivec2(neighbor_offset, 0), params.atlas_slice), 0).xyz;
      float texel_size_world_space = distance(position, neighbor_position);

      uint noise = random_seed(ivec3(params.ray_from, atlas_pos));
      for (uint i = params.ray_from; i < params.ray_to; i++) {
          vec3 ray_dir = generate_ray_dir_from_normal(normal, noise);
          vec3 light = trace_indirect_light(position, ray_dir, noise, texel_size_world_space);

  #ifdef USE_SH_LIGHTMAPS
          // These coefficients include the factored out SH evaluation, diffuse convolution, and final application, as well as the BRDF 1/PI and the spherical monte carlo factor.
          // L0: 1/(2*sqrtPI) * 1/(2*sqrtPI) * PI * PI * 1/PI = 0.25
          // L1: sqrt(3/(4*pi)) * sqrt(3/(4*pi)) * (PI*2/3) * (2 * PI) * 1/PI = 1.0
          // Note: This only works because we aren't scaling, rotating, or combining harmonics, we are just directly applying them in the shader.
          float c[4] = float[](
                  0.25, //l0
                  ray_dir.y, //l1n1
                  ray_dir.z, //l1n0
                  ray_dir.x //l1p1
          );

          for (uint j = 0; j < 4; j++) {
              sh_accum[j].rgb += light * c[j];
          }
  #else
          light_accum += light;
  #endif
      }

      // Add the averaged result to the accumulated light texture.
  #ifdef USE_SH_LIGHTMAPS
      for (int i = 0; i < 4; i++) {
          vec4 accum = imageLoad(accum_light, ivec3(atlas_pos, params.atlas_slice * 4 + i));
          accum.rgb += sh_accum[i].rgb / float(params.ray_count);
          imageStore(accum_light, ivec3(atlas_pos, params.atlas_slice * 4 + i), accum);
      }
  #else
      vec4 accum = imageLoad(accum_light, ivec3(atlas_pos, params.atlas_slice));
      accum.rgb += light_accum / float(params.ray_count);
      imageStore(accum_light, ivec3(atlas_pos, params.atlas_slice), accum);
  #endif

  #endif

  #ifdef MODE_UNOCCLUDE
      vec4 position_alpha = imageLoad(position, ivec3(atlas_pos, params.atlas_slice));
      if (position_alpha.a < 0.5) {
          // Texels with a low alpha in the position image are skipped.
          return;
      }

      vec3 vertex_pos = position_alpha.xyz;
      vec4 normal_tsize = imageLoad(unocclude, ivec3(atlas_pos, params.atlas_slice));

      // The unocclude image packs the face normal in XYZ and the texel size in W.
      vec3 face_normal = normal_tsize.xyz;
      float texel_size = normal_tsize.w;

      // Compute tangents. A different helper axis is used when the normal is almost aligned with Z.
      vec3 v0 = abs(face_normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0);
      vec3 tangent = normalize(cross(v0, face_normal));
      vec3 bitangent = normalize(cross(tangent, face_normal));
      vec3 base_pos = vertex_pos + face_normal * bake_params.bias; // Raise a bit.

      // Probe the four directions on the surface plane and keep the closest back face hit.
      vec3 rays[4] = vec3[](tangent, bitangent, -tangent, -bitangent);
      float min_d = 1e20;
      for (int i = 0; i < 4; i++) {
          vec3 ray_to = base_pos + rays[i] * texel_size;
          float d;
          vec3 norm;

          if (trace_ray_closest_hit_distance(base_pos, ray_to, d, norm) == RAY_BACK) {
              if (d < min_d) {
                  // This bias needs to be greater than the regular bias, because otherwise later, rays will go the other side when pointing back.
                  vertex_pos = base_pos + rays[i] * d + norm * bake_params.bias * 10.0;
                  min_d = d;
              }
          }
      }

      position_alpha.xyz = vertex_pos;
      imageStore(position, ivec3(atlas_pos, params.atlas_slice), position_alpha);

  #endif

  #ifdef MODE_LIGHT_PROBES
      vec3 position = probe_positions.data[probe_index].xyz;

      vec4 probe_sh_accum[9] = vec4[](
              vec4(0.0),
              vec4(0.0),
              vec4(0.0),
              vec4(0.0),
              vec4(0.0),
              vec4(0.0),
              vec4(0.0),
              vec4(0.0),
              vec4(0.0));

      uint noise = random_seed(ivec3(params.ray_from, probe_index, 49502741 /* some prime */));
      for (uint i = params.ray_from; i < params.ray_to; i++) {
          vec3 ray_dir = generate_sphere_uniform_direction(noise);
          vec3 light = trace_indirect_light(position, ray_dir, noise, 0.0);

          float c[9] = float[](
                  0.282095, //l0
                  0.488603 * ray_dir.y, //l1n1
                  0.488603 * ray_dir.z, //l1n0
                  0.488603 * ray_dir.x, //l1p1
                  1.092548 * ray_dir.x * ray_dir.y, //l2n2
                  1.092548 * ray_dir.y * ray_dir.z, //l2n1
                  //0.315392 * (ray_dir.x * ray_dir.x + ray_dir.y * ray_dir.y + 2.0 * ray_dir.z * ray_dir.z), //l20
                  0.315392 * (3.0 * ray_dir.z * ray_dir.z - 1.0), //l20
                  1.092548 * ray_dir.x * ray_dir.z, //l2p1
                  0.546274 * (ray_dir.x * ray_dir.x - ray_dir.y * ray_dir.y) //l2p2
          );

          for (uint j = 0; j < 9; j++) {
              probe_sh_accum[j].rgb += light * c[j];
          }
      }

      if (params.ray_from > 0) {
          // Not the first batch of rays: add what the previous batches stored.
          for (uint j = 0; j < 9; j++) {
              probe_sh_accum[j] += light_probes.data[probe_index * 9 + j];
          }
      }

      if (params.ray_to == params.ray_count) {
          // Last batch of rays: scale the accumulated sum by 4 / ray_count.
          for (uint j = 0; j < 9; j++) {
              probe_sh_accum[j] *= 4.0 / float(params.ray_count);
          }
      }

      // Write the running (or final) result back.
      for (uint j = 0; j < 9; j++) {
          light_probes.data[probe_index * 9 + j] = probe_sh_accum[j];
      }

  #endif

  #ifdef MODE_DILATE
      const int max_radius = int(4.0 * bake_params.supersampling_factor);
      const ivec2 directions[8] = ivec2[8](ivec2(-1, 0), ivec2(0, 1), ivec2(1, 0), ivec2(0, -1), ivec2(-1, -1), ivec2(-1, 1), ivec2(1, -1), ivec2(1, 1));

      vec4 texel_color = texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos, params.atlas_slice), 0);

      // Only look for a replacement while this texel is empty. A texel that already holds valid
      // light keeps its own color; searching unconditionally would overwrite it with the color of
      // its first valid neighbor.
      bool texel_valid = texel_color.a > 0.5;
      for (int radius = 1; radius <= max_radius && !texel_valid; radius++) {
          // Sides first, then diagonals, at increasing distance.
          for (uint i = 0; i < 8; i++) {
              const ivec2 sample_pos = atlas_pos + directions[i] * radius;
              // Texture bounds check for robustness.
              if (any(lessThan(sample_pos, ivec2(0))) ||
                      any(greaterThanEqual(sample_pos, textureSize(source_light, 0).xy))) {
                  continue;
              }

              vec4 neighbor_color = texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(sample_pos, params.atlas_slice), 0);
              if (neighbor_color.a > 0.5) {
                  texel_color = neighbor_color;
                  texel_valid = true;
                  break;
              }
          }
      }

      imageStore(dest_light, ivec3(atlas_pos, params.atlas_slice), texel_color);

  #endif

  #ifdef MODE_DENOISE
      // Joint Non-local means (JNLM) denoiser.
      //
      // Based on YoctoImageDenoiser's JNLM implementation with corrections from "Nonlinearly Weighted First-order Regression for Denoising Monte Carlo Renderings".
      //
      // <https://github.com/ManuelPrandini/YoctoImageDenoiser/blob/06e19489dd64e47792acffde536393802ba48607/libs/yocto_extension/yocto_extension.cpp#L207>
      // <https://benedikt-bitterli.me/nfor/nfor.pdf>
      //
      // MIT License
      //
      // Copyright (c) 2020 ManuelPrandini
      //
      // Permission is hereby granted, free of charge, to any person obtaining a copy
      // of this software and associated documentation files (the "Software"), to deal
      // in the Software without restriction, including without limitation the rights
      // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      // copies of the Software, and to permit persons to whom the Software is
      // furnished to do so, subject to the following conditions:
      //
      // The above copyright notice and this permission notice shall be included in all
      // copies or substantial portions of the Software.
      //
      // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
      // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
      // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
      // SOFTWARE.
      //
      // Most of the constants below have been hand-picked to fit the common scenarios lightmaps
      // are generated with, but they can be altered freely to experiment and achieve better results.

      // Half the size of the patch window around each pixel that is weighted to compute the denoised pixel.
      // A value of 1 represents a 3x3 window, a value of 2 a 5x5 window, etc.
      const int HALF_PATCH_WINDOW = 3;

      // Half the size of the search window around each pixel that is denoised and weighted to compute the denoised pixel.
      const int HALF_SEARCH_WINDOW = denoise_params.half_search_window;

      // For all of the following sigma values, smaller values will give less weight to pixels that have a bigger distance
      // in the feature being evaluated. Therefore, smaller values are likely to cause more noise to appear, but will also
      // cause less features to be erased in the process.

      // Controls how much the spatial distance of the pixels influences the denoising weight.
      const float SIGMA_SPATIAL = denoise_params.spatial_bandwidth;

      // Controls how much the light color distance of the pixels influences the denoising weight.
      const float SIGMA_LIGHT = denoise_params.light_bandwidth;

      // Controls how much the albedo color distance of the pixels influences the denoising weight.
      const float SIGMA_ALBEDO = denoise_params.albedo_bandwidth;

      // Controls how much the normal vector distance of the pixels influences the denoising weight.
      const float SIGMA_NORMAL = denoise_params.normal_bandwidth;

      // Strength of the filter. The original paper recommends values around 10 to 15 times the Sigma parameter.
      const float FILTER_VALUE = denoise_params.filter_strength * SIGMA_LIGHT;

      // Formula constants.
      const int PATCH_WINDOW_DIMENSION = (HALF_PATCH_WINDOW * 2 + 1);
      const int PATCH_WINDOW_DIMENSION_SQUARE = (PATCH_WINDOW_DIMENSION * PATCH_WINDOW_DIMENSION);
      const float TWO_SIGMA_SPATIAL_SQUARE = 2.0f * SIGMA_SPATIAL * SIGMA_SPATIAL;
      const float TWO_SIGMA_LIGHT_SQUARE = 2.0f * SIGMA_LIGHT * SIGMA_LIGHT;
      const float TWO_SIGMA_ALBEDO_SQUARE = 2.0f * SIGMA_ALBEDO * SIGMA_ALBEDO;
      const float TWO_SIGMA_NORMAL_SQUARE = 2.0f * SIGMA_NORMAL * SIGMA_NORMAL;
      const float FILTER_SQUARE_TWO_SIGMA_LIGHT_SQUARE = FILTER_VALUE * FILTER_VALUE * TWO_SIGMA_LIGHT_SQUARE;
      const float EPSILON = 1e-6f;

      // SH lightmaps use four consecutive light slices per atlas slice; albedo and normals are
      // always read from the atlas slice itself.
  #ifdef USE_SH_LIGHTMAPS
      const uint slice_count = 4;
      const uint slice_base = params.atlas_slice * slice_count;
  #else
      const uint slice_count = 1;
      const uint slice_base = params.atlas_slice;
  #endif

      for (uint i = 0; i < slice_count; i++) {
          uint lightmap_slice = slice_base + i;
          vec3 denoised_rgb = vec3(0.0f);
          vec4 input_light = texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos, lightmap_slice), 0);
          vec3 input_albedo = texelFetch(sampler2DArray(albedo_tex, linear_sampler), ivec3(atlas_pos, params.atlas_slice), 0).rgb;
          vec3 input_normal = texelFetch(sampler2DArray(source_normal, linear_sampler), ivec3(atlas_pos, params.atlas_slice), 0).xyz;
          if (length(input_normal) > EPSILON) {
              // Compute the denoised pixel if the normal is valid.
              float sum_weights = 0.0f;
              vec3 input_rgb = input_light.rgb;
              for (int search_y = -HALF_SEARCH_WINDOW; search_y <= HALF_SEARCH_WINDOW; search_y++) {
                  for (int search_x = -HALF_SEARCH_WINDOW; search_x <= HALF_SEARCH_WINDOW; search_x++) {
                      ivec2 search_pos = atlas_pos + ivec2(search_x, search_y);
                      vec3 search_rgb = texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(search_pos, lightmap_slice), 0).rgb;
                      vec3 search_albedo = texelFetch(sampler2DArray(albedo_tex, linear_sampler), ivec3(search_pos, params.atlas_slice), 0).rgb;
                      vec3 search_normal = texelFetch(sampler2DArray(source_normal, linear_sampler), ivec3(search_pos, params.atlas_slice), 0).xyz;

                      // Accumulate the squared light distance between the patch around the input
                      // pixel and the patch around the search pixel.
                      float patch_square_dist = 0.0f;
                      for (int offset_y = -HALF_PATCH_WINDOW; offset_y <= HALF_PATCH_WINDOW; offset_y++) {
                          for (int offset_x = -HALF_PATCH_WINDOW; offset_x <= HALF_PATCH_WINDOW; offset_x++) {
                              ivec2 offset_input_pos = atlas_pos + ivec2(offset_x, offset_y);
                              ivec2 offset_search_pos = search_pos + ivec2(offset_x, offset_y);
                              vec3 offset_input_rgb = texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(offset_input_pos, lightmap_slice), 0).rgb;
                              vec3 offset_search_rgb = texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(offset_search_pos, lightmap_slice), 0).rgb;
                              vec3 offset_delta_rgb = offset_input_rgb - offset_search_rgb;
                              patch_square_dist += dot(offset_delta_rgb, offset_delta_rgb) - TWO_SIGMA_LIGHT_SQUARE;
                          }
                      }

                      patch_square_dist = max(0.0f, patch_square_dist / (3.0f * PATCH_WINDOW_DIMENSION_SQUARE));

                      float weight = 1.0f;

                      // Ignore weight if search position is out of bounds.
                      weight *= step(0, search_pos.x) * step(search_pos.x, bake_params.atlas_size.x - 1);
                      weight *= step(0, search_pos.y) * step(search_pos.y, bake_params.atlas_size.y - 1);

                      // Ignore weight if normal is zero length.
                      weight *= step(EPSILON, length(search_normal));

                      // Weight with pixel distance.
                      vec2 pixel_delta = vec2(search_x, search_y);
                      float pixel_square_dist = dot(pixel_delta, pixel_delta);
                      weight *= exp(-pixel_square_dist / TWO_SIGMA_SPATIAL_SQUARE);

                      // Weight with patch.
                      weight *= exp(-patch_square_dist / FILTER_SQUARE_TWO_SIGMA_LIGHT_SQUARE);

                      // Weight with albedo.
                      vec3 albedo_delta = input_albedo - search_albedo;
                      float albedo_square_dist = dot(albedo_delta, albedo_delta);
                      weight *= exp(-albedo_square_dist / TWO_SIGMA_ALBEDO_SQUARE);

                      // Weight with normal.
                      vec3 normal_delta = input_normal - search_normal;
                      float normal_square_dist = dot(normal_delta, normal_delta);
                      weight *= exp(-normal_square_dist / TWO_SIGMA_NORMAL_SQUARE);

                      denoised_rgb += weight * search_rgb;
                      sum_weights += weight;
                  }
              }

              denoised_rgb /= sum_weights;
          } else {
              // Ignore pixels where the normal is empty, just copy the light color.
              denoised_rgb = input_light.rgb;
          }

          // The alpha of the input light is passed through unchanged.
          imageStore(dest_light, ivec3(atlas_pos, lightmap_slice), vec4(denoised_rgb, input_light.a));
      }

  #endif

  #ifdef MODE_PACK_L1_COEFFS
      // The first of the four SH slices holds the L0 coefficient used to rescale the other three.
      vec4 base_coeff = texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos, params.atlas_slice * 4), 0);

      for (int i = 1; i < 4; i++) {
          vec4 c = texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos, params.atlas_slice * 4 + i), 0);

          // Divide each channel by 8 times its L0 value, skipping channels where L0 is zero.
          if (abs(base_coeff.r) > 0.0) {
              c.r /= (base_coeff.r * 8);
          }
          if (abs(base_coeff.g) > 0.0) {
              c.g /= (base_coeff.g * 8);
          }
          if (abs(base_coeff.b) > 0.0) {
              c.b /= (base_coeff.b * 8);
          }

          // Offset by 0.5 and clamp so the result fits in the [0, 1] range.
          c.rgb += vec3(0.5);
          c.rgb = clamp(c.rgb, vec3(0.0), vec3(1.0));
          imageStore(dest_light, ivec3(atlas_pos, params.atlas_slice * 4 + i), c);
      }
  #endif
  }