// sdfgi_preprocess.glsl
  1. #[compute]
  2. #version 450
  3. #VERSION_DEFINES
  4. #ifdef MODE_JUMPFLOOD_OPTIMIZED
  5. #define GROUP_SIZE 8
  6. layout(local_size_x = GROUP_SIZE, local_size_y = GROUP_SIZE, local_size_z = GROUP_SIZE) in;
  7. #elif defined(MODE_OCCLUSION) || defined(MODE_SCROLL)
  8. //buffer layout
  9. layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
  10. #else
  11. //grid layout
  12. layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in;
  13. #endif
  14. #if defined(MODE_INITIALIZE_JUMP_FLOOD) || defined(MODE_INITIALIZE_JUMP_FLOOD_HALF)
  15. layout(r16ui, set = 0, binding = 1) uniform restrict readonly uimage3D src_color;
  16. layout(rgba8ui, set = 0, binding = 2) uniform restrict writeonly uimage3D dst_positions;
  17. #endif
  18. #ifdef MODE_UPSCALE_JUMP_FLOOD
  19. layout(r16ui, set = 0, binding = 1) uniform restrict readonly uimage3D src_color;
  20. layout(rgba8ui, set = 0, binding = 2) uniform restrict readonly uimage3D src_positions_half;
  21. layout(rgba8ui, set = 0, binding = 3) uniform restrict writeonly uimage3D dst_positions;
  22. #endif
  23. #if defined(MODE_JUMPFLOOD) || defined(MODE_JUMPFLOOD_OPTIMIZED)
  24. layout(rgba8ui, set = 0, binding = 1) uniform restrict readonly uimage3D src_positions;
  25. layout(rgba8ui, set = 0, binding = 2) uniform restrict writeonly uimage3D dst_positions;
  26. #endif
  27. #ifdef MODE_JUMPFLOOD_OPTIMIZED
  28. shared uvec4 group_positions[(GROUP_SIZE + 2) * (GROUP_SIZE + 2) * (GROUP_SIZE + 2)]; //4x4x4 with margins
  29. void group_store(ivec3 p_pos, uvec4 p_value) {
  30. uint offset = uint(p_pos.z * (GROUP_SIZE + 2) * (GROUP_SIZE + 2) + p_pos.y * (GROUP_SIZE + 2) + p_pos.x);
  31. group_positions[offset] = p_value;
  32. }
  33. uvec4 group_load(ivec3 p_pos) {
  34. uint offset = uint(p_pos.z * (GROUP_SIZE + 2) * (GROUP_SIZE + 2) + p_pos.y * (GROUP_SIZE + 2) + p_pos.x);
  35. return group_positions[offset];
  36. }
  37. #endif
  38. #ifdef MODE_OCCLUSION
  39. layout(r16ui, set = 0, binding = 1) uniform restrict readonly uimage3D src_color;
  40. layout(r8, set = 0, binding = 2) uniform restrict image3D dst_occlusion[8];
  41. layout(r32ui, set = 0, binding = 3) uniform restrict readonly uimage3D src_facing;
  42. const uvec2 group_size_offset[11] = uvec2[](uvec2(1, 0), uvec2(3, 1), uvec2(6, 4), uvec2(10, 10), uvec2(15, 20), uvec2(21, 35), uvec2(28, 56), uvec2(36, 84), uvec2(42, 120), uvec2(46, 162), uvec2(48, 208));
  43. const uint group_pos[256] = uint[](0,
  44. 65536, 256, 1,
  45. 131072, 65792, 512, 65537, 257, 2,
  46. 196608, 131328, 66048, 768, 131073, 65793, 513, 65538, 258, 3,
  47. 262144, 196864, 131584, 66304, 1024, 196609, 131329, 66049, 769, 131074, 65794, 514, 65539, 259, 4,
  48. 327680, 262400, 197120, 131840, 66560, 1280, 262145, 196865, 131585, 66305, 1025, 196610, 131330, 66050, 770, 131075, 65795, 515, 65540, 260, 5,
  49. 393216, 327936, 262656, 197376, 132096, 66816, 1536, 327681, 262401, 197121, 131841, 66561, 1281, 262146, 196866, 131586, 66306, 1026, 196611, 131331, 66051, 771, 131076, 65796, 516, 65541, 261, 6,
  50. 458752, 393472, 328192, 262912, 197632, 132352, 67072, 1792, 393217, 327937, 262657, 197377, 132097, 66817, 1537, 327682, 262402, 197122, 131842, 66562, 1282, 262147, 196867, 131587, 66307, 1027, 196612, 131332, 66052, 772, 131077, 65797, 517, 65542, 262, 7,
  51. 459008, 393728, 328448, 263168, 197888, 132608, 67328, 458753, 393473, 328193, 262913, 197633, 132353, 67073, 1793, 393218, 327938, 262658, 197378, 132098, 66818, 1538, 327683, 262403, 197123, 131843, 66563, 1283, 262148, 196868, 131588, 66308, 1028, 196613, 131333, 66053, 773, 131078, 65798, 518, 65543, 263,
  52. 459264, 393984, 328704, 263424, 198144, 132864, 459009, 393729, 328449, 263169, 197889, 132609, 67329, 458754, 393474, 328194, 262914, 197634, 132354, 67074, 1794, 393219, 327939, 262659, 197379, 132099, 66819, 1539, 327684, 262404, 197124, 131844, 66564, 1284, 262149, 196869, 131589, 66309, 1029, 196614, 131334, 66054, 774, 131079, 65799, 519,
  53. 459520, 394240, 328960, 263680, 198400, 459265, 393985, 328705, 263425, 198145, 132865, 459010, 393730, 328450, 263170, 197890, 132610, 67330, 458755, 393475, 328195, 262915, 197635, 132355, 67075, 1795, 393220, 327940, 262660, 197380, 132100, 66820, 1540, 327685, 262405, 197125, 131845, 66565, 1285, 262150, 196870, 131590, 66310, 1030, 196615, 131335, 66055, 775);
  54. shared uint occlusion_facing[((OCCLUSION_SIZE * 2) * (OCCLUSION_SIZE * 2) * (OCCLUSION_SIZE * 2)) / 4];
  55. uint get_facing(ivec3 p_pos) {
  56. uint ofs = uint(p_pos.z * OCCLUSION_SIZE * 2 * OCCLUSION_SIZE * 2 + p_pos.y * OCCLUSION_SIZE * 2 + p_pos.x);
  57. uint v = occlusion_facing[ofs / 4];
  58. return (v >> ((ofs % 4) * 8)) & 0xFF;
  59. }
  60. #endif
  61. #ifdef MODE_STORE
  62. layout(rgba8ui, set = 0, binding = 1) uniform restrict readonly uimage3D src_positions;
  63. layout(r16ui, set = 0, binding = 2) uniform restrict readonly uimage3D src_albedo;
  64. layout(r8, set = 0, binding = 3) uniform restrict readonly image3D src_occlusion[8];
  65. layout(r32ui, set = 0, binding = 4) uniform restrict readonly uimage3D src_light;
  66. layout(r32ui, set = 0, binding = 5) uniform restrict readonly uimage3D src_light_aniso;
  67. layout(r32ui, set = 0, binding = 6) uniform restrict readonly uimage3D src_facing;
  68. layout(r8, set = 0, binding = 7) uniform restrict writeonly image3D dst_sdf;
  69. layout(r16ui, set = 0, binding = 8) uniform restrict writeonly uimage3D dst_occlusion;
  70. layout(set = 0, binding = 10, std430) restrict buffer DispatchData {
  71. uint x;
  72. uint y;
  73. uint z;
  74. uint total_count;
  75. }
  76. dispatch_data;
  77. struct ProcessVoxel {
  78. uint position; // xyz 7 bit packed, extra 11 bits for neighbors.
  79. uint albedo; //rgb bits 0-15 albedo, bits 16-21 are normal bits (set if geometry exists toward that side), extra 11 bits for neighbors
  80. uint light; //rgbe8985 encoded total saved light, extra 2 bits for neighbors
  81. uint light_aniso; //55555 light anisotropy, extra 2 bits for neighbors
  82. //total neighbors: 26
  83. };
  84. layout(set = 0, binding = 11, std430) restrict buffer writeonly ProcessVoxels {
  85. ProcessVoxel data[];
  86. }
  87. dst_process_voxels;
  88. shared ProcessVoxel store_positions[4 * 4 * 4];
  89. shared uint store_position_count;
  90. shared uint store_from_index;
  91. #endif
  92. #ifdef MODE_SCROLL
  93. layout(r16ui, set = 0, binding = 1) uniform restrict writeonly uimage3D dst_albedo;
  94. layout(r32ui, set = 0, binding = 2) uniform restrict writeonly uimage3D dst_facing;
  95. layout(r32ui, set = 0, binding = 3) uniform restrict writeonly uimage3D dst_light;
  96. layout(r32ui, set = 0, binding = 4) uniform restrict writeonly uimage3D dst_light_aniso;
  97. layout(set = 0, binding = 5, std430) restrict buffer readonly DispatchData {
  98. uint x;
  99. uint y;
  100. uint z;
  101. uint total_count;
  102. }
  103. dispatch_data;
  104. struct ProcessVoxel {
  105. uint position; // xyz 7 bit packed, extra 11 bits for neighbors.
  106. uint albedo; //rgb bits 0-15 albedo, bits 16-21 are normal bits (set if geometry exists toward that side), extra 11 bits for neighbors
  107. uint light; //rgbe8985 encoded total saved light, extra 2 bits for neighbors
  108. uint light_aniso; //55555 light anisotropy, extra 2 bits for neighbors
  109. //total neighbors: 26
  110. };
  111. layout(set = 0, binding = 6, std430) restrict buffer readonly ProcessVoxels {
  112. ProcessVoxel data[];
  113. }
  114. src_process_voxels;
  115. #endif
  116. #ifdef MODE_SCROLL_OCCLUSION
  117. layout(r8, set = 0, binding = 1) uniform restrict image3D dst_occlusion[8];
  118. layout(r16ui, set = 0, binding = 2) uniform restrict readonly uimage3D src_occlusion;
  119. #endif
  120. layout(push_constant, std430) uniform Params {
  121. ivec3 scroll;
  122. int grid_size;
  123. ivec3 probe_offset;
  124. int step_size;
  125. bool half_size;
  126. uint occlusion_index;
  127. int cascade;
  128. uint pad;
  129. }
  130. params;
  131. void main() {
  132. #ifdef MODE_SCROLL
  133. // Pixel being shaded
  134. int index = int(gl_GlobalInvocationID.x);
  135. if (index >= dispatch_data.total_count) { //too big
  136. return;
  137. }
  138. ivec3 read_pos = (ivec3(src_process_voxels.data[index].position) >> ivec3(0, 7, 14)) & ivec3(0x7F);
  139. ivec3 write_pos = read_pos + params.scroll;
  140. if (any(lessThan(write_pos, ivec3(0))) || any(greaterThanEqual(write_pos, ivec3(params.grid_size)))) {
  141. return; // Fits outside the 3D texture, don't do anything.
  142. }
  143. uint albedo = ((src_process_voxels.data[index].albedo & 0x7FFF) << 1) | 1; //add solid bit
  144. imageStore(dst_albedo, write_pos, uvec4(albedo));
  145. uint facing = (src_process_voxels.data[index].albedo >> 15) & 0x3F; //6 anisotropic facing bits
  146. imageStore(dst_facing, write_pos, uvec4(facing));
  147. uint light = src_process_voxels.data[index].light & 0x3fffffff; //30 bits of RGBE8985
  148. imageStore(dst_light, write_pos, uvec4(light));
  149. uint light_aniso = src_process_voxels.data[index].light_aniso & 0x3fffffff; //30 bits of 6 anisotropic 5 bits values
  150. imageStore(dst_light_aniso, write_pos, uvec4(light_aniso));
  151. #endif
  152. #ifdef MODE_SCROLL_OCCLUSION
  153. ivec3 pos = ivec3(gl_GlobalInvocationID.xyz);
  154. if (any(greaterThanEqual(pos, ivec3(params.grid_size) - abs(params.scroll)))) { //too large, do nothing
  155. return;
  156. }
  157. ivec3 read_pos = pos + max(ivec3(0), -params.scroll);
  158. ivec3 write_pos = pos + max(ivec3(0), params.scroll);
  159. read_pos.z += params.cascade * params.grid_size;
  160. uint occlusion = imageLoad(src_occlusion, read_pos).r;
  161. read_pos.x += params.grid_size;
  162. occlusion |= imageLoad(src_occlusion, read_pos).r << 16;
  163. const uint occlusion_shift[8] = uint[](12, 8, 4, 0, 28, 24, 20, 16);
  164. for (uint i = 0; i < 8; i++) {
  165. float o = float((occlusion >> occlusion_shift[i]) & 0xF) / 15.0;
  166. imageStore(dst_occlusion[i], write_pos, vec4(o));
  167. }
  168. #endif
  169. #ifdef MODE_INITIALIZE_JUMP_FLOOD
  170. ivec3 pos = ivec3(gl_GlobalInvocationID.xyz);
  171. uint c = imageLoad(src_color, pos).r;
  172. uvec4 v;
  173. if (bool(c & 0x1)) {
  174. //bit set means this is solid
  175. v.xyz = uvec3(pos);
  176. v.w = 255; //not zero means used
  177. } else {
  178. v.xyz = uvec3(0);
  179. v.w = 0; // zero means unused
  180. }
  181. imageStore(dst_positions, pos, v);
  182. #endif
  183. #ifdef MODE_INITIALIZE_JUMP_FLOOD_HALF
  184. ivec3 pos = ivec3(gl_GlobalInvocationID.xyz);
  185. ivec3 base_pos = pos * 2;
  186. //since we store in half size, lets kind of randomize what we store, so
  187. //the half size jump flood has a bit better chance to find something
  188. uvec4 closest[8];
  189. int closest_count = 0;
  190. for (uint i = 0; i < 8; i++) {
  191. ivec3 src_pos = base_pos + ((ivec3(i) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1));
  192. uint c = imageLoad(src_color, src_pos).r;
  193. if (bool(c & 1)) {
  194. uvec4 v = uvec4(uvec3(src_pos), 255);
  195. closest[closest_count] = v;
  196. closest_count++;
  197. }
  198. }
  199. if (closest_count == 0) {
  200. imageStore(dst_positions, pos, uvec4(0));
  201. } else {
  202. ivec3 indexv = (pos & ivec3(1, 1, 1)) * ivec3(1, 2, 4);
  203. int index = (indexv.x | indexv.y | indexv.z) % closest_count;
  204. imageStore(dst_positions, pos, closest[index]);
  205. }
  206. #endif
  207. #ifdef MODE_JUMPFLOOD
  208. //regular jumpflood, efficient for large steps, inefficient for small steps
  209. ivec3 pos = ivec3(gl_GlobalInvocationID.xyz);
  210. vec3 posf = vec3(pos);
  211. if (params.half_size) {
  212. posf = posf * 2.0 + 0.5;
  213. }
  214. uvec4 p = imageLoad(src_positions, pos);
  215. if (!params.half_size && p == uvec4(uvec3(pos), 255)) {
  216. imageStore(dst_positions, pos, p);
  217. return; //points to itself and valid, nothing better can be done, just pass
  218. }
  219. float p_dist;
  220. if (p.w != 0) {
  221. p_dist = distance(posf, vec3(p.xyz));
  222. } else {
  223. p_dist = 0.0; //should not matter
  224. }
  225. const uint offset_count = 26;
  226. const ivec3 offsets[offset_count] = ivec3[](
  227. ivec3(-1, -1, -1),
  228. ivec3(-1, -1, 0),
  229. ivec3(-1, -1, 1),
  230. ivec3(-1, 0, -1),
  231. ivec3(-1, 0, 0),
  232. ivec3(-1, 0, 1),
  233. ivec3(-1, 1, -1),
  234. ivec3(-1, 1, 0),
  235. ivec3(-1, 1, 1),
  236. ivec3(0, -1, -1),
  237. ivec3(0, -1, 0),
  238. ivec3(0, -1, 1),
  239. ivec3(0, 0, -1),
  240. ivec3(0, 0, 1),
  241. ivec3(0, 1, -1),
  242. ivec3(0, 1, 0),
  243. ivec3(0, 1, 1),
  244. ivec3(1, -1, -1),
  245. ivec3(1, -1, 0),
  246. ivec3(1, -1, 1),
  247. ivec3(1, 0, -1),
  248. ivec3(1, 0, 0),
  249. ivec3(1, 0, 1),
  250. ivec3(1, 1, -1),
  251. ivec3(1, 1, 0),
  252. ivec3(1, 1, 1));
  253. for (uint i = 0; i < offset_count; i++) {
  254. ivec3 ofs = pos + offsets[i] * params.step_size;
  255. if (any(lessThan(ofs, ivec3(0))) || any(greaterThanEqual(ofs, ivec3(params.grid_size)))) {
  256. continue;
  257. }
  258. uvec4 q = imageLoad(src_positions, ofs);
  259. if (q.w == 0) {
  260. continue; //was not initialized yet, ignore
  261. }
  262. float q_dist = distance(posf, vec3(q.xyz));
  263. if (p.w == 0 || q_dist < p_dist) {
  264. p = q; //just replace because current is unused
  265. p_dist = q_dist;
  266. }
  267. }
  268. imageStore(dst_positions, pos, p);
  269. #endif
  270. #ifdef MODE_JUMPFLOOD_OPTIMIZED
  271. //optimized version using shared compute memory
  272. ivec3 group_offset = ivec3(gl_WorkGroupID.xyz) % params.step_size;
  273. ivec3 group_pos = group_offset + (ivec3(gl_WorkGroupID.xyz) / params.step_size) * ivec3(GROUP_SIZE * params.step_size);
  274. //load data into local group memory
  275. if (all(lessThan(ivec3(gl_LocalInvocationID.xyz), ivec3((GROUP_SIZE + 2) / 2)))) {
  276. //use this thread for loading, this method uses less threads for this but its simpler and less divergent
  277. ivec3 base_pos = ivec3(gl_LocalInvocationID.xyz) * 2;
  278. for (uint i = 0; i < 8; i++) {
  279. ivec3 load_pos = base_pos + ((ivec3(i) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1));
  280. ivec3 load_global_pos = group_pos + (load_pos - ivec3(1)) * params.step_size;
  281. uvec4 q;
  282. if (all(greaterThanEqual(load_global_pos, ivec3(0))) && all(lessThan(load_global_pos, ivec3(params.grid_size)))) {
  283. q = imageLoad(src_positions, load_global_pos);
  284. } else {
  285. q = uvec4(0); //unused
  286. }
  287. group_store(load_pos, q);
  288. }
  289. }
  290. ivec3 global_pos = group_pos + ivec3(gl_LocalInvocationID.xyz) * params.step_size;
  291. if (any(lessThan(global_pos, ivec3(0))) || any(greaterThanEqual(global_pos, ivec3(params.grid_size)))) {
  292. return; //do nothing else, end here because outside range
  293. }
  294. //sync
  295. groupMemoryBarrier();
  296. barrier();
  297. ivec3 local_pos = ivec3(gl_LocalInvocationID.xyz) + ivec3(1);
  298. const uint offset_count = 27;
  299. const ivec3 offsets[offset_count] = ivec3[](
  300. ivec3(-1, -1, -1),
  301. ivec3(-1, -1, 0),
  302. ivec3(-1, -1, 1),
  303. ivec3(-1, 0, -1),
  304. ivec3(-1, 0, 0),
  305. ivec3(-1, 0, 1),
  306. ivec3(-1, 1, -1),
  307. ivec3(-1, 1, 0),
  308. ivec3(-1, 1, 1),
  309. ivec3(0, -1, -1),
  310. ivec3(0, -1, 0),
  311. ivec3(0, -1, 1),
  312. ivec3(0, 0, -1),
  313. ivec3(0, 0, 0),
  314. ivec3(0, 0, 1),
  315. ivec3(0, 1, -1),
  316. ivec3(0, 1, 0),
  317. ivec3(0, 1, 1),
  318. ivec3(1, -1, -1),
  319. ivec3(1, -1, 0),
  320. ivec3(1, -1, 1),
  321. ivec3(1, 0, -1),
  322. ivec3(1, 0, 0),
  323. ivec3(1, 0, 1),
  324. ivec3(1, 1, -1),
  325. ivec3(1, 1, 0),
  326. ivec3(1, 1, 1));
  327. //only makes sense if point is inside screen
  328. uvec4 closest = uvec4(0);
  329. float closest_dist = 0.0;
  330. vec3 posf = vec3(global_pos);
  331. if (params.half_size) {
  332. posf = posf * 2.0 + 0.5;
  333. }
  334. for (uint i = 0; i < offset_count; i++) {
  335. uvec4 point = group_load(local_pos + offsets[i]);
  336. if (point.w == 0) {
  337. continue; //was not initialized yet, ignore
  338. }
  339. float dist = distance(posf, vec3(point.xyz));
  340. if (closest.w == 0 || dist < closest_dist) {
  341. closest = point;
  342. closest_dist = dist;
  343. }
  344. }
  345. imageStore(dst_positions, global_pos, closest);
  346. #endif
  347. #ifdef MODE_UPSCALE_JUMP_FLOOD
  348. ivec3 pos = ivec3(gl_GlobalInvocationID.xyz);
  349. uint c = imageLoad(src_color, pos).r;
  350. uvec4 v;
  351. if (bool(c & 1)) {
  352. //bit set means this is solid
  353. v.xyz = uvec3(pos);
  354. v.w = 255; //not zero means used
  355. } else {
  356. v = imageLoad(src_positions_half, pos >> 1);
  357. float d = length(vec3(ivec3(v.xyz) - pos));
  358. ivec3 vbase = ivec3(v.xyz - (v.xyz & uvec3(1)));
  359. //search around if there is a better candidate from the same block
  360. for (int i = 0; i < 8; i++) {
  361. ivec3 bits = ((ivec3(i) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1));
  362. ivec3 p = vbase + bits;
  363. float d2 = length(vec3(p - pos));
  364. if (d2 < d) { //check valid distance before test so we avoid a read
  365. uint c2 = imageLoad(src_color, p).r;
  366. if (bool(c2 & 1)) {
  367. v.xyz = uvec3(p);
  368. d = d2;
  369. }
  370. }
  371. }
  372. //could validate better position..
  373. }
  374. imageStore(dst_positions, pos, v);
  375. #endif
  376. #ifdef MODE_OCCLUSION
  377. uint invocation_idx = uint(gl_LocalInvocationID.x);
  378. ivec3 region = ivec3(gl_WorkGroupID);
  379. ivec3 region_offset = -ivec3(OCCLUSION_SIZE);
  380. region_offset += region * OCCLUSION_SIZE * 2;
  381. region_offset += params.probe_offset * OCCLUSION_SIZE;
  382. if (params.scroll != ivec3(0)) {
  383. //validate scroll region
  384. ivec3 region_offset_to = region_offset + ivec3(OCCLUSION_SIZE * 2);
  385. uvec3 scroll_mask = uvec3(notEqual(params.scroll, ivec3(0))); //save which axes acre scrolling
  386. ivec3 scroll_from = mix(ivec3(0), ivec3(params.grid_size) + params.scroll, lessThan(params.scroll, ivec3(0)));
  387. ivec3 scroll_to = mix(ivec3(params.grid_size), params.scroll, greaterThan(params.scroll, ivec3(0)));
  388. if ((uvec3(lessThanEqual(region_offset_to, scroll_from)) | uvec3(greaterThanEqual(region_offset, scroll_to))) * scroll_mask == scroll_mask) { //all axes that scroll are out, exit
  389. return; //region outside scroll bounds, quit
  390. }
  391. }
  392. #define OCC_HALF_SIZE (OCCLUSION_SIZE / 2)
  393. ivec3 local_ofs = ivec3(uvec3(invocation_idx % OCC_HALF_SIZE, (invocation_idx % (OCC_HALF_SIZE * OCC_HALF_SIZE)) / OCC_HALF_SIZE, invocation_idx / (OCC_HALF_SIZE * OCC_HALF_SIZE))) * 4;
  394. /* for(int i=0;i<64;i++) {
  395. ivec3 offset = region_offset + local_ofs + ((ivec3(i) >> ivec3(0,2,4)) & ivec3(3,3,3));
  396. uint facig =
  397. if (all(greaterThanEqual(offset,ivec3(0))) && all(lessThan(offset,ivec3(params.grid_size)))) {*/
  398. for (int i = 0; i < 16; i++) { //skip x, so it can be packed
  399. ivec3 offset = local_ofs + ((ivec3(i * 4) >> ivec3(0, 2, 4)) & ivec3(3, 3, 3));
  400. uint facing_pack = 0;
  401. for (int j = 0; j < 4; j++) {
  402. ivec3 foffset = region_offset + offset + ivec3(j, 0, 0);
  403. if (all(greaterThanEqual(foffset, ivec3(0))) && all(lessThan(foffset, ivec3(params.grid_size)))) {
  404. uint f = imageLoad(src_facing, foffset).r;
  405. facing_pack |= f << (j * 8);
  406. }
  407. }
  408. occlusion_facing[(offset.z * (OCCLUSION_SIZE * 2 * OCCLUSION_SIZE * 2) + offset.y * (OCCLUSION_SIZE * 2) + offset.x) / 4] = facing_pack;
  409. }
  410. //sync occlusion saved
  411. groupMemoryBarrier();
  412. barrier();
  413. //process occlusion
  414. #define OCC_STEPS (OCCLUSION_SIZE * 3 - 2)
  415. #define OCC_HALF_STEPS (OCC_STEPS / 2)
  416. for (int step = 0; step < OCC_STEPS; step++) {
  417. bool shrink = step >= OCC_HALF_STEPS;
  418. int occ_step = shrink ? OCC_HALF_STEPS - (step - OCC_HALF_STEPS) - 1 : step;
  419. if (invocation_idx < group_size_offset[occ_step].x) {
  420. uint pv = group_pos[group_size_offset[occ_step].y + invocation_idx];
  421. ivec3 proc_abs = (ivec3(int(pv)) >> ivec3(0, 8, 16)) & ivec3(0xFF);
  422. if (shrink) {
  423. proc_abs = ivec3(OCCLUSION_SIZE) - proc_abs - ivec3(1);
  424. }
  425. for (int i = 0; i < 8; i++) {
  426. ivec3 bits = ((ivec3(i) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1));
  427. ivec3 proc_sign = bits * 2 - 1;
  428. ivec3 local_offset = ivec3(OCCLUSION_SIZE) + proc_abs * proc_sign - (ivec3(1) - bits);
  429. ivec3 offset = local_offset + region_offset;
  430. if (all(greaterThanEqual(offset, ivec3(0))) && all(lessThan(offset, ivec3(params.grid_size)))) {
  431. float occ;
  432. uint facing = get_facing(local_offset);
  433. if (facing != 0) { //solid
  434. occ = 0.0;
  435. } else if (step == 0) {
  436. #if 0
  437. occ = 0.0;
  438. if (get_facing(local_offset - ivec3(proc_sign.x,0,0))==0) {
  439. occ+=1.0;
  440. }
  441. if (get_facing(local_offset - ivec3(0,proc_sign.y,0))==0) {
  442. occ+=1.0;
  443. }
  444. if (get_facing(local_offset - ivec3(0,0,proc_sign.z))==0) {
  445. occ+=1.0;
  446. }
  447. /*
  448. if (get_facing(local_offset - proc_sign)==0) {
  449. occ+=1.0;
  450. }*/
  451. occ/=3.0;
  452. #endif
  453. occ = 1.0;
  454. } else {
  455. ivec3 read_dir = -proc_sign;
  456. ivec3 major_axis;
  457. if (proc_abs.x < proc_abs.y) {
  458. if (proc_abs.z < proc_abs.y) {
  459. major_axis = ivec3(0, 1, 0);
  460. } else {
  461. major_axis = ivec3(0, 0, 1);
  462. }
  463. } else {
  464. if (proc_abs.z < proc_abs.x) {
  465. major_axis = ivec3(1, 0, 0);
  466. } else {
  467. major_axis = ivec3(0, 0, 1);
  468. }
  469. }
  470. float avg = 0.0;
  471. occ = 0.0;
  472. ivec3 read_x = offset + ivec3(read_dir.x, 0, 0) + (proc_abs.x == 0 ? major_axis * read_dir : ivec3(0));
  473. ivec3 read_y = offset + ivec3(0, read_dir.y, 0) + (proc_abs.y == 0 ? major_axis * read_dir : ivec3(0));
  474. ivec3 read_z = offset + ivec3(0, 0, read_dir.z) + (proc_abs.z == 0 ? major_axis * read_dir : ivec3(0));
  475. uint facing_x = get_facing(read_x - region_offset);
  476. if (facing_x == 0) {
  477. if (all(greaterThanEqual(read_x, ivec3(0))) && all(lessThan(read_x, ivec3(params.grid_size)))) {
  478. occ += imageLoad(dst_occlusion[params.occlusion_index], read_x).r;
  479. avg += 1.0;
  480. }
  481. } else {
  482. if (proc_abs.x != 0) { //do not occlude from voxels in the opposite octant
  483. avg += 1.0;
  484. }
  485. }
  486. uint facing_y = get_facing(read_y - region_offset);
  487. if (facing_y == 0) {
  488. if (all(greaterThanEqual(read_y, ivec3(0))) && all(lessThan(read_y, ivec3(params.grid_size)))) {
  489. occ += imageLoad(dst_occlusion[params.occlusion_index], read_y).r;
  490. avg += 1.0;
  491. }
  492. } else {
  493. if (proc_abs.y != 0) {
  494. avg += 1.0;
  495. }
  496. }
  497. uint facing_z = get_facing(read_z - region_offset);
  498. if (facing_z == 0) {
  499. if (all(greaterThanEqual(read_z, ivec3(0))) && all(lessThan(read_z, ivec3(params.grid_size)))) {
  500. occ += imageLoad(dst_occlusion[params.occlusion_index], read_z).r;
  501. avg += 1.0;
  502. }
  503. } else {
  504. if (proc_abs.z != 0) {
  505. avg += 1.0;
  506. }
  507. }
  508. if (avg > 0.0) {
  509. occ /= avg;
  510. }
  511. }
  512. imageStore(dst_occlusion[params.occlusion_index], offset, vec4(occ));
  513. }
  514. }
  515. }
  516. groupMemoryBarrier();
  517. barrier();
  518. }
  519. #if 1
  520. //bias solid voxels away
  521. for (int i = 0; i < 64; i++) {
  522. ivec3 local_offset = local_ofs + ((ivec3(i) >> ivec3(0, 2, 4)) & ivec3(3, 3, 3));
  523. ivec3 offset = region_offset + local_offset;
  524. if (all(greaterThanEqual(offset, ivec3(0))) && all(lessThan(offset, ivec3(params.grid_size)))) {
  525. uint facing = get_facing(local_offset);
  526. if (facing != 0) {
  527. //only work on solids
  528. ivec3 proc_pos = local_offset - ivec3(OCCLUSION_SIZE);
  529. proc_pos += mix(ivec3(0), ivec3(1), greaterThanEqual(proc_pos, ivec3(0)));
  530. float avg = 0.0;
  531. float occ = 0.0;
  532. ivec3 read_dir = -sign(proc_pos);
  533. ivec3 read_dir_x = ivec3(read_dir.x, 0, 0);
  534. ivec3 read_dir_y = ivec3(0, read_dir.y, 0);
  535. ivec3 read_dir_z = ivec3(0, 0, read_dir.z);
  536. //solid
  537. #if 0
  538. uvec3 facing_pos_base = (uvec3(facing) >> uvec3(0,1,2)) & uvec3(1,1,1);
  539. uvec3 facing_neg_base = (uvec3(facing) >> uvec3(3,4,5)) & uvec3(1,1,1);
  540. uvec3 facing_pos= facing_pos_base &((~facing_neg_base)&uvec3(1,1,1));
  541. uvec3 facing_neg= facing_neg_base &((~facing_pos_base)&uvec3(1,1,1));
  542. #else
  543. uvec3 facing_pos = (uvec3(facing) >> uvec3(0, 1, 2)) & uvec3(1, 1, 1);
  544. uvec3 facing_neg = (uvec3(facing) >> uvec3(3, 4, 5)) & uvec3(1, 1, 1);
  545. #endif
  546. bvec3 read_valid = bvec3(mix(facing_neg, facing_pos, greaterThan(read_dir, ivec3(0))));
  547. //sides
  548. if (read_valid.x) {
  549. ivec3 read_offset = local_offset + read_dir_x;
  550. uint f = get_facing(read_offset);
  551. if (f == 0) {
  552. read_offset += region_offset;
  553. if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) {
  554. occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r;
  555. avg += 1.0;
  556. }
  557. }
  558. }
  559. if (read_valid.y) {
  560. ivec3 read_offset = local_offset + read_dir_y;
  561. uint f = get_facing(read_offset);
  562. if (f == 0) {
  563. read_offset += region_offset;
  564. if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) {
  565. occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r;
  566. avg += 1.0;
  567. }
  568. }
  569. }
  570. if (read_valid.z) {
  571. ivec3 read_offset = local_offset + read_dir_z;
  572. uint f = get_facing(read_offset);
  573. if (f == 0) {
  574. read_offset += region_offset;
  575. if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) {
  576. occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r;
  577. avg += 1.0;
  578. }
  579. }
  580. }
  581. //adjacents
  582. if (all(read_valid.yz)) {
  583. ivec3 read_offset = local_offset + read_dir_y + read_dir_z;
  584. uint f = get_facing(read_offset);
  585. if (f == 0) {
  586. read_offset += region_offset;
  587. if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) {
  588. occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r;
  589. avg += 1.0;
  590. }
  591. }
  592. }
  593. if (all(read_valid.xz)) {
  594. ivec3 read_offset = local_offset + read_dir_x + read_dir_z;
  595. uint f = get_facing(read_offset);
  596. if (f == 0) {
  597. read_offset += region_offset;
  598. if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) {
  599. occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r;
  600. avg += 1.0;
  601. }
  602. }
  603. }
  604. if (all(read_valid.xy)) {
  605. ivec3 read_offset = local_offset + read_dir_x + read_dir_y;
  606. uint f = get_facing(read_offset);
  607. if (f == 0) {
  608. read_offset += region_offset;
  609. if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) {
  610. occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r;
  611. avg += 1.0;
  612. }
  613. }
  614. }
  615. //diagonal
  616. if (all(read_valid)) {
  617. ivec3 read_offset = local_offset + read_dir;
  618. uint f = get_facing(read_offset);
  619. if (f == 0) {
  620. read_offset += region_offset;
  621. if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) {
  622. occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r;
  623. avg += 1.0;
  624. }
  625. }
  626. }
  627. if (avg > 0.0) {
  628. occ /= avg;
  629. }
  630. imageStore(dst_occlusion[params.occlusion_index], offset, vec4(occ));
  631. }
  632. }
  633. }
  634. #endif
  635. #if 1
// Directional visibility pass: attenuates each empty cell's occlusion by the
// fraction of adjacent solid neighbors whose facing bits point back at it.
// The barriers make the previous pass's writes visible before re-reading.
  636. groupMemoryBarrier();
  637. barrier();
// Each invocation walks a 4x4x4 sub-block: bits 0-1 / 2-3 / 4-5 of i select
// the x / y / z offset (0..3) added to this thread's base cell (local_ofs is
// declared above this chunk).
  638. for (int i = 0; i < 64; i++) {
  639. ivec3 local_offset = local_ofs + ((ivec3(i) >> ivec3(0, 2, 4)) & ivec3(3, 3, 3));
  640. ivec3 offset = region_offset + local_offset;
  641. if (all(greaterThanEqual(offset, ivec3(0))) && all(lessThan(offset, ivec3(params.grid_size)))) {
  642. uint facing = get_facing(local_offset);
// only empty cells (no facing bits) have their occlusion attenuated
  643. if (facing == 0) {
// position relative to the probe at the center of the occlusion cube...
  644. ivec3 proc_pos = local_offset - ivec3(OCCLUSION_SIZE);
// ...biased by +1 on non-negative axes so no component is zero, which makes
// sign() below always return +/-1 per axis
  645. proc_pos += mix(ivec3(0), ivec3(1), greaterThanEqual(proc_pos, ivec3(0)));
  646. ivec3 proc_abs = abs(proc_pos);
  647. ivec3 read_dir = sign(proc_pos); //opposite direction
// per-axis step vectors toward the neighbor being tested
  648. ivec3 read_dir_x = ivec3(read_dir.x, 0, 0);
  649. ivec3 read_dir_y = ivec3(0, read_dir.y, 0);
  650. ivec3 read_dir_z = ivec3(0, 0, read_dir.z);
  651. //solid
// facing bitmask layout (per the mix below): bits 1/2/4 for one sign of
// X/Y/Z, bits 8/16/32 for the opposite sign
  652. uvec3 read_mask = mix(uvec3(1, 2, 4), uvec3(8, 16, 32), greaterThan(read_dir, ivec3(0))); //match positive with negative normals
  653. uvec3 block_mask = mix(uvec3(1, 2, 4), uvec3(8, 16, 32), lessThan(read_dir, ivec3(0))); //match positive with negative normals
// NOTE(review): block_mask is cleared right after being computed, so the
// "& block_mask" rejection tests below are always false — the blocking
// heuristic is deliberately disabled; kept as-is to preserve behavior
  654. block_mask = uvec3(0);
// visible = solid neighbors facing this cell; occlude_total = all solid
// neighbors examined; their ratio scales the stored occlusion at the end
  655. float visible = 0.0;
  656. float occlude_total = 0.0;
// X-axis neighbor one step toward the probe (only while inside the cube)
  657. if (proc_abs.x < OCCLUSION_SIZE) {
  658. ivec3 read_offset = local_offset + read_dir_x;
  659. uint x_mask = get_facing(read_offset);
// non-zero facing => the neighbor is solid
  660. if (x_mask != 0) {
  661. read_offset += region_offset;
  662. if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) {
  663. occlude_total += 1.0;
// counts as visible when the solid's facing bit points back toward us
// (block_mask test is a no-op, see note above)
  664. if (bool(x_mask & read_mask.x) && !bool(x_mask & block_mask.x)) {
  665. visible += 1.0;
  666. }
  667. }
  668. }
  669. }
// Y-axis neighbor, same logic as X
  670. if (proc_abs.y < OCCLUSION_SIZE) {
  671. ivec3 read_offset = local_offset + read_dir_y;
  672. uint y_mask = get_facing(read_offset);
  673. if (y_mask != 0) {
  674. read_offset += region_offset;
  675. if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) {
  676. occlude_total += 1.0;
  677. if (bool(y_mask & read_mask.y) && !bool(y_mask & block_mask.y)) {
  678. visible += 1.0;
  679. }
  680. }
  681. }
  682. }
// Z-axis neighbor, same logic as X
  683. if (proc_abs.z < OCCLUSION_SIZE) {
  684. ivec3 read_offset = local_offset + read_dir_z;
  685. uint z_mask = get_facing(read_offset);
  686. if (z_mask != 0) {
  687. read_offset += region_offset;
  688. if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) {
  689. occlude_total += 1.0;
  690. if (bool(z_mask & read_mask.z) && !bool(z_mask & block_mask.z)) {
  691. visible += 1.0;
  692. }
  693. }
  694. }
  695. }
  696. //if near the cartesian plane, test in opposite direction too
// masks flipped: now matching the opposite-sign facing bits
  697. read_mask = mix(uvec3(1, 2, 4), uvec3(8, 16, 32), lessThan(read_dir, ivec3(0))); //match negative with positive normals
  698. block_mask = mix(uvec3(1, 2, 4), uvec3(8, 16, 32), greaterThan(read_dir, ivec3(0))); //match negative with positive normals
// NOTE(review): again cleared immediately — blocking test disabled here too
  699. block_mask = uvec3(0);
// proc_abs == 1 means this cell sits right next to the probe's axis plane
// (the +1 bias above makes 1 the minimum magnitude), so also probe the
// neighbor on the far side
  700. if (proc_abs.x == 1) {
  701. ivec3 read_offset = local_offset - read_dir_x;
  702. uint x_mask = get_facing(read_offset);
  703. if (x_mask != 0) {
  704. read_offset += region_offset;
  705. if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) {
  706. occlude_total += 1.0;
  707. if (bool(x_mask & read_mask.x) && !bool(x_mask & block_mask.x)) {
  708. visible += 1.0;
  709. }
  710. }
  711. }
  712. }
  713. if (proc_abs.y == 1) {
  714. ivec3 read_offset = local_offset - read_dir_y;
  715. uint y_mask = get_facing(read_offset);
  716. if (y_mask != 0) {
  717. read_offset += region_offset;
  718. if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) {
  719. occlude_total += 1.0;
  720. if (bool(y_mask & read_mask.y) && !bool(y_mask & block_mask.y)) {
  721. visible += 1.0;
  722. }
  723. }
  724. }
  725. }
  726. if (proc_abs.z == 1) {
  727. ivec3 read_offset = local_offset - read_dir_z;
  728. uint z_mask = get_facing(read_offset);
  729. if (z_mask != 0) {
  730. read_offset += region_offset;
  731. if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) {
  732. occlude_total += 1.0;
  733. if (bool(z_mask & read_mask.z) && !bool(z_mask & block_mask.z)) {
  734. visible += 1.0;
  735. }
  736. }
  737. }
  738. }
// scale the stored occlusion by the visible fraction of the solid neighbors
// examined; cells with no solid neighbors are left untouched
  739. if (occlude_total > 0.0) {
  740. float occ = imageLoad(dst_occlusion[params.occlusion_index], offset).r;
  741. occ *= visible / occlude_total;
  742. imageStore(dst_occlusion[params.occlusion_index], offset, vec4(occ));
  743. }
  744. }
  745. }
  746. }
  747. #endif
  748. /*
  749. for(int i=0;i<8;i++) {
  750. ivec3 local_offset = local_pos + ((ivec3(i) >> ivec3(2,1,0)) & ivec3(1,1,1)) * OCCLUSION_SIZE;
  751. ivec3 offset = local_offset - ivec3(OCCLUSION_SIZE); //looking around probe, so starts negative
  752. offset += region * OCCLUSION_SIZE * 2; //offset by region
  753. offset += params.probe_offset * OCCLUSION_SIZE; // offset by probe offset
  754. if (all(greaterThanEqual(offset,ivec3(0))) && all(lessThan(offset,ivec3(params.grid_size)))) {
  755. imageStore(dst_occlusion[params.occlusion_index],offset,vec4( occlusion_data[ to_linear(local_offset) ] ));
  756. //imageStore(dst_occlusion[params.occlusion_index],offset,vec4( occlusion_solid[ to_linear(local_offset) ] ));
  757. }
  758. }
  759. */
  760. #endif
  761. #ifdef MODE_STORE
// Final store pass: per voxel, write the SDF distance and packed occlusion,
// then compact all solid voxels into a global process list for later passes,
// and build the indirect-dispatch arguments for that list.
  762. ivec3 local = ivec3(gl_LocalInvocationID.xyz);
  763. ivec3 pos = ivec3(gl_GlobalInvocationID.xyz);
  764. // store SDF
// src_positions holds, for each voxel, the coords of its nearest solid voxel
// (jump-flood result — presumably written by an earlier mode; confirm)
  765. uvec4 p = imageLoad(src_positions, pos);
  766. bool solid = false;
  767. float d;
  768. if (ivec3(p.xyz) == pos) {
  769. //solid block
  770. d = 0;
  771. solid = true;
  772. } else {
  773. //distance block
// +1 bias so any non-solid voxel has d >= 1, keeping 0 exclusive to solids
  774. d = 1.0 + length(vec3(p.xyz) - vec3(pos));
  775. }
// normalize to [0,1] for the (presumably 8-bit) SDF image — TODO confirm format
  776. d /= 255.0;
  777. imageStore(dst_sdf, pos, vec4(d));
  778. // STORE OCCLUSION
// pack the 8 probe occlusion values as 4-bit quantities into one 32-bit word;
// the shift table puts probes 0-3 in the low 16 bits and 4-7 in the high 16
  779. uint occlusion = 0;
  780. const uint occlusion_shift[8] = uint[](12, 8, 4, 0, 28, 24, 20, 16);
  781. for (int i = 0; i < 8; i++) {
  782. float occ = imageLoad(src_occlusion[i], pos).r;
  783. occlusion |= uint(clamp(occ * 15.0, 0.0, 15.0)) << occlusion_shift[i];
  784. }
  785. {
// store the two 16-bit halves in separate texture regions: cascades are
// stacked along Z, and the high half lives one grid_size further along X
  786. ivec3 occ_pos = pos;
  787. occ_pos.z += params.cascade * params.grid_size;
  788. imageStore(dst_occlusion, occ_pos, uvec4(occlusion & 0xFFFF));
  789. occ_pos.x += params.grid_size;
  790. imageStore(dst_occlusion, occ_pos, uvec4(occlusion >> 16));
  791. }
  792. // STORE POSITIONS
// workgroup-shared counter of solid voxels found by this group; thread
// (0,0,0) resets it, everyone else waits at the barrier
  793. if (local == ivec3(0)) {
  794. store_position_count = 0; //base one stores as zero, the others wait
  795. }
  796. groupMemoryBarrier();
  797. barrier();
  798. if (solid) {
// claim a slot in the shared staging array
  799. uint index = atomicAdd(store_position_count, 1);
  800. // At least do the conversion work in parallel
// position packed 7 bits per axis (x | y<<7 | z<<14); bits 21+ reused below
  801. store_positions[index].position = uint(pos.x | (pos.y << 7) | (pos.z << 14));
  802. //see around which voxels point to this one, add them to the list
// build a 26-bit mask over the 3x3x3 neighborhood (center skipped): bit set
// when that neighbor's nearest-solid pointer is this voxel
  803. uint bit_index = 0;
  804. uint neighbour_bits = 0;
  805. for (int i = -1; i <= 1; i++) {
  806. for (int j = -1; j <= 1; j++) {
  807. for (int k = -1; k <= 1; k++) {
  808. if (i == 0 && j == 0 && k == 0) {
  809. continue;
  810. }
  811. ivec3 npos = pos + ivec3(i, j, k);
  812. if (all(greaterThanEqual(npos, ivec3(0))) && all(lessThan(npos, ivec3(params.grid_size)))) {
  813. p = imageLoad(src_positions, npos);
  814. if (ivec3(p.xyz) == pos) {
  815. neighbour_bits |= (1 << bit_index);
  816. }
  817. }
// bit index advances even for out-of-grid neighbors, keeping the layout fixed
  818. bit_index++;
  819. }
  820. }
  821. }
  822. uint rgb = imageLoad(src_albedo, pos).r;
  823. uint facing = imageLoad(src_facing, pos).r;
// the 26 neighbour bits are scattered across the record's spare high bits:
// albedo gets bits 0-10, position bits 11-21, light bits 22-23, aniso 24-25
  824. store_positions[index].albedo = rgb >> 1; //store as it comes (555) to avoid precision loss (and move away the alpha bit)
  825. store_positions[index].albedo |= (facing & 0x3F) << 15; // store facing in bits 15-21
  826. store_positions[index].albedo |= neighbour_bits << 21; //store lower 11 bits of neighbors with remaining albedo
  827. store_positions[index].position |= (neighbour_bits >> 11) << 21; //store 11 bits more of neighbors with position
  828. store_positions[index].light = imageLoad(src_light, pos).r;
  829. store_positions[index].light_aniso = imageLoad(src_light_aniso, pos).r;
  830. //add neighbors
  831. store_positions[index].light |= (neighbour_bits >> 22) << 30; //store 2 bits more of neighbors with light
  832. store_positions[index].light_aniso |= (neighbour_bits >> 24) << 30; //store 2 bits more of neighbors with aniso
  833. }
  834. groupMemoryBarrier();
  835. barrier();
  836. // global increment only once per group, to reduce pressure
// single global atomic per workgroup: reserve a contiguous range in the
// output buffer and grow the indirect-dispatch group count to cover it
  837. if (local == ivec3(0) && store_position_count > 0) {
  838. store_from_index = atomicAdd(dispatch_data.total_count, store_position_count);
// groups of 64 threads needed to cover [0, store_from_index + count)
  839. uint group_count = (store_from_index + store_position_count - 1) / 64 + 1;
  840. atomicMax(dispatch_data.x, group_count);
  841. }
  842. groupMemoryBarrier();
  843. barrier();
// flatten this thread's local coords (4x4x4 group, per the 4-strides here)
// and copy its staged record, if any, to the reserved global range
  844. uint read_index = uint(local.z * 4 * 4 + local.y * 4 + local.x);
  845. uint write_index = store_from_index + read_index;
  846. if (read_index < store_position_count) {
  847. dst_process_voxels.data[write_index] = store_positions[read_index];
  848. }
// the single global-origin thread initializes the unused dispatch dimensions
  849. if (pos == ivec3(0)) {
  850. //this thread clears y and z
  851. dispatch_data.y = 1;
  852. dispatch_data.z = 1;
  853. }
  854. #endif
  855. }