cluster_store.glsl 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. #[compute]
  2. #version 450
  3. #VERSION_DEFINES
  // Workgroup shape: 8x8 invocations in X/Y, a single layer in Z.
  layout(
      local_size_x = 8,
      local_size_y = 8,
      local_size_z = 1) in;
  // Push constants describing how the cluster buffers are laid out.
  // All sizes and offsets are expressed in 32-bit words.
  layout(push_constant, std430) uniform Params {
      // Stride between consecutive clusters in cluster_render
      // (how much data a single cluster takes).
      uint cluster_render_data_size;

      // Maximum render element count divided by 32. main() uses it as the
      // offset from a cluster's start in cluster_render to its per-element
      // Z-range words.
      uint max_render_element_count_div_32;

      // Cluster grid dimensions; invocations outside this grid return early.
      uvec2 cluster_screen_size;

      // Render element count divided by 32: the number of mask words main()
      // scans for each cluster.
      uint render_element_count_div_32;

      // Maximum cluster element count divided by 32: the number of mask words
      // that precede the Z-slice words in each cluster_store region.
      uint max_cluster_element_count_div_32;

      // Not read by this shader; presumably padding for the push-constant size.
      uint pad1;
      uint pad2;
  } params;
  // Read-only input. For each cluster, main() reads a run of element mask
  // words followed (at offset max_render_element_count_div_32) by one
  // Z-range word per render element.
  layout(set = 0, binding = 1, std430) buffer restrict readonly ClusterRender {
      uint data[];
  } cluster_render;
  // Read/write output. Each (element type, cluster) pair owns a region of
  // max_cluster_element_count_div_32 mask words followed by 32 per-Z-slice
  // words holding a packed min / max+1 index range.
  layout(set = 0, binding = 2, std430) buffer restrict ClusterStore {
      uint data[];
  } cluster_store;
  // Description of one render element as consumed by this shader.
  struct RenderElement {
      // Element type (0-4); selects which per-type section of cluster_store
      // the element is written to.
      uint type;

      // When set, the element's Z range is extended down to slice 0.
      bool touches_near;

      // When set, the element's Z range is extended up to slice 32 (exclusive).
      bool touches_far;

      // Index recorded in cluster_store for this element.
      uint original_index;

      // The remaining members are not read by this shader.
      mat3x4 transform_inv;
      vec3 scale;
      uint pad;
  };
  // Read-only array of render elements, indexed by the bit position found in
  // the cluster_render element masks.
  layout(set = 0, binding = 3, std430) buffer restrict readonly RenderElements {
      RenderElement data[];
  } render_elements;
  // Repacks the rendered cluster data into the layout stored in cluster_store.
  //
  // Each invocation owns exactly one cluster. It walks the cluster's element
  // mask in cluster_render; for every set bit it looks up the element's Z-range
  // word and then, inside the cluster_store region for (element type, cluster):
  //   * widens the packed min / max+1 index range of every Z slice touched, and
  //   * sets the bit for the element's original_index in the region's mask.
  //
  // NOTE(review): a slice word of 0 is treated as "empty", so cluster_store is
  // presumably cleared to zero before this runs - confirm against the caller.
  // NOTE(review): the slice packing keeps min in the low 16 bits and max+1 in
  // the high 16 bits, so it appears to assume original_index + 1 fits in
  // 16 bits - confirm.
  void main() {
      uvec2 cluster = gl_GlobalInvocationID.xy;
      if (any(greaterThanEqual(cluster, params.cluster_screen_size))) {
          return; // This invocation lies outside the cluster grid.
      }

      // Values that stay constant for the whole invocation.
      uint cluster_count = params.cluster_screen_size.x * params.cluster_screen_size.y;
      uint cluster_index = cluster.x + params.cluster_screen_size.x * cluster.y;
      uint read_base = cluster_index * params.cluster_render_data_size;
      // One output region: the element mask words plus 32 Z-slice words.
      uint region_words = params.max_cluster_element_count_div_32 + 32;

      for (uint word = 0; word < params.render_element_count_div_32; word++) {
          uint pending = cluster_render.data[read_base + word];

          while (pending != 0) {
              // Take the lowest set bit and clear it right away so the loop
              // always makes progress, even when the element is skipped below.
              uint bit = findLSB(pending);
              pending &= ~(1u << bit);

              uint element = word * 32 + bit;
              uint z_mask = cluster_render.data[read_base + params.max_render_element_count_div_32 + element];
              if (z_mask == 0) {
                  // An element that was written should also have written Z;
                  // skip it just in case it did not.
                  continue;
              }

              // Half-open range [z_begin, z_end) of Z slices covered.
              uint z_begin = findLSB(z_mask);
              uint z_end = findMSB(z_mask) + 1;
              if (render_elements.data[element].touches_near) {
                  z_begin = 0;
              }
              if (render_elements.data[element].touches_far) {
                  z_end = 32;
              }

              // Locate this cluster's region inside the section for the
              // element's type.
              uint kind = render_elements.data[element].type;
              uint write_base = (cluster_index + kind * cluster_count) * region_words;
              uint slice_base = write_base + params.max_cluster_element_count_div_32;
              uint stored_index = render_elements.data[element].original_index;

              for (uint z = z_begin; z < z_end; z++) {
                  uint packed = cluster_store.data[slice_base + z];
                  if (packed == 0) {
                      // Empty slice: seed with min = 0xFFFF and max+1 = 0 so the
                      // first element always replaces both bounds.
                      packed = 0xFFFF;
                  }
                  uint lowest = min(stored_index, packed & 0xFFFF);
                  // The upper bound is stored plus one, so a zero word means
                  // the slice was never written.
                  uint highest = max(stored_index + 1, packed >> 16);
                  cluster_store.data[slice_base + z] = lowest | (highest << 16);
              }

              // Flag the element itself in the region's mask so the rendering
              // code can reference it.
              cluster_store.data[write_base + (stored_index >> 5)] |= 1u << (stored_index & 0x1F);
          }
      }
  }