primrefgen_presplit.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "../builders/primrefgen.h"
  5. #include "../builders/heuristic_spatial.h"
  6. #include "../builders/splitter.h"
  7. #include "../../common/algorithms/parallel_for_for.h"
  8. #include "../../common/algorithms/parallel_for_for_prefix_sum.h"
  9. #define DBG_PRESPLIT(x)
  10. #define CHECK_PRESPLIT(x)
  11. #define GRID_SIZE 1024
  12. #define MAX_PRESPLITS_PER_PRIMITIVE_LOG 5
  13. #define MAX_PRESPLITS_PER_PRIMITIVE (1<<MAX_PRESPLITS_PER_PRIMITIVE_LOG)
  14. #define PRIORITY_CUTOFF_THRESHOLD 1.0f
  15. #define PRIORITY_SPLIT_POS_WEIGHT 1.5f
  16. namespace embree
  17. {
  18. namespace isa
  19. {
  20. struct PresplitItem
  21. {
  22. union {
  23. float priority;
  24. unsigned int data;
  25. };
  26. unsigned int index;
  27. __forceinline operator unsigned() const
  28. {
  29. return reinterpret_cast<const unsigned&>(priority);
  30. }
  31. __forceinline bool operator < (const PresplitItem& item) const
  32. {
  33. return (priority < item.priority);
  34. }
  35. template<typename Mesh>
  36. __forceinline static float compute_priority(const PrimRef &ref, Scene *scene, const Vec2i &mc)
  37. {
  38. const unsigned int geomID = ref.geomID();
  39. const unsigned int primID = ref.primID();
  40. const float area_aabb = area(ref.bounds());
  41. const float area_prim = ((Mesh*)scene->get(geomID))->projectedPrimitiveArea(primID);
  42. const unsigned int diff = 31 - lzcnt(mc.x^mc.y);
  43. assert(area_prim <= area_aabb);
  44. //const float priority = powf((area_aabb - area_prim) * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff),1.0f/4.0f);
  45. const float priority = sqrtf(sqrtf( (area_aabb - area_prim) * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff) ));
  46. assert(priority >= 0.0f && priority < FLT_LARGE);
  47. return priority;
  48. }
  49. };
  50. inline std::ostream &operator<<(std::ostream &cout, const PresplitItem& item) {
  51. return cout << "index " << item.index << " priority " << item.priority;
  52. };
  53. template<typename SplitterFactory>
  54. void splitPrimitive(SplitterFactory &Splitter,
  55. const PrimRef &prim,
  56. const unsigned int geomID,
  57. const unsigned int primID,
  58. const unsigned int split_level,
  59. const Vec3fa &grid_base,
  60. const float grid_scale,
  61. const float grid_extend,
  62. PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE],
  63. unsigned int& numSubPrims)
  64. {
  65. assert(split_level <= MAX_PRESPLITS_PER_PRIMITIVE_LOG);
  66. if (split_level == 0)
  67. {
  68. assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
  69. subPrims[numSubPrims++] = prim;
  70. }
  71. else
  72. {
  73. const Vec3fa lower = prim.lower;
  74. const Vec3fa upper = prim.upper;
  75. const Vec3fa glower = (lower-grid_base)*Vec3fa(grid_scale)+Vec3fa(0.2f);
  76. const Vec3fa gupper = (upper-grid_base)*Vec3fa(grid_scale)-Vec3fa(0.2f);
  77. Vec3ia ilower(floor(glower));
  78. Vec3ia iupper(floor(gupper));
  79. /* this ignores dimensions that are empty */
  80. iupper = (Vec3ia)(select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper)));
  81. /* compute a morton code for the lower and upper grid coordinates. */
  82. const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z);
  83. const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z);
  84. /* if all bits are equal then we cannot split */
  85. if(unlikely(lower_code == upper_code))
  86. {
  87. assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
  88. subPrims[numSubPrims++] = prim;
  89. return;
  90. }
  91. /* compute octree level and dimension to perform the split in */
  92. const unsigned int diff = 31 - lzcnt(lower_code^upper_code);
  93. const unsigned int level = diff / 3;
  94. const unsigned int dim = diff % 3;
  95. /* now we compute the grid position of the split */
  96. const unsigned int isplit = iupper[dim] & ~((1<<level)-1);
  97. /* compute world space position of split */
  98. const float inv_grid_size = 1.0f / GRID_SIZE;
  99. const float fsplit = grid_base[dim] + isplit * inv_grid_size * grid_extend;
  100. assert(prim.lower[dim] <= fsplit &&
  101. prim.upper[dim] >= fsplit);
  102. /* split primitive */
  103. const auto splitter = Splitter(prim);
  104. BBox3fa left,right;
  105. splitter(prim.bounds(),dim,fsplit,left,right);
  106. assert(!left.empty());
  107. assert(!right.empty());
  108. splitPrimitive(Splitter,PrimRef(left ,geomID,primID),geomID,primID,split_level-1,grid_base,grid_scale,grid_extend,subPrims,numSubPrims);
  109. splitPrimitive(Splitter,PrimRef(right,geomID,primID),geomID,primID,split_level-1,grid_base,grid_scale,grid_extend,subPrims,numSubPrims);
  110. }
  111. }
  112. template<typename Mesh, typename SplitterFactory>
  113. PrimInfo createPrimRefArray_presplit(Geometry* geometry, unsigned int geomID, size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
  114. {
  115. ParallelPrefixSumState<PrimInfo> pstate;
  116. /* first try */
  117. progressMonitor(0);
  118. PrimInfo pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
  119. return geometry->createPrimRefArray(prims,r,r.begin(),geomID);
  120. }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
  121. /* if we need to filter out geometry, run again */
  122. if (pinfo.size() != numPrimRefs)
  123. {
  124. progressMonitor(0);
  125. pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
  126. return geometry->createPrimRefArray(prims,r,base.size(),geomID);
  127. }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
  128. }
  129. return pinfo;
  130. }
  131. __forceinline Vec2i computeMC(const Vec3fa &grid_base, const float grid_scale, const PrimRef &ref)
  132. {
  133. const Vec3fa lower = ref.lower;
  134. const Vec3fa upper = ref.upper;
  135. const Vec3fa glower = (lower-grid_base)*Vec3fa(grid_scale)+Vec3fa(0.2f);
  136. const Vec3fa gupper = (upper-grid_base)*Vec3fa(grid_scale)-Vec3fa(0.2f);
  137. Vec3ia ilower(floor(glower));
  138. Vec3ia iupper(floor(gupper));
  139. /* this ignores dimensions that are empty */
  140. iupper = (Vec3ia)select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper));
  141. /* compute a morton code for the lower and upper grid coordinates. */
  142. const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z);
  143. const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z);
  144. return Vec2i(lower_code,upper_code);
  145. }
  146. template<typename Mesh, typename SplitterFactory>
  147. PrimInfo createPrimRefArray_presplit(Scene* scene, Geometry::GTypeMask types, bool mblur, size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
  148. {
  149. static const size_t MIN_STEP_SIZE = 128;
  150. ParallelForForPrefixSumState<PrimInfo> pstate;
  151. Scene::Iterator2 iter(scene,types,mblur);
  152. /* first try */
  153. progressMonitor(0);
  154. pstate.init(iter,size_t(1024));
  155. PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
  156. return mesh->createPrimRefArray(prims,r,k,(unsigned)geomID);
  157. }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
  158. /* if we need to filter out geometry, run again */
  159. if (pinfo.size() != numPrimRefs)
  160. {
  161. progressMonitor(0);
  162. pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
  163. return mesh->createPrimRefArray(prims,r,base.size(),(unsigned)geomID);
  164. }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
  165. }
  166. /* use correct number of primitives */
  167. size_t numPrimitives = pinfo.size();
  168. const size_t alloc_numPrimitives = prims.size();
  169. const size_t numSplitPrimitivesBudget = alloc_numPrimitives - numPrimitives;
  170. /* set up primitive splitter */
  171. SplitterFactory Splitter(scene);
  172. DBG_PRESPLIT(
  173. const size_t org_numPrimitives = pinfo.size();
  174. PRINT(numPrimitives);
  175. PRINT(alloc_numPrimitives);
  176. PRINT(numSplitPrimitivesBudget);
  177. );
  178. /* allocate double buffer presplit items */
  179. const size_t presplit_allocation_size = sizeof(PresplitItem)*alloc_numPrimitives;
  180. PresplitItem *presplitItem = (PresplitItem*)alignedMalloc(presplit_allocation_size,64);
  181. PresplitItem *tmp_presplitItem = (PresplitItem*)alignedMalloc(presplit_allocation_size,64);
  182. /* compute grid */
  183. const Vec3fa grid_base = pinfo.geomBounds.lower;
  184. const Vec3fa grid_diag = pinfo.geomBounds.size();
  185. const float grid_extend = max(grid_diag.x,max(grid_diag.y,grid_diag.z));
  186. const float grid_scale = grid_extend == 0.0f ? 0.0f : GRID_SIZE / grid_extend;
  187. /* init presplit items and get total sum */
  188. const float psum = parallel_reduce( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), 0.0f, [&](const range<size_t>& r) -> float {
  189. float sum = 0.0f;
  190. for (size_t i=r.begin(); i<r.end(); i++)
  191. {
  192. presplitItem[i].index = (unsigned int)i;
  193. const Vec2i mc = computeMC(grid_base,grid_scale,prims[i]);
  194. /* if all bits are equal then we cannot split */
  195. presplitItem[i].priority = (mc.x != mc.y) ? PresplitItem::compute_priority<Mesh>(prims[i],scene,mc) : 0.0f;
  196. /* FIXME: sum undeterministic */
  197. sum += presplitItem[i].priority;
  198. }
  199. return sum;
  200. },[](const float& a, const float& b) -> float { return a+b; });
  201. /* compute number of splits per primitive */
  202. const float inv_psum = 1.0f / psum;
  203. parallel_for( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void {
  204. for (size_t i=r.begin(); i<r.end(); i++)
  205. {
  206. if (presplitItem[i].priority > 0.0f)
  207. {
  208. const float rel_p = (float)numSplitPrimitivesBudget * presplitItem[i].priority * inv_psum;
  209. if (rel_p >= PRIORITY_CUTOFF_THRESHOLD) // need at least a split budget that generates two sub-prims
  210. {
  211. presplitItem[i].priority = max(min(ceilf(logf(rel_p)/logf(2.0f)),(float)MAX_PRESPLITS_PER_PRIMITIVE_LOG),1.0f);
  212. //presplitItem[i].priority = min(floorf(logf(rel_p)/logf(2.0f)),(float)MAX_PRESPLITS_PER_PRIMITIVE_LOG);
  213. assert(presplitItem[i].priority >= 0.0f && presplitItem[i].priority <= (float)MAX_PRESPLITS_PER_PRIMITIVE_LOG);
  214. }
  215. else
  216. presplitItem[i].priority = 0.0f;
  217. }
  218. }
  219. });
  220. auto isLeft = [&] (const PresplitItem &ref) { return ref.priority < PRIORITY_CUTOFF_THRESHOLD; };
  221. size_t center = parallel_partitioning(presplitItem,0,numPrimitives,isLeft,1024);
  222. /* anything to split ? */
  223. if (center < numPrimitives)
  224. {
  225. size_t numPrimitivesToSplit = numPrimitives - center;
  226. assert(presplitItem[center].priority >= 1.0f);
  227. /* sort presplit items in ascending order */
  228. radix_sort_u32(presplitItem + center,tmp_presplitItem + center,numPrimitivesToSplit,1024);
  229. CHECK_PRESPLIT(
  230. parallel_for( size_t(center+1), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void {
  231. for (size_t i=r.begin(); i<r.end(); i++)
  232. assert(presplitItem[i-1].priority <= presplitItem[i].priority);
  233. });
  234. );
  235. unsigned int* primOffset0 = (unsigned int*)tmp_presplitItem;
  236. unsigned int* primOffset1 = (unsigned int*)tmp_presplitItem + numPrimitivesToSplit;
  237. /* compute actual number of sub-primitives generated within the [center;numPrimitives-1] range */
  238. const size_t totalNumSubPrims = parallel_reduce( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), size_t(0), [&](const range<size_t>& t) -> size_t {
  239. size_t sum = 0;
  240. for (size_t i=t.begin(); i<t.end(); i++)
  241. {
  242. PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE];
  243. assert(presplitItem[i].priority >= 1.0f);
  244. const unsigned int primrefID = presplitItem[i].index;
  245. const float prio = presplitItem[i].priority;
  246. const unsigned int geomID = prims[primrefID].geomID();
  247. const unsigned int primID = prims[primrefID].primID();
  248. const unsigned int split_levels = (unsigned int)prio;
  249. unsigned int numSubPrims = 0;
  250. splitPrimitive(Splitter,prims[primrefID],geomID,primID,split_levels,grid_base,grid_scale,grid_extend,subPrims,numSubPrims);
  251. assert(numSubPrims);
  252. numSubPrims--; // can reuse slot
  253. sum+=numSubPrims;
  254. presplitItem[i].data = (numSubPrims << MAX_PRESPLITS_PER_PRIMITIVE_LOG) | split_levels;
  255. primOffset0[i-center] = numSubPrims;
  256. }
  257. return sum;
  258. },[](const size_t& a, const size_t& b) -> size_t { return a+b; });
  259. /* if we are over budget, need to shrink the range */
  260. if (totalNumSubPrims > numSplitPrimitivesBudget)
  261. {
  262. size_t new_center = numPrimitives-1;
  263. size_t sum = 0;
  264. for (;new_center>=center;new_center--)
  265. {
  266. const unsigned int numSubPrims = presplitItem[new_center].data >> MAX_PRESPLITS_PER_PRIMITIVE_LOG;
  267. if (unlikely(sum + numSubPrims >= numSplitPrimitivesBudget)) break;
  268. sum += numSubPrims;
  269. }
  270. new_center++;
  271. primOffset0 += new_center - center;
  272. numPrimitivesToSplit -= new_center - center;
  273. center = new_center;
  274. assert(numPrimitivesToSplit == (numPrimitives - center));
  275. }
  276. /* parallel prefix sum to compute offsets for storing sub-primitives */
  277. const unsigned int offset = parallel_prefix_sum(primOffset0,primOffset1,numPrimitivesToSplit,(unsigned int)0,std::plus<unsigned int>());
  278. assert(numPrimitives+offset <= alloc_numPrimitives);
  279. /* iterate over range, and split primitives into sub primitives and append them to prims array */
  280. parallel_for( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& rn) -> void {
  281. for (size_t j=rn.begin(); j<rn.end(); j++)
  282. {
  283. PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE];
  284. const unsigned int primrefID = presplitItem[j].index;
  285. const unsigned int geomID = prims[primrefID].geomID();
  286. const unsigned int primID = prims[primrefID].primID();
  287. const unsigned int split_levels = presplitItem[j].data & ((unsigned int)(1 << MAX_PRESPLITS_PER_PRIMITIVE_LOG)-1);
  288. assert(split_levels);
  289. assert(split_levels <= MAX_PRESPLITS_PER_PRIMITIVE_LOG);
  290. unsigned int numSubPrims = 0;
  291. splitPrimitive(Splitter,prims[primrefID],geomID,primID,split_levels,grid_base,grid_scale,grid_extend,subPrims,numSubPrims);
  292. const size_t newID = numPrimitives + primOffset1[j-center];
  293. assert(newID+numSubPrims-1 <= alloc_numPrimitives);
  294. prims[primrefID] = subPrims[0];
  295. for (size_t i=1;i<numSubPrims;i++)
  296. prims[newID+i-1] = subPrims[i];
  297. }
  298. });
  299. numPrimitives += offset;
  300. DBG_PRESPLIT(
  301. PRINT(pinfo.size());
  302. PRINT(numPrimitives);
  303. PRINT((float)numPrimitives/org_numPrimitives));
  304. }
  305. /* recompute centroid bounding boxes */
  306. pinfo = parallel_reduce(size_t(0),numPrimitives,size_t(MIN_STEP_SIZE),PrimInfo(empty),[&] (const range<size_t>& r) -> PrimInfo {
  307. PrimInfo p(empty);
  308. for (size_t j=r.begin(); j<r.end(); j++)
  309. p.add_center2(prims[j]);
  310. return p;
  311. }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
  312. assert(pinfo.size() == numPrimitives);
  313. /* free double buffer presplit items */
  314. alignedFree(tmp_presplitItem);
  315. alignedFree(presplitItem);
  316. return pinfo;
  317. }
  318. }
  319. }