bvh_builder_hair.h 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "../bvh/bvh.h"
  5. #include "../geometry/primitive.h"
  6. #include "../builders/bvh_builder_sah.h"
  7. #include "../builders/heuristic_binning_array_aligned.h"
  8. #include "../builders/heuristic_binning_array_unaligned.h"
  9. #include "../builders/heuristic_strand_array.h"
  10. #define NUM_HAIR_OBJECT_BINS 32
  11. namespace embree
  12. {
  13. namespace isa
  14. {
  15. struct BVHBuilderHair
  16. {
  17. /*! settings for builder */
  18. struct Settings
  19. {
  20. /*! default settings */
  21. Settings ()
  22. : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7), finished_range_threshold(inf) {}
  23. public:
  24. size_t branchingFactor; //!< branching factor of BVH to build
  25. size_t maxDepth; //!< maximum depth of BVH to build
  26. size_t logBlockSize; //!< log2 of blocksize for SAH heuristic
  27. size_t minLeafSize; //!< minimum size of a leaf
  28. size_t maxLeafSize; //!< maximum size of a leaf
  29. size_t finished_range_threshold; //!< finished range threshold
  30. };
  31. template<typename NodeRef,
  32. typename CreateAllocFunc,
  33. typename CreateAABBNodeFunc,
  34. typename SetAABBNodeFunc,
  35. typename CreateOBBNodeFunc,
  36. typename SetOBBNodeFunc,
  37. typename CreateLeafFunc,
  38. typename ProgressMonitor,
  39. typename ReportFinishedRangeFunc>
  40. class BuilderT
  41. {
  42. ALIGNED_CLASS_(16);
  43. friend struct BVHBuilderHair;
  44. typedef FastAllocator::CachedAllocator Allocator;
  45. typedef HeuristicArrayBinningSAH<PrimRef,NUM_HAIR_OBJECT_BINS> HeuristicBinningSAH;
  46. typedef UnalignedHeuristicArrayBinningSAH<PrimRef,NUM_HAIR_OBJECT_BINS> UnalignedHeuristicBinningSAH;
  47. typedef HeuristicStrandSplit HeuristicStrandSplitSAH;
  48. static const size_t MAX_BRANCHING_FACTOR = 8; //!< maximum supported BVH branching factor
  49. static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth
  50. static const size_t SINGLE_THREADED_THRESHOLD = 4096; //!< threshold to switch to single threaded build
  51. static const size_t travCostAligned = 1;
  52. static const size_t travCostUnaligned = 5;
  53. static const size_t intCost = 6;
  54. BuilderT (Scene* scene,
  55. PrimRef* prims,
  56. const CreateAllocFunc& createAlloc,
  57. const CreateAABBNodeFunc& createAABBNode,
  58. const SetAABBNodeFunc& setAABBNode,
  59. const CreateOBBNodeFunc& createOBBNode,
  60. const SetOBBNodeFunc& setOBBNode,
  61. const CreateLeafFunc& createLeaf,
  62. const ProgressMonitor& progressMonitor,
  63. const ReportFinishedRangeFunc& reportFinishedRange,
  64. const Settings settings)
  65. : cfg(settings),
  66. prims(prims),
  67. createAlloc(createAlloc),
  68. createAABBNode(createAABBNode),
  69. setAABBNode(setAABBNode),
  70. createOBBNode(createOBBNode),
  71. setOBBNode(setOBBNode),
  72. createLeaf(createLeaf),
  73. progressMonitor(progressMonitor),
  74. reportFinishedRange(reportFinishedRange),
  75. alignedHeuristic(prims), unalignedHeuristic(scene,prims), strandHeuristic(scene,prims) {}
  76. /*! checks if all primitives are from the same geometry */
  77. __forceinline bool sameGeometry(const PrimInfoRange& range)
  78. {
  79. if (range.size() == 0) return true;
  80. unsigned int firstGeomID = prims[range.begin()].geomID();
  81. for (size_t i=range.begin()+1; i<range.end(); i++) {
  82. if (prims[i].geomID() != firstGeomID){
  83. return false;
  84. }
  85. }
  86. return true;
  87. }
  88. /*! creates a large leaf that could be larger than supported by the BVH */
  89. NodeRef createLargeLeaf(size_t depth, const PrimInfoRange& pinfo, Allocator alloc)
  90. {
  91. /* this should never occur but is a fatal error */
  92. if (depth > cfg.maxDepth)
  93. throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");
  94. /* create leaf for few primitives */
  95. if (pinfo.size() <= cfg.maxLeafSize && sameGeometry(pinfo))
  96. return createLeaf(prims,pinfo,alloc);
  97. /* fill all children by always splitting the largest one */
  98. PrimInfoRange children[MAX_BRANCHING_FACTOR];
  99. unsigned numChildren = 1;
  100. children[0] = pinfo;
  101. do {
  102. /* find best child with largest bounding box area */
  103. int bestChild = -1;
  104. size_t bestSize = 0;
  105. for (unsigned i=0; i<numChildren; i++)
  106. {
  107. /* ignore leaves as they cannot get split */
  108. if (children[i].size() <= cfg.maxLeafSize && sameGeometry(children[i]))
  109. continue;
  110. /* remember child with largest size */
  111. if (children[i].size() > bestSize) {
  112. bestSize = children[i].size();
  113. bestChild = i;
  114. }
  115. }
  116. if (bestChild == -1) break;
  117. /*! split best child into left and right child */
  118. __aligned(64) PrimInfoRange left, right;
  119. if (!sameGeometry(children[bestChild])) {
  120. alignedHeuristic.splitByGeometry(children[bestChild],left,right);
  121. } else {
  122. alignedHeuristic.splitFallback(children[bestChild],left,right);
  123. }
  124. /* add new children left and right */
  125. children[bestChild] = children[numChildren-1];
  126. children[numChildren-1] = left;
  127. children[numChildren+0] = right;
  128. numChildren++;
  129. } while (numChildren < cfg.branchingFactor);
  130. /* create node */
  131. auto node = createAABBNode(alloc);
  132. for (size_t i=0; i<numChildren; i++) {
  133. const NodeRef child = createLargeLeaf(depth+1,children[i],alloc);
  134. setAABBNode(node,i,child,children[i].geomBounds);
  135. }
  136. return node;
  137. }
  138. /*! performs split */
  139. __noinline void split(const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo, bool& aligned) // FIXME: not inlined as ICC otherwise uses much stack
  140. {
  141. /* variable to track the SAH of the best splitting approach */
  142. float bestSAH = inf;
  143. const size_t blocks = (pinfo.size()+(1ull<<cfg.logBlockSize)-1ull) >> cfg.logBlockSize;
  144. const float leafSAH = intCost*float(blocks)*halfArea(pinfo.geomBounds);
  145. /* try standard binning in aligned space */
  146. float alignedObjectSAH = inf;
  147. HeuristicBinningSAH::Split alignedObjectSplit;
  148. if (aligned) {
  149. alignedObjectSplit = alignedHeuristic.find(pinfo,cfg.logBlockSize);
  150. alignedObjectSAH = travCostAligned*halfArea(pinfo.geomBounds) + intCost*alignedObjectSplit.splitSAH();
  151. bestSAH = min(alignedObjectSAH,bestSAH);
  152. }
  153. /* try standard binning in unaligned space */
  154. UnalignedHeuristicBinningSAH::Split unalignedObjectSplit;
  155. LinearSpace3fa uspace;
  156. float unalignedObjectSAH = inf;
  157. if (bestSAH > 0.7f*leafSAH) {
  158. uspace = unalignedHeuristic.computeAlignedSpace(pinfo);
  159. const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(pinfo,uspace);
  160. unalignedObjectSplit = unalignedHeuristic.find(sinfo,cfg.logBlockSize,uspace);
  161. unalignedObjectSAH = travCostUnaligned*halfArea(pinfo.geomBounds) + intCost*unalignedObjectSplit.splitSAH();
  162. bestSAH = min(unalignedObjectSAH,bestSAH);
  163. }
  164. /* try splitting into two strands */
  165. HeuristicStrandSplitSAH::Split strandSplit;
  166. float strandSAH = inf;
  167. if (bestSAH > 0.7f*leafSAH && pinfo.size() <= 256) {
  168. strandSplit = strandHeuristic.find(pinfo,cfg.logBlockSize);
  169. strandSAH = travCostUnaligned*halfArea(pinfo.geomBounds) + intCost*strandSplit.splitSAH();
  170. bestSAH = min(strandSAH,bestSAH);
  171. }
  172. /* fallback if SAH heuristics failed */
  173. if (unlikely(!std::isfinite(bestSAH)))
  174. {
  175. alignedHeuristic.deterministic_order(pinfo);
  176. alignedHeuristic.splitFallback(pinfo,linfo,rinfo);
  177. }
  178. /* perform aligned split if this is best */
  179. else if (bestSAH == alignedObjectSAH) {
  180. alignedHeuristic.split(alignedObjectSplit,pinfo,linfo,rinfo);
  181. }
  182. /* perform unaligned split if this is best */
  183. else if (bestSAH == unalignedObjectSAH) {
  184. unalignedHeuristic.split(unalignedObjectSplit,uspace,pinfo,linfo,rinfo);
  185. aligned = false;
  186. }
  187. /* perform strand split if this is best */
  188. else if (bestSAH == strandSAH) {
  189. strandHeuristic.split(strandSplit,pinfo,linfo,rinfo);
  190. aligned = false;
  191. }
  192. /* can never happen */
  193. else
  194. assert(false);
  195. }
  196. /*! recursive build */
  197. NodeRef recurse(size_t depth, const PrimInfoRange& pinfo, Allocator alloc, bool toplevel, bool alloc_barrier)
  198. {
  199. /* get thread local allocator */
  200. if (!alloc)
  201. alloc = createAlloc();
  202. /* call memory monitor function to signal progress */
  203. if (toplevel && pinfo.size() <= SINGLE_THREADED_THRESHOLD)
  204. progressMonitor(pinfo.size());
  205. PrimInfoRange children[MAX_BRANCHING_FACTOR];
  206. /* create leaf node */
  207. if (depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || pinfo.size() <= cfg.minLeafSize) {
  208. alignedHeuristic.deterministic_order(pinfo);
  209. return createLargeLeaf(depth,pinfo,alloc);
  210. }
  211. /* fill all children by always splitting the one with the largest surface area */
  212. size_t numChildren = 1;
  213. children[0] = pinfo;
  214. bool aligned = true;
  215. do {
  216. /* find best child with largest bounding box area */
  217. ssize_t bestChild = -1;
  218. float bestArea = neg_inf;
  219. for (size_t i=0; i<numChildren; i++)
  220. {
  221. /* ignore leaves as they cannot get split */
  222. if (children[i].size() <= cfg.minLeafSize)
  223. continue;
  224. /* remember child with largest area */
  225. if (area(children[i].geomBounds) > bestArea) {
  226. bestArea = area(children[i].geomBounds);
  227. bestChild = i;
  228. }
  229. }
  230. if (bestChild == -1) break;
  231. /*! split best child into left and right child */
  232. PrimInfoRange left, right;
  233. split(children[bestChild],left,right,aligned);
  234. /* add new children left and right */
  235. children[bestChild] = children[numChildren-1];
  236. children[numChildren-1] = left;
  237. children[numChildren+0] = right;
  238. numChildren++;
  239. } while (numChildren < cfg.branchingFactor);
  240. NodeRef node;
  241. /* create aligned node */
  242. if (aligned)
  243. {
  244. node = createAABBNode(alloc);
  245. /* spawn tasks or ... */
  246. if (pinfo.size() > SINGLE_THREADED_THRESHOLD)
  247. {
  248. parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) {
  249. for (size_t i=r.begin(); i<r.end(); i++) {
  250. const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
  251. setAABBNode(node,i,recurse(depth+1,children[i],nullptr,true,child_alloc_barrier),children[i].geomBounds);
  252. _mm_mfence(); // to allow non-temporal stores during build
  253. }
  254. });
  255. }
  256. /* ... continue sequentially */
  257. else {
  258. for (size_t i=0; i<numChildren; i++) {
  259. const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
  260. setAABBNode(node,i,recurse(depth+1,children[i],alloc,false,child_alloc_barrier),children[i].geomBounds);
  261. }
  262. }
  263. }
  264. /* create unaligned node */
  265. else
  266. {
  267. node = createOBBNode(alloc);
  268. /* spawn tasks or ... */
  269. if (pinfo.size() > SINGLE_THREADED_THRESHOLD)
  270. {
  271. parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) {
  272. for (size_t i=r.begin(); i<r.end(); i++) {
  273. const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpace(children[i]);
  274. const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(children[i],space);
  275. const OBBox3fa obounds(space,sinfo.geomBounds);
  276. const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
  277. setOBBNode(node,i,recurse(depth+1,children[i],nullptr,true,child_alloc_barrier),obounds);
  278. _mm_mfence(); // to allow non-temporal stores during build
  279. }
  280. });
  281. }
  282. /* ... continue sequentially */
  283. else
  284. {
  285. for (size_t i=0; i<numChildren; i++) {
  286. const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpace(children[i]);
  287. const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(children[i],space);
  288. const OBBox3fa obounds(space,sinfo.geomBounds);
  289. const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
  290. setOBBNode(node,i,recurse(depth+1,children[i],alloc,false,child_alloc_barrier),obounds);
  291. }
  292. }
  293. }
  294. /* reports a finished range of primrefs */
  295. if (unlikely(alloc_barrier))
  296. reportFinishedRange(pinfo);
  297. return node;
  298. }
  299. private:
  300. Settings cfg;
  301. PrimRef* prims;
  302. const CreateAllocFunc& createAlloc;
  303. const CreateAABBNodeFunc& createAABBNode;
  304. const SetAABBNodeFunc& setAABBNode;
  305. const CreateOBBNodeFunc& createOBBNode;
  306. const SetOBBNodeFunc& setOBBNode;
  307. const CreateLeafFunc& createLeaf;
  308. const ProgressMonitor& progressMonitor;
  309. const ReportFinishedRangeFunc& reportFinishedRange;
  310. private:
  311. HeuristicBinningSAH alignedHeuristic;
  312. UnalignedHeuristicBinningSAH unalignedHeuristic;
  313. HeuristicStrandSplitSAH strandHeuristic;
  314. };
  315. template<typename NodeRef,
  316. typename CreateAllocFunc,
  317. typename CreateAABBNodeFunc,
  318. typename SetAABBNodeFunc,
  319. typename CreateOBBNodeFunc,
  320. typename SetOBBNodeFunc,
  321. typename CreateLeafFunc,
  322. typename ProgressMonitor,
  323. typename ReportFinishedRangeFunc>
  324. static NodeRef build (const CreateAllocFunc& createAlloc,
  325. const CreateAABBNodeFunc& createAABBNode,
  326. const SetAABBNodeFunc& setAABBNode,
  327. const CreateOBBNodeFunc& createOBBNode,
  328. const SetOBBNodeFunc& setOBBNode,
  329. const CreateLeafFunc& createLeaf,
  330. const ProgressMonitor& progressMonitor,
  331. const ReportFinishedRangeFunc& reportFinishedRange,
  332. Scene* scene,
  333. PrimRef* prims,
  334. const PrimInfo& pinfo,
  335. const Settings settings)
  336. {
  337. typedef BuilderT<NodeRef,
  338. CreateAllocFunc,
  339. CreateAABBNodeFunc,SetAABBNodeFunc,
  340. CreateOBBNodeFunc,SetOBBNodeFunc,
  341. CreateLeafFunc,ProgressMonitor,
  342. ReportFinishedRangeFunc> Builder;
  343. Builder builder(scene,prims,createAlloc,
  344. createAABBNode,setAABBNode,
  345. createOBBNode,setOBBNode,
  346. createLeaf,progressMonitor,reportFinishedRange,settings);
  347. NodeRef root = builder.recurse(1,pinfo,nullptr,true,false);
  348. _mm_mfence(); // to allow non-temporal stores during build
  349. return root;
  350. }
  351. };
  352. }
  353. }