bvh_builder_msmblur_hair.h 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "../bvh/bvh.h"
  5. #include "../geometry/primitive.h"
  6. #include "../builders/bvh_builder_msmblur.h"
  7. #include "../builders/heuristic_binning_array_aligned.h"
  8. #include "../builders/heuristic_binning_array_unaligned.h"
  9. #include "../builders/heuristic_timesplit_array.h"
  10. namespace embree
  11. {
  12. namespace isa
  13. {
  14. struct BVHBuilderHairMSMBlur
  15. {
  16. /*! settings for msmblur builder */
  17. struct Settings
  18. {
  19. /*! default settings */
  20. Settings ()
  21. : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(8) {}
  22. public:
  23. size_t branchingFactor; //!< branching factor of BVH to build
  24. size_t maxDepth; //!< maximum depth of BVH to build
  25. size_t logBlockSize; //!< log2 of blocksize for SAH heuristic
  26. size_t minLeafSize; //!< minimum size of a leaf
  27. size_t maxLeafSize; //!< maximum size of a leaf
  28. };
  29. struct BuildRecord
  30. {
  31. public:
  32. __forceinline BuildRecord () {}
  33. __forceinline BuildRecord (size_t depth)
  34. : depth(depth) {}
  35. __forceinline BuildRecord (const SetMB& prims, size_t depth)
  36. : depth(depth), prims(prims) {}
  37. __forceinline size_t size() const {
  38. return prims.size();
  39. }
  40. public:
  41. size_t depth; //!< depth of the root of this subtree
  42. SetMB prims; //!< the list of primitives
  43. };
  44. template<typename NodeRef,
  45. typename RecalculatePrimRef,
  46. typename CreateAllocFunc,
  47. typename CreateAABBNodeMBFunc,
  48. typename SetAABBNodeMBFunc,
  49. typename CreateOBBNodeMBFunc,
  50. typename SetOBBNodeMBFunc,
  51. typename CreateLeafFunc,
  52. typename ProgressMonitor>
  53. class BuilderT
  54. {
  55. ALIGNED_CLASS_(16);
  56. static const size_t MAX_BRANCHING_FACTOR = 8; //!< maximum supported BVH branching factor
  57. static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth
  58. static const size_t SINGLE_THREADED_THRESHOLD = 4096; //!< threshold to switch to single threaded build
  59. typedef BVHNodeRecordMB<NodeRef> NodeRecordMB;
  60. typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
  61. typedef FastAllocator::CachedAllocator Allocator;
  62. typedef LocalChildListT<BuildRecord,MAX_BRANCHING_FACTOR> LocalChildList;
  63. typedef HeuristicMBlurTemporalSplit<PrimRefMB,RecalculatePrimRef,MBLUR_NUM_TEMPORAL_BINS> HeuristicTemporal;
  64. typedef HeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> HeuristicBinning;
  65. typedef UnalignedHeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> UnalignedHeuristicBinning;
  66. public:
  67. BuilderT (Scene* scene,
  68. const RecalculatePrimRef& recalculatePrimRef,
  69. const CreateAllocFunc& createAlloc,
  70. const CreateAABBNodeMBFunc& createAABBNodeMB,
  71. const SetAABBNodeMBFunc& setAABBNodeMB,
  72. const CreateOBBNodeMBFunc& createOBBNodeMB,
  73. const SetOBBNodeMBFunc& setOBBNodeMB,
  74. const CreateLeafFunc& createLeaf,
  75. const ProgressMonitor& progressMonitor,
  76. const Settings settings)
  77. : cfg(settings),
  78. scene(scene),
  79. recalculatePrimRef(recalculatePrimRef),
  80. createAlloc(createAlloc),
  81. createAABBNodeMB(createAABBNodeMB), setAABBNodeMB(setAABBNodeMB),
  82. createOBBNodeMB(createOBBNodeMB), setOBBNodeMB(setOBBNodeMB),
  83. createLeaf(createLeaf),
  84. progressMonitor(progressMonitor),
  85. unalignedHeuristic(scene),
  86. temporalSplitHeuristic(scene->device,recalculatePrimRef) {}
  87. private:
  88. /*! checks if all primitives are from the same geometry */
  89. __forceinline bool sameGeometry(const SetMB& set)
  90. {
  91. mvector<PrimRefMB>& prims = *set.prims;
  92. unsigned int firstGeomID = prims[set.begin()].geomID();
  93. for (size_t i=set.begin()+1; i<set.end(); i++) {
  94. if (prims[i].geomID() != firstGeomID){
  95. return false;
  96. }
  97. }
  98. return true;
  99. }
  100. /*! performs some split if SAH approaches fail */
  101. void splitFallback(const SetMB& set, SetMB& lset, SetMB& rset)
  102. {
  103. mvector<PrimRefMB>& prims = *set.prims;
  104. const size_t begin = set.begin();
  105. const size_t end = set.end();
  106. const size_t center = (begin + end)/2;
  107. PrimInfoMB linfo = empty;
  108. for (size_t i=begin; i<center; i++)
  109. linfo.add_primref(prims[i]);
  110. PrimInfoMB rinfo = empty;
  111. for (size_t i=center; i<end; i++)
  112. rinfo.add_primref(prims[i]);
  113. new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range);
  114. new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end ),set.time_range);
  115. }
  116. void splitByGeometry(const SetMB& set, SetMB& lset, SetMB& rset)
  117. {
  118. assert(set.size() > 1);
  119. const size_t begin = set.begin();
  120. const size_t end = set.end();
  121. PrimInfoMB linfo(empty);
  122. PrimInfoMB rinfo(empty);
  123. unsigned int geomID = (*set.prims)[begin].geomID();
  124. size_t center = serial_partitioning(set.prims->data(),begin,end,linfo,rinfo,
  125. [&] ( const PrimRefMB& prim ) { return prim.geomID() == geomID; },
  126. [ ] ( PrimInfoMB& a, const PrimRefMB& ref ) { a.add_primref(ref); });
  127. new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range);
  128. new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end ),set.time_range);
  129. }
  130. /*! creates a large leaf that could be larger than supported by the BVH */
  131. NodeRecordMB4D createLargeLeaf(BuildRecord& current, Allocator alloc)
  132. {
  133. /* this should never occur but is a fatal error */
  134. if (current.depth > cfg.maxDepth)
  135. throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");
  136. /* special case when directly creating leaf without any splits that could shrink time_range */
  137. bool force_split = false;
  138. if (current.depth == 1 && current.size() > 0)
  139. {
  140. BBox1f c = empty;
  141. BBox1f p = current.prims.time_range;
  142. for (size_t i=current.prims.begin(); i<current.prims.end(); i++) {
  143. mvector<PrimRefMB>& prims = *current.prims.prims;
  144. c.extend(prims[i].time_range);
  145. }
  146. force_split = c.lower > p.lower || c.upper < p.upper;
  147. }
  148. /* create leaf for few primitives */
  149. if (current.size() <= cfg.maxLeafSize && sameGeometry(current.prims) && !force_split)
  150. return createLeaf(current.prims,alloc);
  151. /* fill all children by always splitting the largest one */
  152. LocalChildList children(current);
  153. NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
  154. do {
  155. /* find best child with largest bounding box area */
  156. int bestChild = -1;
  157. size_t bestSize = 0;
  158. for (unsigned i=0; i<children.size(); i++)
  159. {
  160. /* ignore leaves as they cannot get split */
  161. if (children[i].size() <= cfg.maxLeafSize && sameGeometry(children[i].prims) && !force_split)
  162. continue;
  163. force_split = false;
  164. /* remember child with largest size */
  165. if (children[i].size() > bestSize) {
  166. bestSize = children[i].size();
  167. bestChild = i;
  168. }
  169. }
  170. if (bestChild == -1) break;
  171. /*! split best child into left and right child */
  172. BuildRecord left(current.depth+1);
  173. BuildRecord right(current.depth+1);
  174. if (!sameGeometry(children[bestChild].prims)) {
  175. splitByGeometry(children[bestChild].prims,left.prims,right.prims);
  176. } else {
  177. splitFallback(children[bestChild].prims,left.prims,right.prims);
  178. }
  179. children.split(bestChild,left,right,std::unique_ptr<mvector<PrimRefMB>>());
  180. } while (children.size() < cfg.branchingFactor);
  181. /* detect time_ranges that have shrunken */
  182. bool timesplit = false;
  183. for (size_t i=0; i<children.size(); i++) {
  184. const BBox1f c = children[i].prims.time_range;
  185. const BBox1f p = current.prims.time_range;
  186. timesplit |= c.lower > p.lower || c.upper < p.upper;
  187. }
  188. /* create node */
  189. NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,timesplit);
  190. LBBox3fa bounds = empty;
  191. for (size_t i=0; i<children.size(); i++) {
  192. values[i] = createLargeLeaf(children[i],alloc);
  193. bounds.extend(values[i].lbounds);
  194. }
  195. setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);
  196. if (timesplit)
  197. bounds = current.prims.linearBounds(recalculatePrimRef);
  198. return NodeRecordMB4D(node,bounds,current.prims.time_range);
  199. }
  200. /*! performs split */
  201. std::unique_ptr<mvector<PrimRefMB>> split(const BuildRecord& current, BuildRecord& lrecord, BuildRecord& rrecord, bool& aligned, bool& timesplit)
  202. {
  203. /* variable to track the SAH of the best splitting approach */
  204. float bestSAH = inf;
  205. const float leafSAH = current.prims.leafSAH(cfg.logBlockSize);
  206. /* perform standard binning in aligned space */
  207. HeuristicBinning::Split alignedObjectSplit = alignedHeuristic.find(current.prims,cfg.logBlockSize);
  208. float alignedObjectSAH = alignedObjectSplit.splitSAH();
  209. bestSAH = min(alignedObjectSAH,bestSAH);
  210. /* perform standard binning in unaligned space */
  211. UnalignedHeuristicBinning::Split unalignedObjectSplit;
  212. LinearSpace3fa uspace;
  213. float unalignedObjectSAH = inf;
  214. if (alignedObjectSAH > 0.7f*leafSAH) {
  215. uspace = unalignedHeuristic.computeAlignedSpaceMB(scene,current.prims);
  216. const SetMB sset = current.prims.primInfo(recalculatePrimRef,uspace);
  217. unalignedObjectSplit = unalignedHeuristic.find(sset,cfg.logBlockSize,uspace);
  218. unalignedObjectSAH = 1.3f*unalignedObjectSplit.splitSAH(); // makes unaligned splits more expensive
  219. bestSAH = min(unalignedObjectSAH,bestSAH);
  220. }
  221. /* do temporal splits only if previous approaches failed to produce good SAH and the the time range is large enough */
  222. float temporal_split_sah = inf;
  223. typename HeuristicTemporal::Split temporal_split;
  224. if (bestSAH > 0.5f*leafSAH) {
  225. if (current.prims.time_range.size() > 1.01f/float(current.prims.max_num_time_segments)) {
  226. temporal_split = temporalSplitHeuristic.find(current.prims,cfg.logBlockSize);
  227. temporal_split_sah = temporal_split.splitSAH();
  228. bestSAH = min(temporal_split_sah,bestSAH);
  229. }
  230. }
  231. /* perform fallback split if SAH heuristics failed */
  232. if (unlikely(!std::isfinite(bestSAH))) {
  233. current.prims.deterministic_order();
  234. splitFallback(current.prims,lrecord.prims,rrecord.prims);
  235. }
  236. /* perform aligned split if this is best */
  237. else if (likely(bestSAH == alignedObjectSAH)) {
  238. alignedHeuristic.split(alignedObjectSplit,current.prims,lrecord.prims,rrecord.prims);
  239. }
  240. /* perform unaligned split if this is best */
  241. else if (likely(bestSAH == unalignedObjectSAH)) {
  242. unalignedHeuristic.split(unalignedObjectSplit,uspace,current.prims,lrecord.prims,rrecord.prims);
  243. aligned = false;
  244. }
  245. /* perform temporal split if this is best */
  246. else if (likely(bestSAH == temporal_split_sah)) {
  247. timesplit = true;
  248. return temporalSplitHeuristic.split(temporal_split,current.prims,lrecord.prims,rrecord.prims);
  249. }
  250. else
  251. assert(false);
  252. return std::unique_ptr<mvector<PrimRefMB>>();
  253. }
  254. /*! recursive build */
  255. NodeRecordMB4D recurse(BuildRecord& current, Allocator alloc, bool toplevel)
  256. {
  257. /* get thread local allocator */
  258. if (!alloc)
  259. alloc = createAlloc();
  260. /* call memory monitor function to signal progress */
  261. if (toplevel && current.size() <= SINGLE_THREADED_THRESHOLD)
  262. progressMonitor(current.size());
  263. /* create leaf node */
  264. if (current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || current.size() <= cfg.minLeafSize) {
  265. current.prims.deterministic_order();
  266. return createLargeLeaf(current,alloc);
  267. }
  268. /* fill all children by always splitting the one with the largest surface area */
  269. NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
  270. LocalChildList children(current);
  271. bool aligned = true;
  272. bool timesplit = false;
  273. do {
  274. /* find best child with largest bounding box area */
  275. ssize_t bestChild = -1;
  276. float bestArea = neg_inf;
  277. for (size_t i=0; i<children.size(); i++)
  278. {
  279. /* ignore leaves as they cannot get split */
  280. if (children[i].size() <= cfg.minLeafSize)
  281. continue;
  282. /* remember child with largest area */
  283. const float A = children[i].prims.halfArea();
  284. if (A > bestArea) {
  285. bestArea = children[i].prims.halfArea();
  286. bestChild = i;
  287. }
  288. }
  289. if (bestChild == -1) break;
  290. /*! split best child into left and right child */
  291. BuildRecord left(current.depth+1);
  292. BuildRecord right(current.depth+1);
  293. std::unique_ptr<mvector<PrimRefMB>> new_vector = split(children[bestChild],left,right,aligned,timesplit);
  294. children.split(bestChild,left,right,std::move(new_vector));
  295. } while (children.size() < cfg.branchingFactor);
  296. /* detect time_ranges that have shrunken */
  297. for (size_t i=0; i<children.size(); i++) {
  298. const BBox1f c = children[i].prims.time_range;
  299. const BBox1f p = current.prims.time_range;
  300. timesplit |= c.lower > p.lower || c.upper < p.upper;
  301. }
  302. /* create time split node */
  303. if (timesplit)
  304. {
  305. const NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,true);
  306. /* spawn tasks or ... */
  307. if (current.size() > SINGLE_THREADED_THRESHOLD)
  308. {
  309. parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
  310. for (size_t i=r.begin(); i<r.end(); i++) {
  311. values[i] = recurse(children[i],nullptr,true);
  312. _mm_mfence(); // to allow non-temporal stores during build
  313. }
  314. });
  315. }
  316. /* ... continue sequential */
  317. else {
  318. for (size_t i=0; i<children.size(); i++) {
  319. values[i] = recurse(children[i],alloc,false);
  320. }
  321. }
  322. setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);
  323. const LBBox3fa bounds = current.prims.linearBounds(recalculatePrimRef);
  324. return NodeRecordMB4D(node,bounds,current.prims.time_range);
  325. }
  326. /* create aligned node */
  327. else if (aligned)
  328. {
  329. const NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,true);
  330. /* spawn tasks or ... */
  331. if (current.size() > SINGLE_THREADED_THRESHOLD)
  332. {
  333. LBBox3fa cbounds[MAX_BRANCHING_FACTOR];
  334. parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
  335. for (size_t i=r.begin(); i<r.end(); i++) {
  336. values[i] = recurse(children[i],nullptr,true);
  337. cbounds[i] = values[i].lbounds;
  338. _mm_mfence(); // to allow non-temporal stores during build
  339. }
  340. });
  341. LBBox3fa bounds = empty;
  342. for (size_t i=0; i<children.size(); i++)
  343. bounds.extend(cbounds[i]);
  344. setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);
  345. return NodeRecordMB4D(node,bounds,current.prims.time_range);
  346. }
  347. /* ... continue sequentially */
  348. else
  349. {
  350. LBBox3fa bounds = empty;
  351. for (size_t i=0; i<children.size(); i++) {
  352. values[i] = recurse(children[i],alloc,false);
  353. bounds.extend(values[i].lbounds);
  354. }
  355. setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);
  356. return NodeRecordMB4D(node,bounds,current.prims.time_range);
  357. }
  358. }
  359. /* create unaligned node */
  360. else
  361. {
  362. const NodeRef node = createOBBNodeMB(alloc);
  363. /* spawn tasks or ... */
  364. if (current.size() > SINGLE_THREADED_THRESHOLD)
  365. {
  366. parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
  367. for (size_t i=r.begin(); i<r.end(); i++) {
  368. const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpaceMB(scene,children[i].prims);
  369. const LBBox3fa lbounds = children[i].prims.linearBounds(recalculatePrimRef,space);
  370. const auto child = recurse(children[i],nullptr,true);
  371. setOBBNodeMB(node,i,child.ref,space,lbounds,children[i].prims.time_range);
  372. _mm_mfence(); // to allow non-temporal stores during build
  373. }
  374. });
  375. }
  376. /* ... continue sequentially */
  377. else
  378. {
  379. for (size_t i=0; i<children.size(); i++) {
  380. const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpaceMB(scene,children[i].prims);
  381. const LBBox3fa lbounds = children[i].prims.linearBounds(recalculatePrimRef,space);
  382. const auto child = recurse(children[i],alloc,false);
  383. setOBBNodeMB(node,i,child.ref,space,lbounds,children[i].prims.time_range);
  384. }
  385. }
  386. const LBBox3fa bounds = current.prims.linearBounds(recalculatePrimRef);
  387. return NodeRecordMB4D(node,bounds,current.prims.time_range);
  388. }
  389. }
  390. public:
  391. /*! entry point into builder */
  392. NodeRecordMB4D operator() (mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo)
  393. {
  394. BuildRecord record(SetMB(pinfo,&prims),1);
  395. auto root = recurse(record,nullptr,true);
  396. _mm_mfence(); // to allow non-temporal stores during build
  397. return root;
  398. }
  399. private:
  400. Settings cfg;
  401. Scene* scene;
  402. const RecalculatePrimRef& recalculatePrimRef;
  403. const CreateAllocFunc& createAlloc;
  404. const CreateAABBNodeMBFunc& createAABBNodeMB;
  405. const SetAABBNodeMBFunc& setAABBNodeMB;
  406. const CreateOBBNodeMBFunc& createOBBNodeMB;
  407. const SetOBBNodeMBFunc& setOBBNodeMB;
  408. const CreateLeafFunc& createLeaf;
  409. const ProgressMonitor& progressMonitor;
  410. private:
  411. HeuristicBinning alignedHeuristic;
  412. UnalignedHeuristicBinning unalignedHeuristic;
  413. HeuristicTemporal temporalSplitHeuristic;
  414. };
  415. template<typename NodeRef,
  416. typename RecalculatePrimRef,
  417. typename CreateAllocFunc,
  418. typename CreateAABBNodeMBFunc,
  419. typename SetAABBNodeMBFunc,
  420. typename CreateOBBNodeMBFunc,
  421. typename SetOBBNodeMBFunc,
  422. typename CreateLeafFunc,
  423. typename ProgressMonitor>
  424. static BVHNodeRecordMB4D<NodeRef> build (Scene* scene, mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo,
  425. const RecalculatePrimRef& recalculatePrimRef,
  426. const CreateAllocFunc& createAlloc,
  427. const CreateAABBNodeMBFunc& createAABBNodeMB,
  428. const SetAABBNodeMBFunc& setAABBNodeMB,
  429. const CreateOBBNodeMBFunc& createOBBNodeMB,
  430. const SetOBBNodeMBFunc& setOBBNodeMB,
  431. const CreateLeafFunc& createLeaf,
  432. const ProgressMonitor& progressMonitor,
  433. const Settings settings)
  434. {
  435. typedef BuilderT<NodeRef,RecalculatePrimRef,CreateAllocFunc,
  436. CreateAABBNodeMBFunc,SetAABBNodeMBFunc,
  437. CreateOBBNodeMBFunc,SetOBBNodeMBFunc,
  438. CreateLeafFunc,ProgressMonitor> Builder;
  439. Builder builder(scene,recalculatePrimRef,createAlloc,
  440. createAABBNodeMB,setAABBNodeMB,
  441. createOBBNodeMB,setOBBNodeMB,
  442. createLeaf,progressMonitor,settings);
  443. return builder(prims,pinfo);
  444. }
  445. };
  446. }
  447. }