bvh_builder_sah.h 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "heuristic_binning_array_aligned.h"
  5. #include "heuristic_spatial_array.h"
  6. #include "heuristic_openmerge_array.h"
  7. #if defined(__AVX512F__) && !defined(__AVX512VL__) // KNL
  8. # define NUM_OBJECT_BINS 16
  9. # define NUM_SPATIAL_BINS 16
  10. #else
  11. # define NUM_OBJECT_BINS 32
  12. # define NUM_SPATIAL_BINS 16
  13. #endif
  14. namespace embree
  15. {
  16. namespace isa
  17. {
  18. MAYBE_UNUSED static const float travCost = 1.0f;
  19. MAYBE_UNUSED static const size_t DEFAULT_SINGLE_THREAD_THRESHOLD = 1024;
  20. struct GeneralBVHBuilder
  21. {
  22. static const size_t MAX_BRANCHING_FACTOR = 16; //!< maximum supported BVH branching factor
  23. static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree of we are that many levels before the maximum tree depth
  24. /*! settings for SAH builder */
  25. struct Settings
  26. {
  27. /*! default settings */
  28. Settings ()
  29. : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7),
  30. travCost(1.0f), intCost(1.0f), singleThreadThreshold(1024), primrefarrayalloc(inf) {}
  31. /*! initialize settings from API settings */
  32. Settings (const RTCBuildArguments& settings)
  33. : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7),
  34. travCost(1.0f), intCost(1.0f), singleThreadThreshold(1024), primrefarrayalloc(inf)
  35. {
  36. if (RTC_BUILD_ARGUMENTS_HAS(settings,maxBranchingFactor)) branchingFactor = settings.maxBranchingFactor;
  37. if (RTC_BUILD_ARGUMENTS_HAS(settings,maxDepth )) maxDepth = settings.maxDepth;
  38. if (RTC_BUILD_ARGUMENTS_HAS(settings,sahBlockSize )) logBlockSize = bsr(settings.sahBlockSize);
  39. if (RTC_BUILD_ARGUMENTS_HAS(settings,minLeafSize )) minLeafSize = settings.minLeafSize;
  40. if (RTC_BUILD_ARGUMENTS_HAS(settings,maxLeafSize )) maxLeafSize = settings.maxLeafSize;
  41. if (RTC_BUILD_ARGUMENTS_HAS(settings,traversalCost )) travCost = settings.traversalCost;
  42. if (RTC_BUILD_ARGUMENTS_HAS(settings,intersectionCost )) intCost = settings.intersectionCost;
  43. minLeafSize = min(minLeafSize,maxLeafSize);
  44. }
  45. Settings (size_t sahBlockSize, size_t minLeafSize, size_t maxLeafSize, float travCost, float intCost, size_t singleThreadThreshold, size_t primrefarrayalloc = inf)
  46. : branchingFactor(2), maxDepth(32), logBlockSize(bsr(sahBlockSize)), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize),
  47. travCost(travCost), intCost(intCost), singleThreadThreshold(singleThreadThreshold), primrefarrayalloc(primrefarrayalloc)
  48. {
  49. minLeafSize = min(minLeafSize,maxLeafSize);
  50. }
  51. public:
  52. size_t branchingFactor; //!< branching factor of BVH to build
  53. size_t maxDepth; //!< maximum depth of BVH to build
  54. size_t logBlockSize; //!< log2 of blocksize for SAH heuristic
  55. size_t minLeafSize; //!< minimum size of a leaf
  56. size_t maxLeafSize; //!< maximum size of a leaf
  57. float travCost; //!< estimated cost of one traversal step
  58. float intCost; //!< estimated cost of one primitive intersection
  59. size_t singleThreadThreshold; //!< threshold when we switch to single threaded build
  60. size_t primrefarrayalloc; //!< builder uses prim ref array to allocate nodes and leaves when a subtree of that size is finished
  61. };
  62. /*! recursive state of builder */
  63. template<typename Set, typename Split>
  64. struct BuildRecordT
  65. {
  66. public:
  67. __forceinline BuildRecordT () {}
  68. __forceinline BuildRecordT (size_t depth)
  69. : depth(depth), alloc_barrier(false), prims(empty) {}
  70. __forceinline BuildRecordT (size_t depth, const Set& prims)
  71. : depth(depth), alloc_barrier(false), prims(prims) {}
  72. __forceinline BBox3fa bounds() const { return prims.geomBounds; }
  73. __forceinline friend bool operator< (const BuildRecordT& a, const BuildRecordT& b) { return a.prims.size() < b.prims.size(); }
  74. __forceinline friend bool operator> (const BuildRecordT& a, const BuildRecordT& b) { return a.prims.size() > b.prims.size(); }
  75. __forceinline size_t size() const { return prims.size(); }
  76. public:
  77. size_t depth; //!< Depth of the root of this subtree.
  78. bool alloc_barrier; //!< barrier used to reuse primref-array blocks to allocate nodes
  79. Set prims; //!< The list of primitives.
  80. };
  81. template<typename PrimRef, typename Set>
  82. struct DefaultCanCreateLeafFunc
  83. {
  84. __forceinline bool operator()(const PrimRef*, const Set&) const { return true; }
  85. };
  86. template<typename PrimRef, typename Set>
  87. struct DefaultCanCreateLeafSplitFunc
  88. {
  89. __forceinline void operator()(PrimRef*, const Set&, Set&, Set&) const { }
  90. };
  91. template<typename BuildRecord,
  92. typename Heuristic,
  93. typename Set,
  94. typename PrimRef,
  95. typename ReductionTy,
  96. typename Allocator,
  97. typename CreateAllocFunc,
  98. typename CreateNodeFunc,
  99. typename UpdateNodeFunc,
  100. typename CreateLeafFunc,
  101. typename CanCreateLeafFunc,
  102. typename CanCreateLeafSplitFunc,
  103. typename ProgressMonitor>
  104. class BuilderT
  105. {
  106. friend struct GeneralBVHBuilder;
  107. BuilderT (PrimRef* prims,
  108. Heuristic& heuristic,
  109. const CreateAllocFunc& createAlloc,
  110. const CreateNodeFunc& createNode,
  111. const UpdateNodeFunc& updateNode,
  112. const CreateLeafFunc& createLeaf,
  113. const CanCreateLeafFunc& canCreateLeaf,
  114. const CanCreateLeafSplitFunc& canCreateLeafSplit,
  115. const ProgressMonitor& progressMonitor,
  116. const Settings& settings) :
  117. cfg(settings),
  118. prims(prims),
  119. heuristic(heuristic),
  120. createAlloc(createAlloc),
  121. createNode(createNode),
  122. updateNode(updateNode),
  123. createLeaf(createLeaf),
  124. canCreateLeaf(canCreateLeaf),
  125. canCreateLeafSplit(canCreateLeafSplit),
  126. progressMonitor(progressMonitor)
  127. {
  128. if (cfg.branchingFactor > MAX_BRANCHING_FACTOR)
  129. throw_RTCError(RTC_ERROR_UNKNOWN,"bvh_builder: branching factor too large");
  130. }
  131. const ReductionTy createLargeLeaf(const BuildRecord& current, Allocator alloc)
  132. {
  133. /* this should never occur but is a fatal error */
  134. if (current.depth > cfg.maxDepth)
  135. throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");
  136. /* create leaf for few primitives */
  137. if (current.prims.size() <= cfg.maxLeafSize && canCreateLeaf(prims,current.prims))
  138. return createLeaf(prims,current.prims,alloc);
  139. /* fill all children by always splitting the largest one */
  140. ReductionTy values[MAX_BRANCHING_FACTOR];
  141. BuildRecord children[MAX_BRANCHING_FACTOR];
  142. size_t numChildren = 1;
  143. children[0] = current;
  144. do {
  145. /* find best child with largest bounding box area */
  146. size_t bestChild = -1;
  147. size_t bestSize = 0;
  148. for (size_t i=0; i<numChildren; i++)
  149. {
  150. /* ignore leaves as they cannot get split */
  151. if (children[i].prims.size() <= cfg.maxLeafSize && canCreateLeaf(prims,children[i].prims))
  152. continue;
  153. /* remember child with largest size */
  154. if (children[i].prims.size() > bestSize) {
  155. bestSize = children[i].prims.size();
  156. bestChild = i;
  157. }
  158. }
  159. if (bestChild == (size_t)-1) break;
  160. /*! split best child into left and right child */
  161. BuildRecord left(current.depth+1);
  162. BuildRecord right(current.depth+1);
  163. if (!canCreateLeaf(prims,children[bestChild].prims)) {
  164. canCreateLeafSplit(prims,children[bestChild].prims,left.prims,right.prims);
  165. } else {
  166. heuristic.splitFallback(children[bestChild].prims,left.prims,right.prims);
  167. }
  168. /* add new children left and right */
  169. children[bestChild] = children[numChildren-1];
  170. children[numChildren-1] = left;
  171. children[numChildren+0] = right;
  172. numChildren++;
  173. } while (numChildren < cfg.branchingFactor);
  174. /* set barrier for primrefarrayalloc */
  175. if (unlikely(current.size() > cfg.primrefarrayalloc))
  176. for (size_t i=0; i<numChildren; i++)
  177. children[i].alloc_barrier = children[i].size() <= cfg.primrefarrayalloc;
  178. /* create node */
  179. auto node = createNode(children,numChildren,alloc);
  180. /* recurse into each child and perform reduction */
  181. for (size_t i=0; i<numChildren; i++)
  182. values[i] = createLargeLeaf(children[i],alloc);
  183. /* perform reduction */
  184. return updateNode(current,children,node,values,numChildren);
  185. }
  186. const ReductionTy recurse(BuildRecord& current, Allocator alloc, bool toplevel)
  187. {
  188. /* get thread local allocator */
  189. if (!alloc)
  190. alloc = createAlloc();
  191. /* call memory monitor function to signal progress */
  192. if (toplevel && current.size() <= cfg.singleThreadThreshold)
  193. progressMonitor(current.size());
  194. /*! find best split */
  195. auto split = heuristic.find(current.prims,cfg.logBlockSize);
  196. /*! compute leaf and split cost */
  197. const float leafSAH = cfg.intCost*current.prims.leafSAH(cfg.logBlockSize);
  198. const float splitSAH = cfg.travCost*halfArea(current.prims.geomBounds)+cfg.intCost*split.splitSAH();
  199. assert((current.prims.size() == 0) || ((leafSAH >= 0) && (splitSAH >= 0)));
  200. /*! create a leaf node when threshold reached or SAH tells us to stop */
  201. if (current.prims.size() <= cfg.minLeafSize || current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || (current.prims.size() <= cfg.maxLeafSize && leafSAH <= splitSAH)) {
  202. heuristic.deterministic_order(current.prims);
  203. return createLargeLeaf(current,alloc);
  204. }
  205. /*! perform initial split */
  206. Set lprims,rprims;
  207. heuristic.split(split,current.prims,lprims,rprims);
  208. /*! initialize child list with initial split */
  209. ReductionTy values[MAX_BRANCHING_FACTOR];
  210. BuildRecord children[MAX_BRANCHING_FACTOR];
  211. children[0] = BuildRecord(current.depth+1,lprims);
  212. children[1] = BuildRecord(current.depth+1,rprims);
  213. size_t numChildren = 2;
  214. /*! split until node is full or SAH tells us to stop */
  215. while (numChildren < cfg.branchingFactor)
  216. {
  217. /*! find best child to split */
  218. float bestArea = neg_inf;
  219. ssize_t bestChild = -1;
  220. for (size_t i=0; i<numChildren; i++)
  221. {
  222. /* ignore leaves as they cannot get split */
  223. if (children[i].prims.size() <= cfg.minLeafSize) continue;
  224. /* find child with largest surface area */
  225. if (halfArea(children[i].prims.geomBounds) > bestArea) {
  226. bestChild = i;
  227. bestArea = halfArea(children[i].prims.geomBounds);
  228. }
  229. }
  230. if (bestChild == -1) break;
  231. /* perform best found split */
  232. BuildRecord& brecord = children[bestChild];
  233. BuildRecord lrecord(current.depth+1);
  234. BuildRecord rrecord(current.depth+1);
  235. auto split = heuristic.find(brecord.prims,cfg.logBlockSize);
  236. heuristic.split(split,brecord.prims,lrecord.prims,rrecord.prims);
  237. children[bestChild ] = lrecord;
  238. children[numChildren] = rrecord;
  239. numChildren++;
  240. }
  241. /* set barrier for primrefarrayalloc */
  242. if (unlikely(current.size() > cfg.primrefarrayalloc))
  243. for (size_t i=0; i<numChildren; i++)
  244. children[i].alloc_barrier = children[i].size() <= cfg.primrefarrayalloc;
  245. /* sort buildrecords for faster shadow ray traversal */
  246. std::sort(&children[0],&children[numChildren],std::greater<BuildRecord>());
  247. /*! create an inner node */
  248. auto node = createNode(children,numChildren,alloc);
  249. /* spawn tasks */
  250. if (current.size() > cfg.singleThreadThreshold)
  251. {
  252. /*! parallel_for is faster than spawning sub-tasks */
  253. parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) { // FIXME: no range here
  254. for (size_t i=r.begin(); i<r.end(); i++) {
  255. values[i] = recurse(children[i],nullptr,true);
  256. _mm_mfence(); // to allow non-temporal stores during build
  257. }
  258. });
  259. return updateNode(current,children,node,values,numChildren);
  260. }
  261. /* recurse into each child */
  262. else
  263. {
  264. for (size_t i=0; i<numChildren; i++)
  265. values[i] = recurse(children[i],alloc,false);
  266. return updateNode(current,children,node,values,numChildren);
  267. }
  268. }
  269. private:
  270. Settings cfg;
  271. PrimRef* prims;
  272. Heuristic& heuristic;
  273. const CreateAllocFunc& createAlloc;
  274. const CreateNodeFunc& createNode;
  275. const UpdateNodeFunc& updateNode;
  276. const CreateLeafFunc& createLeaf;
  277. const CanCreateLeafFunc& canCreateLeaf;
  278. const CanCreateLeafSplitFunc& canCreateLeafSplit;
  279. const ProgressMonitor& progressMonitor;
  280. };
  281. template<
  282. typename ReductionTy,
  283. typename Heuristic,
  284. typename Set,
  285. typename PrimRef,
  286. typename CreateAllocFunc,
  287. typename CreateNodeFunc,
  288. typename UpdateNodeFunc,
  289. typename CreateLeafFunc,
  290. typename ProgressMonitor>
  291. __noinline static ReductionTy build(Heuristic& heuristic,
  292. PrimRef* prims,
  293. const Set& set,
  294. CreateAllocFunc createAlloc,
  295. CreateNodeFunc createNode, UpdateNodeFunc updateNode,
  296. const CreateLeafFunc& createLeaf,
  297. const ProgressMonitor& progressMonitor,
  298. const Settings& settings)
  299. {
  300. typedef BuildRecordT<Set,typename Heuristic::Split> BuildRecord;
  301. typedef BuilderT<
  302. BuildRecord,
  303. Heuristic,
  304. Set,
  305. PrimRef,
  306. ReductionTy,
  307. decltype(createAlloc()),
  308. CreateAllocFunc,
  309. CreateNodeFunc,
  310. UpdateNodeFunc,
  311. CreateLeafFunc,
  312. DefaultCanCreateLeafFunc<PrimRef, Set>,
  313. DefaultCanCreateLeafSplitFunc<PrimRef, Set>,
  314. ProgressMonitor> Builder;
  315. /* instantiate builder */
  316. Builder builder(prims,
  317. heuristic,
  318. createAlloc,
  319. createNode,
  320. updateNode,
  321. createLeaf,
  322. DefaultCanCreateLeafFunc<PrimRef, Set>(),
  323. DefaultCanCreateLeafSplitFunc<PrimRef, Set>(),
  324. progressMonitor,
  325. settings);
  326. /* build hierarchy */
  327. BuildRecord record(1,set);
  328. const ReductionTy root = builder.recurse(record,nullptr,true);
  329. _mm_mfence(); // to allow non-temporal stores during build
  330. return root;
  331. }
  332. template<
  333. typename ReductionTy,
  334. typename Heuristic,
  335. typename Set,
  336. typename PrimRef,
  337. typename CreateAllocFunc,
  338. typename CreateNodeFunc,
  339. typename UpdateNodeFunc,
  340. typename CreateLeafFunc,
  341. typename CanCreateLeafFunc,
  342. typename CanCreateLeafSplitFunc,
  343. typename ProgressMonitor>
  344. __noinline static ReductionTy build(Heuristic& heuristic,
  345. PrimRef* prims,
  346. const Set& set,
  347. CreateAllocFunc createAlloc,
  348. CreateNodeFunc createNode, UpdateNodeFunc updateNode,
  349. const CreateLeafFunc& createLeaf,
  350. const CanCreateLeafFunc& canCreateLeaf,
  351. const CanCreateLeafSplitFunc& canCreateLeafSplit,
  352. const ProgressMonitor& progressMonitor,
  353. const Settings& settings)
  354. {
  355. typedef BuildRecordT<Set,typename Heuristic::Split> BuildRecord;
  356. typedef BuilderT<
  357. BuildRecord,
  358. Heuristic,
  359. Set,
  360. PrimRef,
  361. ReductionTy,
  362. decltype(createAlloc()),
  363. CreateAllocFunc,
  364. CreateNodeFunc,
  365. UpdateNodeFunc,
  366. CreateLeafFunc,
  367. CanCreateLeafFunc,
  368. CanCreateLeafSplitFunc,
  369. ProgressMonitor> Builder;
  370. /* instantiate builder */
  371. Builder builder(prims,
  372. heuristic,
  373. createAlloc,
  374. createNode,
  375. updateNode,
  376. createLeaf,
  377. canCreateLeaf,
  378. canCreateLeafSplit,
  379. progressMonitor,
  380. settings);
  381. /* build hierarchy */
  382. BuildRecord record(1,set);
  383. const ReductionTy root = builder.recurse(record,nullptr,true);
  384. _mm_mfence(); // to allow non-temporal stores during build
  385. return root;
  386. }
  387. };
  388. /* SAH builder that operates on an array of BuildRecords */
  389. struct BVHBuilderBinnedSAH
  390. {
  391. typedef PrimInfoRange Set;
  392. typedef HeuristicArrayBinningSAH<PrimRef,NUM_OBJECT_BINS> Heuristic;
  393. typedef GeneralBVHBuilder::BuildRecordT<Set,typename Heuristic::Split> BuildRecord;
  394. typedef GeneralBVHBuilder::Settings Settings;
  395. /*! special builder that propagates reduction over the tree */
  396. template<
  397. typename ReductionTy,
  398. typename CreateAllocFunc,
  399. typename CreateNodeFunc,
  400. typename UpdateNodeFunc,
  401. typename CreateLeafFunc,
  402. typename ProgressMonitor>
  403. static ReductionTy build(CreateAllocFunc createAlloc,
  404. CreateNodeFunc createNode, UpdateNodeFunc updateNode,
  405. const CreateLeafFunc& createLeaf,
  406. const ProgressMonitor& progressMonitor,
  407. PrimRef* prims, const PrimInfo& pinfo,
  408. const Settings& settings)
  409. {
  410. Heuristic heuristic(prims);
  411. return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>(
  412. heuristic,
  413. prims,
  414. PrimInfoRange(0,pinfo.size(),pinfo),
  415. createAlloc,
  416. createNode,
  417. updateNode,
  418. createLeaf,
  419. progressMonitor,
  420. settings);
  421. }
  422. /*! special builder that propagates reduction over the tree */
  423. template<
  424. typename ReductionTy,
  425. typename CreateAllocFunc,
  426. typename CreateNodeFunc,
  427. typename UpdateNodeFunc,
  428. typename CreateLeafFunc,
  429. typename CanCreateLeafFunc,
  430. typename CanCreateLeafSplitFunc,
  431. typename ProgressMonitor>
  432. static ReductionTy build(CreateAllocFunc createAlloc,
  433. CreateNodeFunc createNode, UpdateNodeFunc updateNode,
  434. const CreateLeafFunc& createLeaf,
  435. const CanCreateLeafFunc& canCreateLeaf,
  436. const CanCreateLeafSplitFunc& canCreateLeafSplit,
  437. const ProgressMonitor& progressMonitor,
  438. PrimRef* prims, const PrimInfo& pinfo,
  439. const Settings& settings)
  440. {
  441. Heuristic heuristic(prims);
  442. return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>(
  443. heuristic,
  444. prims,
  445. PrimInfoRange(0,pinfo.size(),pinfo),
  446. createAlloc,
  447. createNode,
  448. updateNode,
  449. createLeaf,
  450. canCreateLeaf,
  451. canCreateLeafSplit,
  452. progressMonitor,
  453. settings);
  454. }
  455. };
  456. /* Spatial SAH builder that operates on an double-buffered array of BuildRecords */
  457. struct BVHBuilderBinnedFastSpatialSAH
  458. {
  459. typedef PrimInfoExtRange Set;
  460. typedef Split2<BinSplit<NUM_OBJECT_BINS>,SpatialBinSplit<NUM_SPATIAL_BINS> > Split;
  461. typedef GeneralBVHBuilder::BuildRecordT<Set,Split> BuildRecord;
  462. typedef GeneralBVHBuilder::Settings Settings;
  463. static const unsigned int GEOMID_MASK = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS;
  464. static const unsigned int SPLITS_MASK = 0xFFFFFFFF << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS);
  465. template<typename ReductionTy, typename UserCreateLeaf>
  466. struct CreateLeafExt
  467. {
  468. __forceinline CreateLeafExt (const UserCreateLeaf userCreateLeaf)
  469. : userCreateLeaf(userCreateLeaf) {}
  470. // __noinline is workaround for ICC2016 compiler bug
  471. template<typename Allocator>
  472. __noinline ReductionTy operator() (PrimRef* prims, const range<size_t>& range, Allocator alloc) const
  473. {
  474. for (size_t i=range.begin(); i<range.end(); i++)
  475. prims[i].lower.u &= GEOMID_MASK;
  476. return userCreateLeaf(prims,range,alloc);
  477. }
  478. const UserCreateLeaf userCreateLeaf;
  479. };
  480. /*! special builder that propagates reduction over the tree */
  481. template<
  482. typename ReductionTy,
  483. typename CreateAllocFunc,
  484. typename CreateNodeFunc,
  485. typename UpdateNodeFunc,
  486. typename CreateLeafFunc,
  487. typename SplitPrimitiveFunc,
  488. typename ProgressMonitor>
  489. static ReductionTy build(CreateAllocFunc createAlloc,
  490. CreateNodeFunc createNode,
  491. UpdateNodeFunc updateNode,
  492. const CreateLeafFunc& createLeaf,
  493. SplitPrimitiveFunc splitPrimitive,
  494. ProgressMonitor progressMonitor,
  495. PrimRef* prims,
  496. const size_t extSize,
  497. const PrimInfo& pinfo,
  498. const Settings& settings)
  499. {
  500. typedef HeuristicArraySpatialSAH<SplitPrimitiveFunc,PrimRef,NUM_OBJECT_BINS,NUM_SPATIAL_BINS> Heuristic;
  501. Heuristic heuristic(splitPrimitive,prims,pinfo);
  502. /* calculate total surface area */ // FIXME: this sum is not deterministic
  503. const float A = (float) parallel_reduce(size_t(0),pinfo.size(),0.0, [&] (const range<size_t>& r) -> double {
  504. double A = 0.0f;
  505. for (size_t i=r.begin(); i<r.end(); i++)
  506. {
  507. PrimRef& prim = prims[i];
  508. A += area(prim.bounds());
  509. }
  510. return A;
  511. },std::plus<double>());
  512. /* calculate maximum number of spatial splits per primitive */
  513. const unsigned int maxSplits = ((size_t)1 << RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)-1;
  514. const float f = 10.0f;
  515. const float invA = 1.0f / A;
  516. parallel_for( size_t(0), pinfo.size(), [&](const range<size_t>& r) {
  517. for (size_t i=r.begin(); i<r.end(); i++)
  518. {
  519. PrimRef& prim = prims[i];
  520. assert((prim.geomID() & SPLITS_MASK) == 0);
  521. // FIXME: is there a better general heuristic ?
  522. const float nf = ceilf(f*pinfo.size()*area(prim.bounds()) * invA);
  523. unsigned int n = 4+min((int)maxSplits-4, max(1, (int)(nf)));
  524. prim.lower.u |= n << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS);
  525. }
  526. });
  527. return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>(
  528. heuristic,
  529. prims,
  530. PrimInfoExtRange(0,pinfo.size(),extSize,pinfo),
  531. createAlloc,
  532. createNode,
  533. updateNode,
  534. CreateLeafExt<ReductionTy,CreateLeafFunc>(createLeaf),
  535. progressMonitor,
  536. settings);
  537. }
  538. };
  539. /* Open/Merge SAH builder that operates on an array of BuildRecords */
  540. struct BVHBuilderBinnedOpenMergeSAH
  541. {
  542. static const size_t NUM_OBJECT_BINS_HQ = 32;
  543. typedef PrimInfoExtRange Set;
  544. typedef BinSplit<NUM_OBJECT_BINS_HQ> Split;
  545. typedef GeneralBVHBuilder::BuildRecordT<Set,Split> BuildRecord;
  546. typedef GeneralBVHBuilder::Settings Settings;
  547. /*! special builder that propagates reduction over the tree */
  548. template<
  549. typename ReductionTy,
  550. typename BuildRef,
  551. typename CreateAllocFunc,
  552. typename CreateNodeFunc,
  553. typename UpdateNodeFunc,
  554. typename CreateLeafFunc,
  555. typename NodeOpenerFunc,
  556. typename ProgressMonitor>
  557. static ReductionTy build(CreateAllocFunc createAlloc,
  558. CreateNodeFunc createNode,
  559. UpdateNodeFunc updateNode,
  560. const CreateLeafFunc& createLeaf,
  561. NodeOpenerFunc nodeOpenerFunc,
  562. ProgressMonitor progressMonitor,
  563. BuildRef* prims,
  564. const size_t extSize,
  565. const PrimInfo& pinfo,
  566. const Settings& settings)
  567. {
  568. typedef HeuristicArrayOpenMergeSAH<NodeOpenerFunc,BuildRef,NUM_OBJECT_BINS_HQ> Heuristic;
  569. Heuristic heuristic(nodeOpenerFunc,prims,settings.branchingFactor);
  570. return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,BuildRef>(
  571. heuristic,
  572. prims,
  573. PrimInfoExtRange(0,pinfo.size(),extSize,pinfo),
  574. createAlloc,
  575. createNode,
  576. updateNode,
  577. createLeaf,
  578. progressMonitor,
  579. settings);
  580. }
  581. };
  582. }
  583. }