heuristic_openmerge_array.h

// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

// TODO:
//  - adjust parallel build thresholds
//  - openNodesBasedOnExtend should consider max extended size

#pragma once

#include "heuristic_binning.h"
#include "heuristic_spatial.h"

/* stop opening if all bref.geomIDs are the same */
#define EQUAL_GEOMID_STOP_CRITERIA 1

/* 10% spatial extent threshold */
#define MAX_EXTEND_THRESHOLD 0.1f

/* at most 8 child nodes are created when opening a node */
#define MAX_OPENED_CHILD_NODES 8

/* open until all build refs are below threshold size in one step */
#define USE_LOOP_OPENING 0
namespace embree
{
  namespace isa
  {
    /*! Performs standard object binning, additionally opening build references into their children when extended space is available */
    template<typename NodeOpenerFunc, typename PrimRef, size_t OBJECT_BINS>
      struct HeuristicArrayOpenMergeSAH
      {
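        /* NodeOpenerFunc is only used below through calls of the form
         *   PrimRef tmp[MAX_OPENED_CHILD_NODES];
         *   const size_t n = nodeOpenerFunc(prims0[i],tmp);
         * so it is presumably expected to write the child build references of an
         * openable node into 'tmp' and return their count (at most max_open_size). */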
        typedef BinSplit<OBJECT_BINS> Split;
        typedef BinInfoT<OBJECT_BINS,PrimRef,BBox3fa> Binner;

        static const size_t PARALLEL_THRESHOLD = 1024;
        static const size_t PARALLEL_FIND_BLOCK_SIZE = 512;
        static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;

        static const size_t MOVE_STEP_SIZE = 64;
        static const size_t CREATE_SPLITS_STEP_SIZE = 128;
        __forceinline HeuristicArrayOpenMergeSAH ()
          : prims0(nullptr) {}

        /*! remember prim array */
        __forceinline HeuristicArrayOpenMergeSAH (const NodeOpenerFunc& nodeOpenerFunc, PrimRef* prims0, size_t max_open_size)
          : prims0(prims0), nodeOpenerFunc(nodeOpenerFunc), max_open_size(max_open_size)
        {
          assert(max_open_size <= MAX_OPENED_CHILD_NODES);
        }
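        /*! decides whether a build reference should be opened: it must reference an
            inner node and its bounds must span more than MAX_EXTEND_THRESHOLD (10%)
            of the set's extent along the set's largest dimension */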
        struct OpenHeuristic
        {
          __forceinline OpenHeuristic( const PrimInfoExtRange& pinfo )
          {
            const Vec3fa diag = pinfo.geomBounds.size();
            dim = maxDim(diag);
            assert(diag[dim] > 0.0f);
            inv_max_extend = 1.0f / diag[dim];
          }

          __forceinline bool operator () ( PrimRef& prim ) const {
            return !prim.node.isLeaf() && prim.bounds().size()[dim] * inv_max_extend > MAX_EXTEND_THRESHOLD;
          }

        private:
          size_t dim;
          float inv_max_extend;
        };
        /*! compute extended ranges: the parent's extended range is distributed to the
            left and right child proportionally to their primitive counts */
        __forceinline void setExtentedRanges(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset, const size_t lweight, const size_t rweight)
        {
          assert(set.ext_range_size() > 0);
          const float left_factor           = (float)lweight / (lweight + rweight);
          const size_t ext_range_size       = set.ext_range_size();
          const size_t left_ext_range_size  = min((size_t)(floorf(left_factor * ext_range_size)),ext_range_size);
          const size_t right_ext_range_size = ext_range_size - left_ext_range_size;
          lset.set_ext_range(lset.end() + left_ext_range_size);
          rset.set_ext_range(rset.end() + right_ext_range_size);
        }
        /*! move the right range behind the extended range of the left child */
        __forceinline void moveExtentedRange(const PrimInfoExtRange& set, const PrimInfoExtRange& lset, PrimInfoExtRange& rset)
        {
          const size_t left_ext_range_size = lset.ext_range_size();
          const size_t right_size = rset.size();

          /* does the left child have an extended range? */
          if (left_ext_range_size > 0)
          {
            /* is the left extended range smaller than the right range? */
            if (left_ext_range_size < right_size)
            {
              /* only move a small part of the beginning of the right range to the end */
              parallel_for( rset.begin(), rset.begin()+left_ext_range_size, MOVE_STEP_SIZE, [&](const range<size_t>& r) {
                for (size_t i=r.begin(); i<r.end(); i++)
                  prims0[i+right_size] = prims0[i];
              });
            }
            else
            {
              /* no overlap, move entire right range to new location, can be made fully parallel */
              parallel_for( rset.begin(), rset.end(), MOVE_STEP_SIZE, [&](const range<size_t>& r) {
                for (size_t i=r.begin(); i<r.end(); i++)
                  prims0[i+left_ext_range_size] = prims0[i];
              });
            }

            /* update right range */
            assert(rset.ext_end() + left_ext_range_size == set.ext_end());
            rset.move_right(left_ext_range_size);
          }
        }
        /* estimates the extra space required when opening, and checks if all primitives are from the same geometry */
        __noinline std::pair<size_t,bool> getProperties(const PrimInfoExtRange& set)
        {
          const OpenHeuristic heuristic(set);
          const unsigned int geomID = prims0[set.begin()].geomID();

          auto body = [&] (const range<size_t>& r) -> std::pair<size_t,bool> {
            bool commonGeomID = true;
            size_t opens = 0;
            for (size_t i=r.begin(); i<r.end(); i++) {
              commonGeomID &= prims0[i].geomID() == geomID;
              if (heuristic(prims0[i]))
                opens += prims0[i].node.getN()-1; // coarse approximation
            }
            return std::pair<size_t,bool>(opens,commonGeomID);
          };
          auto reduction = [&] (const std::pair<size_t,bool>& b0, const std::pair<size_t,bool>& b1) -> std::pair<size_t,bool> {
            return std::pair<size_t,bool>(b0.first+b1.first,b0.second && b1.second);
          };
          return parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,std::pair<size_t,bool>(0,true),body,reduction);
        }
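        /*! opens all openable build references of the set in one pass: each opened
            reference is replaced in place by its first child, the remaining children
            are appended behind set.end() into the extended range */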
        // FIXME: should consider maximum available extended size
        __noinline void openNodesBasedOnExtend(PrimInfoExtRange& set)
        {
          const OpenHeuristic heuristic(set);
          const size_t ext_range_start = set.end();

          /* sequential path (currently disabled) */
          if (false && set.size() < PARALLEL_THRESHOLD)
          {
            size_t extra_elements = 0;
            for (size_t i=set.begin(); i<set.end(); i++)
            {
              if (heuristic(prims0[i]))
              {
                PrimRef tmp[MAX_OPENED_CHILD_NODES];
                const size_t n = nodeOpenerFunc(prims0[i],tmp);
                assert(extra_elements + n-1 <= set.ext_range_size());
                for (size_t j=0; j<n; j++)
                  set.extend_center2(tmp[j]);

                prims0[i] = tmp[0];
                for (size_t j=1; j<n; j++)
                  prims0[ext_range_start+extra_elements+j-1] = tmp[j];
                extra_elements += n-1;
              }
            }
            set._end += extra_elements;
          }
          else
          {
            /* parallel path */
            std::atomic<size_t> ext_elements;
            ext_elements.store(0);
            PrimInfo info = parallel_reduce( set.begin(), set.end(), CREATE_SPLITS_STEP_SIZE, PrimInfo(empty), [&](const range<size_t>& r) -> PrimInfo {
                PrimInfo info(empty);
                for (size_t i=r.begin(); i<r.end(); i++)
                  if (heuristic(prims0[i]))
                  {
                    PrimRef tmp[MAX_OPENED_CHILD_NODES];
                    const size_t n = nodeOpenerFunc(prims0[i],tmp);
                    const size_t ID = ext_elements.fetch_add(n-1);
                    assert(ID + n-1 <= set.ext_range_size());
                    for (size_t j=0; j<n; j++)
                      info.extend_center2(tmp[j]);

                    prims0[i] = tmp[0];
                    for (size_t j=1; j<n; j++)
                      prims0[ext_range_start+ID+j-1] = tmp[j];
                  }
                return info;
              }, [] (const PrimInfo& a, const PrimInfo& b) { return PrimInfo::merge(a,b); });

            set.centBounds.extend(info.centBounds);
            assert(ext_elements.load() <= set.ext_range_size());
            set._end += ext_elements.load();
          }
        }
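        /*! alternative opening strategy (see USE_LOOP_OPENING): keeps opening build
            references over multiple passes while the estimated number of extra
            elements for the next pass still fits into the extended range */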
        __noinline void openNodesBasedOnExtendLoop(PrimInfoExtRange& set, const size_t est_new_elements)
        {
          const OpenHeuristic heuristic(set);
          size_t next_iteration_extra_elements = est_new_elements;

          while (next_iteration_extra_elements <= set.ext_range_size())
          {
            next_iteration_extra_elements = 0;
            size_t extra_elements = 0;
            const size_t ext_range_start = set.end();

            for (size_t i=set.begin(); i<set.end(); i++)
            {
              if (heuristic(prims0[i]))
              {
                PrimRef tmp[MAX_OPENED_CHILD_NODES];
                const size_t n = nodeOpenerFunc(prims0[i],tmp);
                assert(extra_elements + n-1 <= set.ext_range_size());
                for (size_t j=0; j<n; j++)
                  set.extend_center2(tmp[j]);

                prims0[i] = tmp[0];
                for (size_t j=1; j<n; j++)
                  prims0[ext_range_start+extra_elements+j-1] = tmp[j];
                extra_elements += n-1;

                for (size_t j=0; j<n; j++)
                  if (heuristic(tmp[j]))
                    next_iteration_extra_elements += tmp[j].node.getN()-1; // coarse approximation
              }
            }
            assert( extra_elements <= set.ext_range_size());
            set._end += extra_elements;

            for (size_t i=set.begin(); i<set.end(); i++)
              assert(prims0[i].numPrimitives() > 0);

            if (unlikely(next_iteration_extra_elements == 0)) break;
          }
        }
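        /*! computes the best object split, opening build references beforehand when
            an extended range with sufficient space is available and opening has not
            been disabled by one of the stop criteria below */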
        __noinline const Split find(PrimInfoExtRange& set, const size_t logBlockSize)
        {
          /* at most one element */
          if (set.size() <= 1)
            return Split();

          /* disable opening if there is no overlap */
          const size_t D = 4;
          if (unlikely(set.has_ext_range() && set.size() <= D))
          {
            bool disjoint = true;
            for (size_t j=set.begin(); j<set.end()-1; j++) {
              for (size_t i=set.begin()+1; i<set.end(); i++) {
                if (conjoint(prims0[j].bounds(),prims0[i].bounds())) {
                  disjoint = false; break;
                }
              }
            }
            if (disjoint) set.set_ext_range(set.end()); /* disables opening */
          }

          std::pair<size_t,bool> p(0,false);

          /* disable opening when all primitives are from the same geometry */
          if (unlikely(set.has_ext_range()))
          {
            p = getProperties(set);
#if EQUAL_GEOMID_STOP_CRITERIA == 1
            if (p.second) set.set_ext_range(set.end()); /* disable opening */
#endif
          }

          /* open nodes when we have sufficient space available */
          if (unlikely(set.has_ext_range()))
          {
#if USE_LOOP_OPENING == 1
            openNodesBasedOnExtendLoop(set,p.first);
#else
            if (p.first <= set.ext_range_size())
              openNodesBasedOnExtend(set);
#endif

            /* disable opening when the remaining space is insufficient for opening another node */
            if (set.ext_range_size() < max_open_size-1)
              set.set_ext_range(set.end()); /* disable opening */
          }

          /* find best split */
          return object_find(set,logBlockSize);
        }
        /*! finds the best object split */
        __forceinline const Split object_find(const PrimInfoExtRange& set,const size_t logBlockSize)
        {
          if (set.size() < PARALLEL_THRESHOLD) return sequential_object_find(set,logBlockSize);
          else                                 return parallel_object_find (set,logBlockSize);
        }
        /*! finds the best object split sequentially */
        __noinline const Split sequential_object_find(const PrimInfoExtRange& set, const size_t logBlockSize)
        {
          Binner binner(empty);
          const BinMapping<OBJECT_BINS> mapping(set.centBounds);
          binner.bin(prims0,set.begin(),set.end(),mapping);
          return binner.best(mapping,logBlockSize);
        }
        /*! finds the best object split in parallel */
        __noinline const Split parallel_object_find(const PrimInfoExtRange& set, const size_t logBlockSize)
        {
          Binner binner(empty);
          const BinMapping<OBJECT_BINS> mapping(set.centBounds);
          const BinMapping<OBJECT_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround
          auto body = [&] (const range<size_t>& r) -> Binner {
            Binner binner(empty); binner.bin(prims0+r.begin(),r.size(),_mapping); return binner;
          };
          auto reduction = [&] (const Binner& b0, const Binner& b1) -> Binner {
            Binner r = b0; r.merge(b1,_mapping.size()); return r;
          };
          binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner,body,reduction);
          return binner.best(mapping,logBlockSize);
        }
        /*! array partitioning */
        __noinline void split(const Split& split, const PrimInfoExtRange& set_i, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
        {
          PrimInfoExtRange set = set_i;

          /* no valid split => fall back to a deterministic median split */
          if (unlikely(!split.valid())) {
            deterministic_order(set);
            splitFallback(set,lset,rset);
            return;
          }

          std::pair<size_t,size_t> ext_weights(0,0);

          /* object split */
          if (likely(set.size() < PARALLEL_THRESHOLD))
            ext_weights = sequential_object_split(split,set,lset,rset);
          else
            ext_weights = parallel_object_split(split,set,lset,rset);

          /* if we have an extended range, set extended child ranges and move right split range */
          if (unlikely(set.has_ext_range()))
          {
            setExtentedRanges(set,lset,rset,ext_weights.first,ext_weights.second);
            moveExtentedRange(set,lset,rset);
          }
        }
        /*! array partitioning */
        std::pair<size_t,size_t> sequential_object_split(const Split& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
        {
          const size_t begin = set.begin();
          const size_t end   = set.end();
          PrimInfo local_left(empty);
          PrimInfo local_right(empty);
          const unsigned int splitPos = split.pos;
          const unsigned int splitDim = split.dim;
          const unsigned int splitDimMask = (unsigned int)1 << splitDim;

          const vint4 vSplitPos(splitPos);
          const vbool4 vSplitMask( (int)splitDimMask );

          size_t center = serial_partitioning(prims0,
                                              begin,end,local_left,local_right,
                                              [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); },
                                              [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref); });

          new (&lset) PrimInfoExtRange(begin,center,center,local_left);
          new (&rset) PrimInfoExtRange(center,end,end,local_right);
          assert(area(lset.geomBounds) >= 0.0f);
          assert(area(rset.geomBounds) >= 0.0f);
          return std::pair<size_t,size_t>(local_left.size(),local_right.size());
        }
        /*! array partitioning */
        __noinline std::pair<size_t,size_t> parallel_object_split(const Split& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
        {
          const size_t begin = set.begin();
          const size_t end   = set.end();
          PrimInfo left(empty);
          PrimInfo right(empty);
          const unsigned int splitPos = split.pos;
          const unsigned int splitDim = split.dim;
          const unsigned int splitDimMask = (unsigned int)1 << splitDim;

          const vint4 vSplitPos(splitPos);
          const vbool4 vSplitMask( (int)splitDimMask );
          auto isLeft = [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); };

          const size_t center = parallel_partitioning(
            prims0,begin,end,EmptyTy(),left,right,isLeft,
            [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref); },
            [] (PrimInfo& pinfo0,const PrimInfo& pinfo1) { pinfo0.merge(pinfo1); },
            PARALLEL_PARTITION_BLOCK_SIZE);

          new (&lset) PrimInfoExtRange(begin,center,center,left);
          new (&rset) PrimInfoExtRange(center,end,end,right);
          assert(area(lset.geomBounds) >= 0.0f);
          assert(area(rset.geomBounds) >= 0.0f);
          return std::pair<size_t,size_t>(left.size(),right.size());
        }
        void deterministic_order(const extended_range<size_t>& set)
        {
          /* required as parallel partition destroys original primitive order */
          std::sort(&prims0[set.begin()],&prims0[set.end()]);
        }
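        /*! fallback split that simply cuts the range in the middle; also distributes
            the extended range to both halves if one is present */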
        __forceinline void splitFallback(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
        {
          const size_t begin  = set.begin();
          const size_t end    = set.end();
          const size_t center = (begin + end)/2;

          PrimInfo left(empty);
          for (size_t i=begin; i<center; i++)
            left.add_center2(prims0[i]);
          const size_t lweight = left.end;

          PrimInfo right(empty);
          for (size_t i=center; i<end; i++)
            right.add_center2(prims0[i]);
          const size_t rweight = right.end;

          new (&lset) PrimInfoExtRange(begin,center,center,left);
          new (&rset) PrimInfoExtRange(center,end,end,right);

          /* if we have an extended range */
          if (set.has_ext_range())
          {
            setExtentedRanges(set,lset,rset,lweight,rweight);
            moveExtentedRange(set,lset,rset);
          }
        }
      private:
        PrimRef* const prims0;
        const NodeOpenerFunc& nodeOpenerFunc;
        size_t max_open_size;
      };
  }
}
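
/* Illustrative usage sketch (not part of the original source; the node opener and
 * the OBJECT_BINS value below are hypothetical and only mirror how the class is
 * used within this header):
 *
 *   struct MyNodeOpener {
 *     // write the child build references of an openable node into 'children'
 *     // and return how many were written (<= MAX_OPENED_CHILD_NODES)
 *     size_t operator() (PrimRef& ref, PrimRef children[MAX_OPENED_CHILD_NODES]) const;
 *   };
 *
 *   MyNodeOpener opener;
 *   HeuristicArrayOpenMergeSAH<MyNodeOpener,PrimRef,32> heuristic(opener,prims,MAX_OPENED_CHILD_NODES);
 *
 *   PrimInfoExtRange lset,rset;
 *   const auto split = heuristic.find(set,logBlockSize); // may open nodes into the extended range of 'set'
 *   heuristic.split(split,set,lset,rset);                // partitions prims and distributes the extended range
 */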