heuristic_binning_array_unaligned.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "heuristic_binning.h"
  5. namespace embree
  6. {
  7. namespace isa
  8. {
  9. /*! Performs standard object binning */
  10. template<typename PrimRef, size_t BINS>
  11. struct UnalignedHeuristicArrayBinningSAH
  12. {
  13. typedef BinSplit<BINS> Split;
  14. typedef BinInfoT<BINS,PrimRef,BBox3fa> Binner;
  15. typedef range<size_t> Set;
  16. __forceinline UnalignedHeuristicArrayBinningSAH () // FIXME: required?
  17. : scene(nullptr), prims(nullptr) {}
  18. /*! remember prim array */
  19. __forceinline UnalignedHeuristicArrayBinningSAH (Scene* scene, PrimRef* prims)
  20. : scene(scene), prims(prims) {}
  21. const LinearSpace3fa computeAlignedSpace(const range<size_t>& set)
  22. {
  23. Vec3fa axis(0,0,1);
  24. uint64_t bestGeomPrimID = -1;
  25. /*! find curve with minimum ID that defines valid direction */
  26. for (size_t i=set.begin(); i<set.end(); i++)
  27. {
  28. const unsigned int geomID = prims[i].geomID();
  29. const unsigned int primID = prims[i].primID();
  30. const uint64_t geomprimID = prims[i].ID64();
  31. if (geomprimID >= bestGeomPrimID) continue;
  32. const Vec3fa axis1 = scene->get(geomID)->computeDirection(primID);
  33. if (sqr_length(axis1) > 1E-18f) {
  34. axis = normalize(axis1);
  35. bestGeomPrimID = geomprimID;
  36. }
  37. }
  38. return frame(axis).transposed();
  39. }
  40. const PrimInfo computePrimInfo(const range<size_t>& set, const LinearSpace3fa& space)
  41. {
  42. auto computeBounds = [&](const range<size_t>& r) -> CentGeomBBox3fa
  43. {
  44. CentGeomBBox3fa bounds(empty);
  45. for (size_t i=r.begin(); i<r.end(); i++) {
  46. Geometry* mesh = scene->get(prims[i].geomID());
  47. bounds.extend(mesh->vbounds(space,prims[i].primID()));
  48. }
  49. return bounds;
  50. };
  51. const CentGeomBBox3fa bounds = parallel_reduce(set.begin(), set.end(), size_t(1024), size_t(4096),
  52. CentGeomBBox3fa(empty), computeBounds, CentGeomBBox3fa::merge2);
  53. return PrimInfo(set.begin(),set.end(),bounds);
  54. }
  55. struct BinBoundsAndCenter
  56. {
  57. __forceinline BinBoundsAndCenter(Scene* scene, const LinearSpace3fa& space)
  58. : scene(scene), space(space) {}
  59. /*! returns center for binning */
  60. __forceinline Vec3fa binCenter(const PrimRef& ref) const
  61. {
  62. Geometry* mesh = (Geometry*) scene->get(ref.geomID());
  63. BBox3fa bounds = mesh->vbounds(space,ref.primID());
  64. return embree::center2(bounds);
  65. }
  66. /*! returns bounds and centroid used for binning */
  67. __forceinline void binBoundsAndCenter(const PrimRef& ref, BBox3fa& bounds_o, Vec3fa& center_o) const
  68. {
  69. Geometry* mesh = (Geometry*) scene->get(ref.geomID());
  70. BBox3fa bounds = mesh->vbounds(space,ref.primID());
  71. bounds_o = bounds;
  72. center_o = embree::center2(bounds);
  73. }
  74. private:
  75. Scene* scene;
  76. const LinearSpace3fa space;
  77. };
  78. /*! finds the best split */
  79. __forceinline const Split find(const PrimInfoRange& pinfo, const size_t logBlockSize, const LinearSpace3fa& space)
  80. {
  81. if (likely(pinfo.size() < 10000))
  82. return find_template<false>(pinfo,logBlockSize,space);
  83. else
  84. return find_template<true>(pinfo,logBlockSize,space);
  85. }
  86. /*! finds the best split */
  87. template<bool parallel>
  88. const Split find_template(const PrimInfoRange& set, const size_t logBlockSize, const LinearSpace3fa& space)
  89. {
  90. Binner binner(empty);
  91. const BinMapping<BINS> mapping(set);
  92. BinBoundsAndCenter binBoundsAndCenter(scene,space);
  93. bin_serial_or_parallel<parallel>(binner,prims,set.begin(),set.end(),size_t(4096),mapping,binBoundsAndCenter);
  94. return binner.best(mapping,logBlockSize);
  95. }
  96. /*! array partitioning */
  97. __forceinline void split(const Split& split, const LinearSpace3fa& space, const Set& set, PrimInfoRange& lset, PrimInfoRange& rset)
  98. {
  99. if (likely(set.size() < 10000))
  100. split_template<false>(split,space,set,lset,rset);
  101. else
  102. split_template<true>(split,space,set,lset,rset);
  103. }
  104. /*! array partitioning */
  105. template<bool parallel>
  106. __forceinline void split_template(const Split& split, const LinearSpace3fa& space, const Set& set, PrimInfoRange& lset, PrimInfoRange& rset)
  107. {
  108. if (!split.valid()) {
  109. deterministic_order(set);
  110. return splitFallback(set,lset,rset);
  111. }
  112. const size_t begin = set.begin();
  113. const size_t end = set.end();
  114. CentGeomBBox3fa local_left(empty);
  115. CentGeomBBox3fa local_right(empty);
  116. const int splitPos = split.pos;
  117. const int splitDim = split.dim;
  118. BinBoundsAndCenter binBoundsAndCenter(scene,space);
  119. size_t center = 0;
  120. if (likely(set.size() < 10000))
  121. center = serial_partitioning(prims,begin,end,local_left,local_right,
  122. [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,binBoundsAndCenter)[splitDim] < splitPos; },
  123. [] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); });
  124. else
  125. center = parallel_partitioning(prims,begin,end,EmptyTy(),local_left,local_right,
  126. [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,binBoundsAndCenter)[splitDim] < splitPos; },
  127. [] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); },
  128. [] (CentGeomBBox3fa& pinfo0,const CentGeomBBox3fa& pinfo1) { pinfo0.merge(pinfo1); },
  129. 128);
  130. new (&lset) PrimInfoRange(begin,center,local_left);
  131. new (&rset) PrimInfoRange(center,end,local_right);
  132. assert(area(lset.geomBounds) >= 0.0f);
  133. assert(area(rset.geomBounds) >= 0.0f);
  134. }
  135. void deterministic_order(const range<size_t>& set)
  136. {
  137. /* required as parallel partition destroys original primitive order */
  138. std::sort(&prims[set.begin()],&prims[set.end()]);
  139. }
  140. void splitFallback(const range<size_t>& set, PrimInfoRange& lset, PrimInfoRange& rset)
  141. {
  142. const size_t begin = set.begin();
  143. const size_t end = set.end();
  144. const size_t center = (begin + end)/2;
  145. CentGeomBBox3fa left(empty);
  146. for (size_t i=begin; i<center; i++)
  147. left.extend_center2(prims[i]);
  148. new (&lset) PrimInfoRange(begin,center,left);
  149. CentGeomBBox3fa right(empty);
  150. for (size_t i=center; i<end; i++)
  151. right.extend_center2(prims[i]);
  152. new (&rset) PrimInfoRange(center,end,right);
  153. }
  154. private:
  155. Scene* const scene;
  156. PrimRef* const prims;
  157. };
  158. /*! Performs standard object binning */
  159. template<typename PrimRefMB, size_t BINS>
  160. struct UnalignedHeuristicArrayBinningMB
  161. {
  162. typedef BinSplit<BINS> Split;
  163. typedef typename PrimRefMB::BBox BBox;
  164. typedef BinInfoT<BINS,PrimRefMB,BBox> ObjectBinner;
  165. static const size_t PARALLEL_THRESHOLD = 3 * 1024;
  166. static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
  167. static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
  168. UnalignedHeuristicArrayBinningMB(Scene* scene)
  169. : scene(scene) {}
  170. const LinearSpace3fa computeAlignedSpaceMB(Scene* scene, const SetMB& set)
  171. {
  172. Vec3fa axis0(0,0,1);
  173. uint64_t bestGeomPrimID = -1;
  174. /*! find curve with minimum ID that defines valid direction */
  175. for (size_t i=set.begin(); i<set.end(); i++)
  176. {
  177. const PrimRefMB& prim = (*set.prims)[i];
  178. const unsigned int geomID = prim.geomID();
  179. const unsigned int primID = prim.primID();
  180. const uint64_t geomprimID = prim.ID64();
  181. if (geomprimID >= bestGeomPrimID) continue;
  182. const Geometry* mesh = scene->get(geomID);
  183. const range<int> tbounds = mesh->timeSegmentRange(set.time_range);
  184. if (tbounds.size() == 0) continue;
  185. const size_t t = (tbounds.begin()+tbounds.end())/2;
  186. const Vec3fa axis1 = mesh->computeDirection(primID,t);
  187. if (sqr_length(axis1) > 1E-18f) {
  188. axis0 = normalize(axis1);
  189. bestGeomPrimID = geomprimID;
  190. }
  191. }
  192. return frame(axis0).transposed();
  193. }
  194. struct BinBoundsAndCenter
  195. {
  196. __forceinline BinBoundsAndCenter(Scene* scene, BBox1f time_range, const LinearSpace3fa& space)
  197. : scene(scene), time_range(time_range), space(space) {}
  198. /*! returns center for binning */
  199. template<typename PrimRef>
  200. __forceinline Vec3fa binCenter(const PrimRef& ref) const
  201. {
  202. Geometry* mesh = scene->get(ref.geomID());
  203. LBBox3fa lbounds = mesh->vlinearBounds(space,ref.primID(),time_range);
  204. return center2(lbounds.interpolate(0.5f));
  205. }
  206. /*! returns bounds and centroid used for binning */
  207. __noinline void binBoundsAndCenter (const PrimRefMB& ref, BBox3fa& bounds_o, Vec3fa& center_o) const // __noinline is workaround for ICC16 bug under MacOSX
  208. {
  209. Geometry* mesh = scene->get(ref.geomID());
  210. LBBox3fa lbounds = mesh->vlinearBounds(space,ref.primID(),time_range);
  211. bounds_o = lbounds.interpolate(0.5f);
  212. center_o = center2(bounds_o);
  213. }
  214. /*! returns bounds and centroid used for binning */
  215. __noinline void binBoundsAndCenter (const PrimRefMB& ref, LBBox3fa& bounds_o, Vec3fa& center_o) const // __noinline is workaround for ICC16 bug under MacOSX
  216. {
  217. Geometry* mesh = scene->get(ref.geomID());
  218. LBBox3fa lbounds = mesh->vlinearBounds(space,ref.primID(),time_range);
  219. bounds_o = lbounds;
  220. center_o = center2(lbounds.interpolate(0.5f));
  221. }
  222. private:
  223. Scene* scene;
  224. BBox1f time_range;
  225. const LinearSpace3fa space;
  226. };
  227. /*! finds the best split */
  228. const Split find(const SetMB& set, const size_t logBlockSize, const LinearSpace3fa& space)
  229. {
  230. BinBoundsAndCenter binBoundsAndCenter(scene,set.time_range,space);
  231. ObjectBinner binner(empty);
  232. const BinMapping<BINS> mapping(set.size(),set.centBounds);
  233. bin_parallel(binner,set.prims->data(),set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,mapping,binBoundsAndCenter);
  234. Split osplit = binner.best(mapping,logBlockSize);
  235. osplit.sah *= set.time_range.size();
  236. if (!osplit.valid()) osplit.data = Split::SPLIT_FALLBACK; // use fallback split
  237. return osplit;
  238. }
  239. /*! array partitioning */
  240. __forceinline void split(const Split& split, const LinearSpace3fa& space, const SetMB& set, SetMB& lset, SetMB& rset)
  241. {
  242. BinBoundsAndCenter binBoundsAndCenter(scene,set.time_range,space);
  243. const size_t begin = set.begin();
  244. const size_t end = set.end();
  245. PrimInfoMB left = empty;
  246. PrimInfoMB right = empty;
  247. const vint4 vSplitPos(split.pos);
  248. const vbool4 vSplitMask(1 << split.dim);
  249. auto isLeft = [&] (const PrimRefMB &ref) { return any(((vint4)split.mapping.bin_unsafe(ref,binBoundsAndCenter) < vSplitPos) & vSplitMask); };
  250. auto reduction = [] (PrimInfoMB& pinfo, const PrimRefMB& ref) { pinfo.add_primref(ref); };
  251. auto reduction2 = [] (PrimInfoMB& pinfo0,const PrimInfoMB& pinfo1) { pinfo0.merge(pinfo1); };
  252. size_t center = parallel_partitioning(set.prims->data(),begin,end,EmptyTy(),left,right,isLeft,reduction,reduction2,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD);
  253. new (&lset) SetMB(left,set.prims,range<size_t>(begin,center),set.time_range);
  254. new (&rset) SetMB(right,set.prims,range<size_t>(center,end ),set.time_range);
  255. }
  256. private:
  257. Scene* scene;
  258. };
  259. }
  260. }