heuristic_strand_array.h 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "priminfo.h"
  5. #include "../../common/algorithms/parallel_reduce.h"
  6. #include "../../common/algorithms/parallel_partition.h"
  7. namespace embree
  8. {
  9. namespace isa
  10. {
  11. /*! Performs standard object binning */
  12. struct HeuristicStrandSplit
  13. {
  14. typedef range<size_t> Set;
  15. static const size_t PARALLEL_THRESHOLD = 10000;
  16. static const size_t PARALLEL_FIND_BLOCK_SIZE = 4096;
  17. static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 64;
  18. /*! stores all information to perform some split */
  19. struct Split
  20. {
  21. /*! construct an invalid split by default */
  22. __forceinline Split()
  23. : sah(inf), axis0(zero), axis1(zero) {}
  24. /*! constructs specified split */
  25. __forceinline Split(const float sah, const Vec3fa& axis0, const Vec3fa& axis1)
  26. : sah(sah), axis0(axis0), axis1(axis1) {}
  27. /*! calculates standard surface area heuristic for the split */
  28. __forceinline float splitSAH() const { return sah; }
  29. /*! test if this split is valid */
  30. __forceinline bool valid() const { return sah != float(inf); }
  31. public:
  32. float sah; //!< SAH cost of the split
  33. Vec3fa axis0, axis1; //!< axis the two strands are aligned into
  34. };
  35. __forceinline HeuristicStrandSplit () // FIXME: required?
  36. : scene(nullptr), prims(nullptr) {}
  37. /*! remember prim array */
  38. __forceinline HeuristicStrandSplit (Scene* scene, PrimRef* prims)
  39. : scene(scene), prims(prims) {}
  40. __forceinline const Vec3fa direction(const PrimRef& prim) {
  41. return scene->get(prim.geomID())->computeDirection(prim.primID());
  42. }
  43. __forceinline const BBox3fa bounds(const PrimRef& prim) {
  44. return scene->get(prim.geomID())->vbounds(prim.primID());
  45. }
  46. __forceinline const BBox3fa bounds(const LinearSpace3fa& space, const PrimRef& prim) {
  47. return scene->get(prim.geomID())->vbounds(space,prim.primID());
  48. }
  49. /*! finds the best split */
  50. const Split find(const range<size_t>& set, size_t logBlockSize)
  51. {
  52. Vec3fa axis0(0,0,1);
  53. uint64_t bestGeomPrimID = -1;
  54. /* curve with minimum ID determines first axis */
  55. for (size_t i=set.begin(); i<set.end(); i++)
  56. {
  57. const uint64_t geomprimID = prims[i].ID64();
  58. if (geomprimID >= bestGeomPrimID) continue;
  59. const Vec3fa axis = direction(prims[i]);
  60. if (sqr_length(axis) > 1E-18f) {
  61. axis0 = normalize(axis);
  62. bestGeomPrimID = geomprimID;
  63. }
  64. }
  65. /* find 2nd axis that is most misaligned with first axis and has minimum ID */
  66. float bestCos = 1.0f;
  67. Vec3fa axis1 = axis0;
  68. bestGeomPrimID = -1;
  69. for (size_t i=set.begin(); i<set.end(); i++)
  70. {
  71. const uint64_t geomprimID = prims[i].ID64();
  72. Vec3fa axisi = direction(prims[i]);
  73. float leni = length(axisi);
  74. if (leni == 0.0f) continue;
  75. axisi /= leni;
  76. float cos = abs(dot(axisi,axis0));
  77. if ((cos == bestCos && (geomprimID < bestGeomPrimID)) || cos < bestCos) {
  78. bestCos = cos; axis1 = axisi;
  79. bestGeomPrimID = geomprimID;
  80. }
  81. }
  82. /* partition the two strands */
  83. size_t lnum = 0, rnum = 0;
  84. BBox3fa lbounds = empty, rbounds = empty;
  85. const LinearSpace3fa space0 = frame(axis0).transposed();
  86. const LinearSpace3fa space1 = frame(axis1).transposed();
  87. for (size_t i=set.begin(); i<set.end(); i++)
  88. {
  89. PrimRef& prim = prims[i];
  90. const Vec3fa axisi = normalize(direction(prim));
  91. const float cos0 = abs(dot(axisi,axis0));
  92. const float cos1 = abs(dot(axisi,axis1));
  93. if (cos0 > cos1) { lnum++; lbounds.extend(bounds(space0,prim)); }
  94. else { rnum++; rbounds.extend(bounds(space1,prim)); }
  95. }
  96. /*! return an invalid split if we do not partition */
  97. if (lnum == 0 || rnum == 0)
  98. return Split(inf,axis0,axis1);
  99. /*! calculate sah for the split */
  100. const size_t lblocks = (lnum+(1ull<<logBlockSize)-1ull) >> logBlockSize;
  101. const size_t rblocks = (rnum+(1ull<<logBlockSize)-1ull) >> logBlockSize;
  102. const float sah = madd(float(lblocks),halfArea(lbounds),float(rblocks)*halfArea(rbounds));
  103. return Split(sah,axis0,axis1);
  104. }
  105. /*! array partitioning */
  106. void split(const Split& split, const PrimInfoRange& set, PrimInfoRange& lset, PrimInfoRange& rset)
  107. {
  108. if (!split.valid()) {
  109. deterministic_order(set);
  110. return splitFallback(set,lset,rset);
  111. }
  112. const size_t begin = set.begin();
  113. const size_t end = set.end();
  114. CentGeomBBox3fa local_left(empty);
  115. CentGeomBBox3fa local_right(empty);
  116. auto primOnLeftSide = [&] (const PrimRef& prim) -> bool {
  117. const Vec3fa axisi = normalize(direction(prim));
  118. const float cos0 = abs(dot(axisi,split.axis0));
  119. const float cos1 = abs(dot(axisi,split.axis1));
  120. return cos0 > cos1;
  121. };
  122. auto mergePrimBounds = [this] (CentGeomBBox3fa& pinfo,const PrimRef& ref) {
  123. pinfo.extend(bounds(ref));
  124. };
  125. size_t center = serial_partitioning(prims,begin,end,local_left,local_right,primOnLeftSide,mergePrimBounds);
  126. new (&lset) PrimInfoRange(begin,center,local_left);
  127. new (&rset) PrimInfoRange(center,end,local_right);
  128. assert(area(lset.geomBounds) >= 0.0f);
  129. assert(area(rset.geomBounds) >= 0.0f);
  130. }
  131. void deterministic_order(const Set& set)
  132. {
  133. /* required as parallel partition destroys original primitive order */
  134. std::sort(&prims[set.begin()],&prims[set.end()]);
  135. }
  136. void splitFallback(const Set& set, PrimInfoRange& lset, PrimInfoRange& rset)
  137. {
  138. const size_t begin = set.begin();
  139. const size_t end = set.end();
  140. const size_t center = (begin + end)/2;
  141. CentGeomBBox3fa left(empty);
  142. for (size_t i=begin; i<center; i++)
  143. left.extend(bounds(prims[i]));
  144. new (&lset) PrimInfoRange(begin,center,left);
  145. CentGeomBBox3fa right(empty);
  146. for (size_t i=center; i<end; i++)
  147. right.extend(bounds(prims[i]));
  148. new (&rset) PrimInfoRange(center,end,right);
  149. }
  150. private:
  151. Scene* const scene;
  152. PrimRef* const prims;
  153. };
  154. }
  155. }