// quadi.h (18 KB)
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "primitive.h"
  5. #include "../common/scene.h"
  6. namespace embree
  7. {
  8. /* Stores M quads from an indexed face set */
  9. template <int M>
  10. struct QuadMi
  11. {
  12. /* Virtual interface to query information about the quad type */
  13. struct Type : public PrimitiveType
  14. {
  15. const char* name() const;
  16. size_t sizeActive(const char* This) const;
  17. size_t sizeTotal(const char* This) const;
  18. size_t getBytes(const char* This) const;
  19. };
  20. static Type type;
  21. public:
  22. /* primitive supports multiple time segments */
  23. static const bool singleTimeSegment = false;
  24. /* Returns maximum number of stored quads */
  25. static __forceinline size_t max_size() { return M; }
  26. /* Returns required number of primitive blocks for N primitives */
  27. static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); }
  28. public:
  29. /* Default constructor */
  30. __forceinline QuadMi() { }
  31. /* Construction from vertices and IDs */
  32. __forceinline QuadMi(const vuint<M>& v0,
  33. const vuint<M>& v1,
  34. const vuint<M>& v2,
  35. const vuint<M>& v3,
  36. const vuint<M>& geomIDs,
  37. const vuint<M>& primIDs)
  38. #if defined(EMBREE_COMPACT_POLYS)
  39. : geomIDs(geomIDs), primIDs(primIDs) {}
  40. #else
  41. : v0_(v0),v1_(v1), v2_(v2), v3_(v3), geomIDs(geomIDs), primIDs(primIDs) {}
  42. #endif
  43. /* Returns a mask that tells which quads are valid */
  44. __forceinline vbool<M> valid() const { return primIDs != vuint<M>(-1); }
  45. /* Returns if the specified quad is valid */
  46. __forceinline bool valid(const size_t i) const { assert(i<M); return primIDs[i] != -1; }
  47. /* Returns the number of stored quads */
  48. __forceinline size_t size() const { return bsf(~movemask(valid())); }
  49. /* Returns the geometry IDs */
  50. __forceinline vuint<M>& geomID() { return geomIDs; }
  51. __forceinline const vuint<M>& geomID() const { return geomIDs; }
  52. __forceinline unsigned int geomID(const size_t i) const { assert(i<M); assert(geomIDs[i] != -1); return geomIDs[i]; }
  53. /* Returns the primitive IDs */
  54. __forceinline vuint<M>& primID() { return primIDs; }
  55. __forceinline const vuint<M>& primID() const { return primIDs; }
  56. __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; }
  57. /* Calculate the bounds of the quads */
  58. __forceinline const BBox3fa bounds(const Scene *const scene, const size_t itime=0) const
  59. {
  60. BBox3fa bounds = empty;
  61. for (size_t i=0; i<M && valid(i); i++) {
  62. const QuadMesh* mesh = scene->get<QuadMesh>(geomID(i));
  63. bounds.extend(mesh->bounds(primID(i),itime));
  64. }
  65. return bounds;
  66. }
  67. /* Calculate the linear bounds of the primitive */
  68. __forceinline LBBox3fa linearBounds(const Scene* const scene, const size_t itime) {
  69. return LBBox3fa(bounds(scene,itime+0),bounds(scene,itime+1));
  70. }
  71. __forceinline LBBox3fa linearBounds(const Scene *const scene, size_t itime, size_t numTimeSteps)
  72. {
  73. LBBox3fa allBounds = empty;
  74. for (size_t i=0; i<M && valid(i); i++)
  75. {
  76. const QuadMesh* mesh = scene->get<QuadMesh>(geomID(i));
  77. allBounds.extend(mesh->linearBounds(primID(i), itime, numTimeSteps));
  78. }
  79. return allBounds;
  80. }
  81. __forceinline LBBox3fa linearBounds(const Scene *const scene, const BBox1f time_range)
  82. {
  83. LBBox3fa allBounds = empty;
  84. for (size_t i=0; i<M && valid(i); i++)
  85. {
  86. const QuadMesh* mesh = scene->get<QuadMesh>(geomID(i));
  87. allBounds.extend(mesh->linearBounds(primID(i), time_range));
  88. }
  89. return allBounds;
  90. }
  91. /* Fill quad from quad list */
  92. template<typename PrimRefT>
  93. __forceinline void fill(const PrimRefT* prims, size_t& begin, size_t end, Scene* scene)
  94. {
  95. vuint<M> geomID = -1, primID = -1;
  96. const PrimRefT* prim = &prims[begin];
  97. vuint<M> v0 = zero, v1 = zero, v2 = zero, v3 = zero;
  98. for (size_t i=0; i<M; i++)
  99. {
  100. if (begin<end) {
  101. geomID[i] = prim->geomID();
  102. primID[i] = prim->primID();
  103. #if !defined(EMBREE_COMPACT_POLYS)
  104. const QuadMesh* mesh = scene->get<QuadMesh>(prim->geomID());
  105. const QuadMesh::Quad& q = mesh->quad(prim->primID());
  106. unsigned int_stride = mesh->vertices0.getStride()/4;
  107. v0[i] = q.v[0] * int_stride;
  108. v1[i] = q.v[1] * int_stride;
  109. v2[i] = q.v[2] * int_stride;
  110. v3[i] = q.v[3] * int_stride;
  111. #endif
  112. begin++;
  113. } else {
  114. assert(i);
  115. if (likely(i > 0)) {
  116. geomID[i] = geomID[0]; // always valid geomIDs
  117. primID[i] = -1; // indicates invalid data
  118. v0[i] = v0[0];
  119. v1[i] = v0[0];
  120. v2[i] = v0[0];
  121. v3[i] = v0[0];
  122. }
  123. }
  124. if (begin<end) prim = &prims[begin];
  125. }
  126. new (this) QuadMi(v0,v1,v2,v3,geomID,primID); // FIXME: use non temporal store
  127. }
  128. __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& begin, size_t end, Scene* scene, size_t itime)
  129. {
  130. fill(prims, begin, end, scene);
  131. return linearBounds(scene, itime);
  132. }
  133. __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t end, Scene* scene, const BBox1f time_range)
  134. {
  135. fill(prims, begin, end, scene);
  136. return linearBounds(scene, time_range);
  137. }
  138. friend embree_ostream operator<<(embree_ostream cout, const QuadMi& quad) {
  139. return cout << "QuadMi<" << M << ">( "
  140. #if !defined(EMBREE_COMPACT_POLYS)
  141. << "v0 = " << quad.v0_ << ", v1 = " << quad.v1_ << ", v2 = " << quad.v2_ << ", v3 = " << quad.v3_ << ", "
  142. #endif
  143. << "geomID = " << quad.geomIDs << ", primID = " << quad.primIDs << " )";
  144. }
  145. protected:
  146. #if !defined(EMBREE_COMPACT_POLYS)
  147. vuint<M> v0_; // 4 byte offset of 1st vertex
  148. vuint<M> v1_; // 4 byte offset of 2nd vertex
  149. vuint<M> v2_; // 4 byte offset of 3rd vertex
  150. vuint<M> v3_; // 4 byte offset of 4th vertex
  151. #endif
  152. vuint<M> geomIDs; // geometry ID of mesh
  153. vuint<M> primIDs; // primitive ID of primitive inside mesh
  154. };
  155. namespace isa
  156. {
  157. template<int M>
  158. struct QuadMi : public embree::QuadMi<M>
  159. {
  160. #if !defined(EMBREE_COMPACT_POLYS)
  161. using embree::QuadMi<M>::v0_;
  162. using embree::QuadMi<M>::v1_;
  163. using embree::QuadMi<M>::v2_;
  164. using embree::QuadMi<M>::v3_;
  165. #endif
  166. using embree::QuadMi<M>::geomIDs;
  167. using embree::QuadMi<M>::primIDs;
  168. using embree::QuadMi<M>::geomID;
  169. using embree::QuadMi<M>::primID;
  170. using embree::QuadMi<M>::valid;
  171. template<int vid>
  172. __forceinline Vec3f getVertex(const size_t index, const Scene *const scene) const
  173. {
  174. #if defined(EMBREE_COMPACT_POLYS)
  175. const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index));
  176. const QuadMesh::Quad& quad = mesh->quad(primID(index));
  177. return (Vec3f) mesh->vertices[0][quad.v[vid]];
  178. #else
  179. const vuint<M>& v = getVertexOffset<vid>();
  180. const float* vertices = scene->vertices[geomID(index)];
  181. return (Vec3f&) vertices[v[index]];
  182. #endif
  183. }
  184. template<int vid, typename T>
  185. __forceinline Vec3<T> getVertex(const size_t index, const Scene *const scene, const size_t itime, const T& ftime) const
  186. {
  187. #if defined(EMBREE_COMPACT_POLYS)
  188. const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index));
  189. const QuadMesh::Quad& quad = mesh->quad(primID(index));
  190. const Vec3fa v0 = mesh->vertices[itime+0][quad.v[vid]];
  191. const Vec3fa v1 = mesh->vertices[itime+1][quad.v[vid]];
  192. #else
  193. const vuint<M>& v = getVertexOffset<vid>();
  194. const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index));
  195. const float* vertices0 = (const float*) mesh->vertexPtr(0,itime+0);
  196. const float* vertices1 = (const float*) mesh->vertexPtr(0,itime+1);
  197. const Vec3fa v0 = Vec3fa::loadu(vertices0+v[index]);
  198. const Vec3fa v1 = Vec3fa::loadu(vertices1+v[index]);
  199. #endif
  200. const Vec3<T> p0(v0.x,v0.y,v0.z);
  201. const Vec3<T> p1(v1.x,v1.y,v1.z);
  202. return lerp(p0,p1,ftime);
  203. }
  204. template<int vid, int K, typename T>
  205. __forceinline Vec3<T> getVertex(const vbool<K>& valid, const size_t index, const Scene *const scene, const vint<K>& itime, const T& ftime) const
  206. {
  207. Vec3<T> p0, p1;
  208. const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index));
  209. for (size_t mask=movemask(valid), i=bsf(mask); mask; mask=btc(mask,i), i=bsf(mask))
  210. {
  211. #if defined(EMBREE_COMPACT_POLYS)
  212. const QuadMesh::Quad& quad = mesh->quad(primID(index));
  213. const Vec3fa v0 = mesh->vertices[itime[i]+0][quad.v[vid]];
  214. const Vec3fa v1 = mesh->vertices[itime[i]+1][quad.v[vid]];
  215. #else
  216. const vuint<M>& v = getVertexOffset<vid>();
  217. const float* vertices0 = (const float*) mesh->vertexPtr(0,itime[i]+0);
  218. const float* vertices1 = (const float*) mesh->vertexPtr(0,itime[i]+1);
  219. const Vec3fa v0 = Vec3fa::loadu(vertices0+v[index]);
  220. const Vec3fa v1 = Vec3fa::loadu(vertices1+v[index]);
  221. #endif
  222. p0.x[i] = v0.x; p0.y[i] = v0.y; p0.z[i] = v0.z;
  223. p1.x[i] = v1.x; p1.y[i] = v1.y; p1.z[i] = v1.z;
  224. }
  225. return (T(one)-ftime)*p0 + ftime*p1;
  226. }
  227. struct Quad {
  228. vfloat4 v0,v1,v2,v3;
  229. };
  230. #if defined(EMBREE_COMPACT_POLYS)
  231. __forceinline Quad loadQuad(const int i, const Scene* const scene) const
  232. {
  233. const unsigned int geomID = geomIDs[i];
  234. const unsigned int primID = primIDs[i];
  235. if (unlikely(primID == -1)) return { zero, zero, zero, zero };
  236. const QuadMesh* mesh = scene->get<QuadMesh>(geomID);
  237. const QuadMesh::Quad& quad = mesh->quad(primID);
  238. const vfloat4 v0 = (vfloat4) mesh->vertices0[quad.v[0]];
  239. const vfloat4 v1 = (vfloat4) mesh->vertices0[quad.v[1]];
  240. const vfloat4 v2 = (vfloat4) mesh->vertices0[quad.v[2]];
  241. const vfloat4 v3 = (vfloat4) mesh->vertices0[quad.v[3]];
  242. return { v0, v1, v2, v3 };
  243. }
  244. __forceinline Quad loadQuad(const int i, const int itime, const Scene* const scene) const
  245. {
  246. const unsigned int geomID = geomIDs[i];
  247. const unsigned int primID = primIDs[i];
  248. if (unlikely(primID == -1)) return { zero, zero, zero, zero };
  249. const QuadMesh* mesh = scene->get<QuadMesh>(geomID);
  250. const QuadMesh::Quad& quad = mesh->quad(primID);
  251. const vfloat4 v0 = (vfloat4) mesh->vertices[itime][quad.v[0]];
  252. const vfloat4 v1 = (vfloat4) mesh->vertices[itime][quad.v[1]];
  253. const vfloat4 v2 = (vfloat4) mesh->vertices[itime][quad.v[2]];
  254. const vfloat4 v3 = (vfloat4) mesh->vertices[itime][quad.v[3]];
  255. return { v0, v1, v2, v3 };
  256. }
  257. #else
  258. __forceinline Quad loadQuad(const int i, const Scene* const scene) const
  259. {
  260. const float* vertices = scene->vertices[geomID(i)];
  261. const vfloat4 v0 = vfloat4::loadu(vertices + v0_[i]);
  262. const vfloat4 v1 = vfloat4::loadu(vertices + v1_[i]);
  263. const vfloat4 v2 = vfloat4::loadu(vertices + v2_[i]);
  264. const vfloat4 v3 = vfloat4::loadu(vertices + v3_[i]);
  265. return { v0, v1, v2, v3 };
  266. }
  267. __forceinline Quad loadQuad(const int i, const int itime, const Scene* const scene) const
  268. {
  269. const unsigned int geomID = geomIDs[i];
  270. const QuadMesh* mesh = scene->get<QuadMesh>(geomID);
  271. const float* vertices = (const float*) mesh->vertexPtr(0,itime);
  272. const vfloat4 v0 = vfloat4::loadu(vertices + v0_[i]);
  273. const vfloat4 v1 = vfloat4::loadu(vertices + v1_[i]);
  274. const vfloat4 v2 = vfloat4::loadu(vertices + v2_[i]);
  275. const vfloat4 v3 = vfloat4::loadu(vertices + v3_[i]);
  276. return { v0, v1, v2, v3 };
  277. }
  278. #endif
  279. /* Gather the quads */
  280. __forceinline void gather(Vec3vf<M>& p0,
  281. Vec3vf<M>& p1,
  282. Vec3vf<M>& p2,
  283. Vec3vf<M>& p3,
  284. const Scene *const scene) const;
  285. #if defined(__AVX512F__)
  286. __forceinline void gather(Vec3vf16& p0,
  287. Vec3vf16& p1,
  288. Vec3vf16& p2,
  289. Vec3vf16& p3,
  290. const Scene *const scene) const;
  291. #endif
  292. template<int K>
  293. #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER < 2000) // workaround for compiler bug in ICC 2019
  294. __noinline
  295. #else
  296. __forceinline
  297. #endif
  298. void gather(const vbool<K>& valid,
  299. Vec3vf<K>& p0,
  300. Vec3vf<K>& p1,
  301. Vec3vf<K>& p2,
  302. Vec3vf<K>& p3,
  303. const size_t index,
  304. const Scene* const scene,
  305. const vfloat<K>& time) const
  306. {
  307. const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index));
  308. vfloat<K> ftime;
  309. const vint<K> itime = mesh->timeSegment<K>(time, ftime);
  310. const size_t first = bsf(movemask(valid));
  311. if (likely(all(valid,itime[first] == itime)))
  312. {
  313. p0 = getVertex<0>(index, scene, itime[first], ftime);
  314. p1 = getVertex<1>(index, scene, itime[first], ftime);
  315. p2 = getVertex<2>(index, scene, itime[first], ftime);
  316. p3 = getVertex<3>(index, scene, itime[first], ftime);
  317. }
  318. else
  319. {
  320. p0 = getVertex<0,K>(valid, index, scene, itime, ftime);
  321. p1 = getVertex<1,K>(valid, index, scene, itime, ftime);
  322. p2 = getVertex<2,K>(valid, index, scene, itime, ftime);
  323. p3 = getVertex<3,K>(valid, index, scene, itime, ftime);
  324. }
  325. }
  326. __forceinline void gather(Vec3vf<M>& p0,
  327. Vec3vf<M>& p1,
  328. Vec3vf<M>& p2,
  329. Vec3vf<M>& p3,
  330. const QuadMesh* mesh,
  331. const Scene *const scene,
  332. const int itime) const;
  333. __forceinline void gather(Vec3vf<M>& p0,
  334. Vec3vf<M>& p1,
  335. Vec3vf<M>& p2,
  336. Vec3vf<M>& p3,
  337. const Scene *const scene,
  338. const float time) const;
  339. /* Updates the primitive */
  340. __forceinline BBox3fa update(QuadMesh* mesh)
  341. {
  342. BBox3fa bounds = empty;
  343. for (size_t i=0; i<M; i++)
  344. {
  345. if (!valid(i)) break;
  346. const unsigned primId = primID(i);
  347. const QuadMesh::Quad& q = mesh->quad(primId);
  348. const Vec3fa p0 = mesh->vertex(q.v[0]);
  349. const Vec3fa p1 = mesh->vertex(q.v[1]);
  350. const Vec3fa p2 = mesh->vertex(q.v[2]);
  351. const Vec3fa p3 = mesh->vertex(q.v[3]);
  352. bounds.extend(merge(BBox3fa(p0),BBox3fa(p1),BBox3fa(p2),BBox3fa(p3)));
  353. }
  354. return bounds;
  355. }
  356. private:
  357. #if !defined(EMBREE_COMPACT_POLYS)
  358. template<int N> const vuint<M>& getVertexOffset() const;
  359. #endif
  360. };
  361. #if !defined(EMBREE_COMPACT_POLYS)
  362. template<> template<> __forceinline const vuint<4>& QuadMi<4>::getVertexOffset<0>() const { return v0_; }
  363. template<> template<> __forceinline const vuint<4>& QuadMi<4>::getVertexOffset<1>() const { return v1_; }
  364. template<> template<> __forceinline const vuint<4>& QuadMi<4>::getVertexOffset<2>() const { return v2_; }
  365. template<> template<> __forceinline const vuint<4>& QuadMi<4>::getVertexOffset<3>() const { return v3_; }
  366. #endif
  367. template<>
  368. __forceinline void QuadMi<4>::gather(Vec3vf4& p0,
  369. Vec3vf4& p1,
  370. Vec3vf4& p2,
  371. Vec3vf4& p3,
  372. const Scene *const scene) const
  373. {
  374. prefetchL1(((char*)this)+0*64);
  375. prefetchL1(((char*)this)+1*64);
  376. const Quad tri0 = loadQuad(0,scene);
  377. const Quad tri1 = loadQuad(1,scene);
  378. const Quad tri2 = loadQuad(2,scene);
  379. const Quad tri3 = loadQuad(3,scene);
  380. transpose(tri0.v0,tri1.v0,tri2.v0,tri3.v0,p0.x,p0.y,p0.z);
  381. transpose(tri0.v1,tri1.v1,tri2.v1,tri3.v1,p1.x,p1.y,p1.z);
  382. transpose(tri0.v2,tri1.v2,tri2.v2,tri3.v2,p2.x,p2.y,p2.z);
  383. transpose(tri0.v3,tri1.v3,tri2.v3,tri3.v3,p3.x,p3.y,p3.z);
  384. }
  385. template<>
  386. __forceinline void QuadMi<4>::gather(Vec3vf4& p0,
  387. Vec3vf4& p1,
  388. Vec3vf4& p2,
  389. Vec3vf4& p3,
  390. const QuadMesh* mesh,
  391. const Scene *const scene,
  392. const int itime) const
  393. {
  394. // FIXME: for trianglei there all geometries are identical, is this the case here too?
  395. const Quad tri0 = loadQuad(0,itime,scene);
  396. const Quad tri1 = loadQuad(1,itime,scene);
  397. const Quad tri2 = loadQuad(2,itime,scene);
  398. const Quad tri3 = loadQuad(3,itime,scene);
  399. transpose(tri0.v0,tri1.v0,tri2.v0,tri3.v0,p0.x,p0.y,p0.z);
  400. transpose(tri0.v1,tri1.v1,tri2.v1,tri3.v1,p1.x,p1.y,p1.z);
  401. transpose(tri0.v2,tri1.v2,tri2.v2,tri3.v2,p2.x,p2.y,p2.z);
  402. transpose(tri0.v3,tri1.v3,tri2.v3,tri3.v3,p3.x,p3.y,p3.z);
  403. }
  404. template<>
  405. __forceinline void QuadMi<4>::gather(Vec3vf4& p0,
  406. Vec3vf4& p1,
  407. Vec3vf4& p2,
  408. Vec3vf4& p3,
  409. const Scene *const scene,
  410. const float time) const
  411. {
  412. const QuadMesh* mesh = scene->get<QuadMesh>(geomID(0)); // in mblur mode all geometries are identical
  413. float ftime;
  414. const int itime = mesh->timeSegment(time, ftime);
  415. Vec3vf4 a0,a1,a2,a3; gather(a0,a1,a2,a3,mesh,scene,itime);
  416. Vec3vf4 b0,b1,b2,b3; gather(b0,b1,b2,b3,mesh,scene,itime+1);
  417. p0 = lerp(a0,b0,vfloat4(ftime));
  418. p1 = lerp(a1,b1,vfloat4(ftime));
  419. p2 = lerp(a2,b2,vfloat4(ftime));
  420. p3 = lerp(a3,b3,vfloat4(ftime));
  421. }
  422. }
  423. template<int M>
  424. typename QuadMi<M>::Type QuadMi<M>::type;
  425. typedef QuadMi<4> Quad4i;
  426. }