linei.h 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "primitive.h"
  5. namespace embree
  6. {
  7. template<int M>
  8. struct LineMi
  9. {
  10. /* Virtual interface to query information about the line segment type */
  11. struct Type : public PrimitiveType
  12. {
  13. const char* name() const;
  14. size_t sizeActive(const char* This) const;
  15. size_t sizeTotal(const char* This) const;
  16. size_t getBytes(const char* This) const;
  17. };
  18. static Type type;
  19. public:
  20. /* primitive supports multiple time segments */
  21. static const bool singleTimeSegment = false;
  22. /* Returns maximum number of stored line segments */
  23. static __forceinline size_t max_size() { return M; }
  24. /* Returns required number of primitive blocks for N line segments */
  25. static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); }
  26. /* Returns required number of bytes for N line segments */
  27. static __forceinline size_t bytes(size_t N) { return blocks(N)*sizeof(LineMi); }
  28. public:
  29. /* Default constructor */
  30. __forceinline LineMi() { }
  31. /* Construction from vertices and IDs */
  32. __forceinline LineMi(const vuint<M>& v0, unsigned short leftExists, unsigned short rightExists, const vuint<M>& geomIDs, const vuint<M>& primIDs, Geometry::GType gtype)
  33. : gtype((unsigned char)gtype), m((unsigned char)popcnt(vuint<M>(primIDs) != vuint<M>(-1))), sharedGeomID(geomIDs[0]), leftExists (leftExists), rightExists(rightExists), v0(v0), primIDs(primIDs)
  34. {
  35. assert(all(vuint<M>(geomID()) == geomIDs));
  36. }
  37. /* Returns a mask that tells which line segments are valid */
  38. __forceinline vbool<M> valid() const { return primIDs != vuint<M>(-1); }
  39. /* Returns if the specified line segment is valid */
  40. __forceinline bool valid(const size_t i) const { assert(i<M); return primIDs[i] != -1; }
  41. /* Returns the number of stored line segments */
  42. __forceinline size_t size() const { return bsf(~movemask(valid())); }
  43. /* Returns the geometry IDs */
  44. //template<class T>
  45. //static __forceinline T unmask(T &index) { return index & 0x3fffffff; }
  46. __forceinline unsigned int geomID(unsigned int i = 0) const { return sharedGeomID; }
  47. //__forceinline vuint<M> geomID() { return unmask(geomIDs); }
  48. //__forceinline const vuint<M> geomID() const { return unmask(geomIDs); }
  49. //__forceinline unsigned int geomID(const size_t i) const { assert(i<M); return unmask(geomIDs[i]); }
  50. /* Returns the primitive IDs */
  51. __forceinline vuint<M>& primID() { return primIDs; }
  52. __forceinline const vuint<M>& primID() const { return primIDs; }
  53. __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; }
  54. /* gather the line segments */
  55. __forceinline void gather(Vec4vf<M>& p0,
  56. Vec4vf<M>& p1,
  57. const LineSegments* geom) const;
  58. __forceinline void gatheri(Vec4vf<M>& p0,
  59. Vec4vf<M>& p1,
  60. const LineSegments* geom,
  61. const int itime) const;
  62. __forceinline void gather(Vec4vf<M>& p0,
  63. Vec4vf<M>& p1,
  64. const LineSegments* geom,
  65. float time) const;
  66. /* gather the line segments with lateral info */
  67. __forceinline void gather(Vec4vf<M>& p0,
  68. Vec4vf<M>& p1,
  69. Vec4vf<M>& pL,
  70. Vec4vf<M>& pR,
  71. const LineSegments* geom) const;
  72. __forceinline void gatheri(Vec4vf<M>& p0,
  73. Vec4vf<M>& p1,
  74. Vec4vf<M>& pL,
  75. Vec4vf<M>& pR,
  76. const LineSegments* geom,
  77. const int itime) const;
  78. __forceinline void gather(Vec4vf<M>& p0,
  79. Vec4vf<M>& p1,
  80. Vec4vf<M>& pL,
  81. Vec4vf<M>& pR,
  82. const LineSegments* geom,
  83. float time) const;
  84. __forceinline void gather(Vec4vf<M>& p0,
  85. Vec4vf<M>& p1,
  86. vbool<M>& cL,
  87. vbool<M>& cR,
  88. const LineSegments* geom) const;
  89. __forceinline void gatheri(Vec4vf<M>& p0,
  90. Vec4vf<M>& p1,
  91. vbool<M>& cL,
  92. vbool<M>& cR,
  93. const LineSegments* geom,
  94. const int itime) const;
  95. __forceinline void gather(Vec4vf<M>& p0,
  96. Vec4vf<M>& p1,
  97. vbool<M>& cL,
  98. vbool<M>& cR,
  99. const LineSegments* geom,
  100. float time) const;
  101. /* Calculate the bounds of the line segments */
  102. __forceinline const BBox3fa bounds(const Scene* scene, size_t itime = 0) const
  103. {
  104. BBox3fa bounds = empty;
  105. for (size_t i=0; i<M && valid(i); i++)
  106. {
  107. const LineSegments* geom = scene->get<LineSegments>(geomID(i));
  108. const Vec3ff& p0 = geom->vertex(v0[i]+0,itime);
  109. const Vec3ff& p1 = geom->vertex(v0[i]+1,itime);
  110. BBox3fa b = merge(BBox3fa(p0),BBox3fa(p1));
  111. b = enlarge(b,Vec3fa(max(p0.w,p1.w)));
  112. bounds.extend(b);
  113. }
  114. return bounds;
  115. }
  116. /* Calculate the linear bounds of the primitive */
  117. __forceinline LBBox3fa linearBounds(const Scene* scene, size_t itime) {
  118. return LBBox3fa(bounds(scene,itime+0), bounds(scene,itime+1));
  119. }
  120. __forceinline LBBox3fa linearBounds(const Scene *const scene, size_t itime, size_t numTimeSteps) {
  121. LBBox3fa allBounds = empty;
  122. for (size_t i=0; i<M && valid(i); i++)
  123. {
  124. const LineSegments* geom = scene->get<LineSegments>(geomID(i));
  125. allBounds.extend(geom->linearBounds(primID(i), itime, numTimeSteps));
  126. }
  127. return allBounds;
  128. }
  129. __forceinline LBBox3fa linearBounds(const Scene *const scene, const BBox1f time_range)
  130. {
  131. LBBox3fa allBounds = empty;
  132. for (size_t i=0; i<M && valid(i); i++)
  133. {
  134. const LineSegments* geom = scene->get<LineSegments>(geomID((unsigned int)i));
  135. allBounds.extend(geom->linearBounds(primID(i), time_range));
  136. }
  137. return allBounds;
  138. }
  139. /* Fill line segment from line segment list */
  140. template<typename PrimRefT>
  141. __forceinline void fill(const PrimRefT* prims, size_t& begin, size_t end, Scene* scene)
  142. {
  143. Geometry::GType gty = scene->get(prims[begin].geomID())->getType();
  144. vuint<M> geomID, primID;
  145. vuint<M> v0;
  146. unsigned short leftExists = 0;
  147. unsigned short rightExists = 0;
  148. const PrimRefT* prim = &prims[begin];
  149. for (size_t i=0; i<M; i++)
  150. {
  151. const LineSegments* geom = scene->get<LineSegments>(prim->geomID());
  152. if (begin<end) {
  153. geomID[i] = prim->geomID();
  154. primID[i] = prim->primID();
  155. v0[i] = geom->segment(prim->primID());
  156. leftExists |= geom->segmentLeftExists(primID[i]) << i;
  157. rightExists |= geom->segmentRightExists(primID[i]) << i;
  158. begin++;
  159. } else {
  160. assert(i);
  161. if (i>0) {
  162. geomID[i] = geomID[i-1];
  163. primID[i] = -1;
  164. v0[i] = v0[i-1];
  165. }
  166. }
  167. if (begin<end) prim = &prims[begin]; // FIXME: remove this line
  168. }
  169. new (this) LineMi(v0,leftExists,rightExists,geomID,primID,gty); // FIXME: use non temporal store
  170. }
  171. template<typename BVH, typename Allocator>
  172. __forceinline static typename BVH::NodeRef createLeaf (BVH* bvh, const PrimRef* prims, const range<size_t>& set, const Allocator& alloc)
  173. {
  174. size_t start = set.begin();
  175. size_t items = LineMi::blocks(set.size());
  176. size_t numbytes = LineMi::bytes(set.size());
  177. LineMi* accel = (LineMi*) alloc.malloc1(numbytes,M*sizeof(float));
  178. for (size_t i=0; i<items; i++) {
  179. accel[i].fill(prims,start,set.end(),bvh->scene);
  180. }
  181. return bvh->encodeLeaf((char*)accel,items);
  182. };
  183. __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& begin, size_t end, Scene* scene, size_t itime)
  184. {
  185. fill(prims,begin,end,scene);
  186. return linearBounds(scene,itime);
  187. }
  188. __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t end, Scene* scene, const BBox1f time_range)
  189. {
  190. fill(prims,begin,end,scene);
  191. return linearBounds(scene,time_range);
  192. }
  193. template<typename BVH, typename SetMB, typename Allocator>
  194. __forceinline static typename BVH::NodeRecordMB4D createLeafMB(BVH* bvh, const SetMB& prims, const Allocator& alloc)
  195. {
  196. size_t start = prims.begin();
  197. size_t end = prims.end();
  198. size_t items = LineMi::blocks(prims.size());
  199. size_t numbytes = LineMi::bytes(prims.size());
  200. LineMi* accel = (LineMi*) alloc.malloc1(numbytes,M*sizeof(float));
  201. const typename BVH::NodeRef node = bvh->encodeLeaf((char*)accel,items);
  202. LBBox3fa bounds = empty;
  203. for (size_t i=0; i<items; i++)
  204. bounds.extend(accel[i].fillMB(prims.prims->data(),start,end,bvh->scene,prims.time_range));
  205. return typename BVH::NodeRecordMB4D(node,bounds,prims.time_range);
  206. };
  207. /* Updates the primitive */
  208. __forceinline BBox3fa update(LineSegments* geom)
  209. {
  210. BBox3fa bounds = empty;
  211. for (size_t i=0; i<M && valid(i); i++)
  212. {
  213. const Vec3ff& p0 = geom->vertex(v0[i]+0);
  214. const Vec3ff& p1 = geom->vertex(v0[i]+1);
  215. BBox3fa b = merge(BBox3fa(p0),BBox3fa(p1));
  216. b = enlarge(b,Vec3fa(max(p0.w,p1.w)));
  217. bounds.extend(b);
  218. }
  219. return bounds;
  220. }
  221. /*! output operator */
  222. friend __forceinline embree_ostream operator<<(embree_ostream cout, const LineMi& line) {
  223. return cout << "Line" << M << "i {" << line.v0 << ", " << line.geomID() << ", " << line.primID() << "}";
  224. }
  225. public:
  226. unsigned char gtype;
  227. unsigned char m;
  228. unsigned int sharedGeomID;
  229. unsigned short leftExists, rightExists;
  230. vuint<M> v0; // index of start vertex
  231. private:
  232. vuint<M> primIDs; // primitive ID
  233. };
  234. template<>
  235. __forceinline void LineMi<4>::gather(Vec4vf4& p0,
  236. Vec4vf4& p1,
  237. const LineSegments* geom) const
  238. {
  239. const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0]));
  240. const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1]));
  241. const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2]));
  242. const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3]));
  243. transpose(a0,a1,a2,a3,p0.x,p0.y,p0.z,p0.w);
  244. const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1));
  245. const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1));
  246. const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1));
  247. const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1));
  248. transpose(b0,b1,b2,b3,p1.x,p1.y,p1.z,p1.w);
  249. }
  250. template<>
  251. __forceinline void LineMi<4>::gatheri(Vec4vf4& p0,
  252. Vec4vf4& p1,
  253. const LineSegments* geom,
  254. const int itime) const
  255. {
  256. const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0],itime));
  257. const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1],itime));
  258. const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2],itime));
  259. const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3],itime));
  260. transpose(a0,a1,a2,a3,p0.x,p0.y,p0.z,p0.w);
  261. const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1,itime));
  262. const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1,itime));
  263. const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1,itime));
  264. const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1,itime));
  265. transpose(b0,b1,b2,b3,p1.x,p1.y,p1.z,p1.w);
  266. }
  267. template<>
  268. __forceinline void LineMi<4>::gather(Vec4vf4& p0,
  269. Vec4vf4& p1,
  270. const LineSegments* geom,
  271. float time) const
  272. {
  273. float ftime;
  274. const int itime = geom->timeSegment(time, ftime);
  275. Vec4vf4 a0,a1;
  276. gatheri(a0,a1,geom,itime);
  277. Vec4vf4 b0,b1;
  278. gatheri(b0,b1,geom,itime+1);
  279. p0 = lerp(a0,b0,vfloat4(ftime));
  280. p1 = lerp(a1,b1,vfloat4(ftime));
  281. }
  282. template<>
  283. __forceinline void LineMi<4>::gather(Vec4vf4& p0,
  284. Vec4vf4& p1,
  285. vbool4& cL,
  286. vbool4& cR,
  287. const LineSegments* geom) const
  288. {
  289. gather(p0,p1,geom);
  290. cL = !vbool4(leftExists);
  291. cR = !vbool4(rightExists);
  292. }
  293. template<>
  294. __forceinline void LineMi<4>::gatheri(Vec4vf4& p0,
  295. Vec4vf4& p1,
  296. vbool4& cL,
  297. vbool4& cR,
  298. const LineSegments* geom,
  299. const int itime) const
  300. {
  301. gatheri(p0,p1,geom,itime);
  302. cL = !vbool4(leftExists);
  303. cR = !vbool4(rightExists);
  304. }
  305. template<>
  306. __forceinline void LineMi<4>::gather(Vec4vf4& p0,
  307. Vec4vf4& p1,
  308. vbool4& cL,
  309. vbool4& cR,
  310. const LineSegments* geom,
  311. float time) const
  312. {
  313. float ftime;
  314. const int itime = geom->timeSegment(time, ftime);
  315. Vec4vf4 a0,a1;
  316. gatheri(a0,a1,geom,itime);
  317. Vec4vf4 b0,b1;
  318. gatheri(b0,b1,geom,itime+1);
  319. p0 = lerp(a0,b0,vfloat4(ftime));
  320. p1 = lerp(a1,b1,vfloat4(ftime));
  321. cL = !vbool4(leftExists);
  322. cR = !vbool4(rightExists);
  323. }
  324. template<>
  325. __forceinline void LineMi<4>::gather(Vec4vf4& p0,
  326. Vec4vf4& p1,
  327. Vec4vf4& pL,
  328. Vec4vf4& pR,
  329. const LineSegments* geom) const
  330. {
  331. const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0]));
  332. const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1]));
  333. const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2]));
  334. const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3]));
  335. transpose(a0,a1,a2,a3,p0.x,p0.y,p0.z,p0.w);
  336. const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1));
  337. const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1));
  338. const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1));
  339. const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1));
  340. transpose(b0,b1,b2,b3,p1.x,p1.y,p1.z,p1.w);
  341. const vfloat4 l0 = (leftExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]-1)) : vfloat4(inf);
  342. const vfloat4 l1 = (leftExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]-1)) : vfloat4(inf);
  343. const vfloat4 l2 = (leftExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]-1)) : vfloat4(inf);
  344. const vfloat4 l3 = (leftExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]-1)) : vfloat4(inf);
  345. transpose(l0,l1,l2,l3,pL.x,pL.y,pL.z,pL.w);
  346. const vfloat4 r0 = (rightExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]+2)) : vfloat4(inf);
  347. const vfloat4 r1 = (rightExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]+2)) : vfloat4(inf);
  348. const vfloat4 r2 = (rightExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]+2)) : vfloat4(inf);
  349. const vfloat4 r3 = (rightExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]+2)) : vfloat4(inf);
  350. transpose(r0,r1,r2,r3,pR.x,pR.y,pR.z,pR.w);
  351. }
  352. template<>
  353. __forceinline void LineMi<4>::gatheri(Vec4vf4& p0,
  354. Vec4vf4& p1,
  355. Vec4vf4& pL,
  356. Vec4vf4& pR,
  357. const LineSegments* geom,
  358. const int itime) const
  359. {
  360. const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0],itime));
  361. const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1],itime));
  362. const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2],itime));
  363. const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3],itime));
  364. transpose(a0,a1,a2,a3,p0.x,p0.y,p0.z,p0.w);
  365. const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1,itime));
  366. const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1,itime));
  367. const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1,itime));
  368. const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1,itime));
  369. transpose(b0,b1,b2,b3,p1.x,p1.y,p1.z,p1.w);
  370. const vfloat4 l0 = (leftExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]-1,itime)) : vfloat4(inf);
  371. const vfloat4 l1 = (leftExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]-1,itime)) : vfloat4(inf);
  372. const vfloat4 l2 = (leftExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]-1,itime)) : vfloat4(inf);
  373. const vfloat4 l3 = (leftExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]-1,itime)) : vfloat4(inf);
  374. transpose(l0,l1,l2,l3,pL.x,pL.y,pL.z,pL.w);
  375. const vfloat4 r0 = (rightExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]+2,itime)) : vfloat4(inf);
  376. const vfloat4 r1 = (rightExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]+2,itime)) : vfloat4(inf);
  377. const vfloat4 r2 = (rightExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]+2,itime)) : vfloat4(inf);
  378. const vfloat4 r3 = (rightExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]+2,itime)) : vfloat4(inf);
  379. transpose(r0,r1,r2,r3,pR.x,pR.y,pR.z,pR.w);
  380. }
  381. template<>
  382. __forceinline void LineMi<4>::gather(Vec4vf4& p0,
  383. Vec4vf4& p1,
  384. Vec4vf4& pL,
  385. Vec4vf4& pR,
  386. const LineSegments* geom,
  387. float time) const
  388. {
  389. float ftime;
  390. const int itime = geom->timeSegment(time, ftime);
  391. Vec4vf4 a0,a1,aL,aR;
  392. gatheri(a0,a1,aL,aR,geom,itime);
  393. Vec4vf4 b0,b1,bL,bR;
  394. gatheri(b0,b1,bL,bR,geom,itime+1);
  395. p0 = lerp(a0,b0,vfloat4(ftime));
  396. p1 = lerp(a1,b1,vfloat4(ftime));
  397. pL = lerp(aL,bL,vfloat4(ftime));
  398. pR = lerp(aR,bR,vfloat4(ftime));
  399. }
  400. #if defined(__AVX__)
  401. template<>
  402. __forceinline void LineMi<8>::gather(Vec4vf8& p0,
  403. Vec4vf8& p1,
  404. const LineSegments* geom) const
  405. {
  406. const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0]));
  407. const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1]));
  408. const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2]));
  409. const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3]));
  410. const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(v0[4]));
  411. const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(v0[5]));
  412. const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(v0[6]));
  413. const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(v0[7]));
  414. transpose(a0,a1,a2,a3,a4,a5,a6,a7,p0.x,p0.y,p0.z,p0.w);
  415. const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1));
  416. const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1));
  417. const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1));
  418. const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1));
  419. const vfloat4 b4 = vfloat4::loadu(geom->vertexPtr(v0[4]+1));
  420. const vfloat4 b5 = vfloat4::loadu(geom->vertexPtr(v0[5]+1));
  421. const vfloat4 b6 = vfloat4::loadu(geom->vertexPtr(v0[6]+1));
  422. const vfloat4 b7 = vfloat4::loadu(geom->vertexPtr(v0[7]+1));
  423. transpose(b0,b1,b2,b3,b4,b5,b6,b7,p1.x,p1.y,p1.z,p1.w);
  424. }
  425. template<>
  426. __forceinline void LineMi<8>::gatheri(Vec4vf8& p0,
  427. Vec4vf8& p1,
  428. const LineSegments* geom,
  429. const int itime) const
  430. {
  431. const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0],itime));
  432. const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1],itime));
  433. const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2],itime));
  434. const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3],itime));
  435. const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(v0[4],itime));
  436. const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(v0[5],itime));
  437. const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(v0[6],itime));
  438. const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(v0[7],itime));
  439. transpose(a0,a1,a2,a3,a4,a5,a6,a7,p0.x,p0.y,p0.z,p0.w);
  440. const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1,itime));
  441. const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1,itime));
  442. const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1,itime));
  443. const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1,itime));
  444. const vfloat4 b4 = vfloat4::loadu(geom->vertexPtr(v0[4]+1,itime));
  445. const vfloat4 b5 = vfloat4::loadu(geom->vertexPtr(v0[5]+1,itime));
  446. const vfloat4 b6 = vfloat4::loadu(geom->vertexPtr(v0[6]+1,itime));
  447. const vfloat4 b7 = vfloat4::loadu(geom->vertexPtr(v0[7]+1,itime));
  448. transpose(b0,b1,b2,b3,b4,b5,b6,b7,p1.x,p1.y,p1.z,p1.w);
  449. }
  450. template<>
  451. __forceinline void LineMi<8>::gather(Vec4vf8& p0,
  452. Vec4vf8& p1,
  453. const LineSegments* geom,
  454. float time) const
  455. {
  456. float ftime;
  457. const int itime = geom->timeSegment(time, ftime);
  458. Vec4vf8 a0,a1;
  459. gatheri(a0,a1,geom,itime);
  460. Vec4vf8 b0,b1;
  461. gatheri(b0,b1,geom,itime+1);
  462. p0 = lerp(a0,b0,vfloat8(ftime));
  463. p1 = lerp(a1,b1,vfloat8(ftime));
  464. }
  465. template<>
  466. __forceinline void LineMi<8>::gather(Vec4vf8& p0,
  467. Vec4vf8& p1,
  468. Vec4vf8& pL,
  469. Vec4vf8& pR,
  470. const LineSegments* geom) const
  471. {
  472. const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0]));
  473. const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1]));
  474. const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2]));
  475. const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3]));
  476. const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(v0[4]));
  477. const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(v0[5]));
  478. const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(v0[6]));
  479. const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(v0[7]));
  480. transpose(a0,a1,a2,a3,a4,a5,a6,a7,p0.x,p0.y,p0.z,p0.w);
  481. const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1));
  482. const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1));
  483. const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1));
  484. const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1));
  485. const vfloat4 b4 = vfloat4::loadu(geom->vertexPtr(v0[4]+1));
  486. const vfloat4 b5 = vfloat4::loadu(geom->vertexPtr(v0[5]+1));
  487. const vfloat4 b6 = vfloat4::loadu(geom->vertexPtr(v0[6]+1));
  488. const vfloat4 b7 = vfloat4::loadu(geom->vertexPtr(v0[7]+1));
  489. transpose(b0,b1,b2,b3,b4,b5,b6,b7,p1.x,p1.y,p1.z,p1.w);
  490. const vfloat4 l0 = (leftExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]-1)) : vfloat4(inf);
  491. const vfloat4 l1 = (leftExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]-1)) : vfloat4(inf);
  492. const vfloat4 l2 = (leftExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]-1)) : vfloat4(inf);
  493. const vfloat4 l3 = (leftExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]-1)) : vfloat4(inf);
  494. const vfloat4 l4 = (leftExists & (1<<4)) ? vfloat4::loadu(geom->vertexPtr(v0[4]-1)) : vfloat4(inf);
  495. const vfloat4 l5 = (leftExists & (1<<5)) ? vfloat4::loadu(geom->vertexPtr(v0[5]-1)) : vfloat4(inf);
  496. const vfloat4 l6 = (leftExists & (1<<6)) ? vfloat4::loadu(geom->vertexPtr(v0[6]-1)) : vfloat4(inf);
  497. const vfloat4 l7 = (leftExists & (1<<7)) ? vfloat4::loadu(geom->vertexPtr(v0[7]-1)) : vfloat4(inf);
  498. transpose(l0,l1,l2,l3,l4,l5,l6,l7,pL.x,pL.y,pL.z,pL.w);
  499. const vfloat4 r0 = (rightExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]+2)) : vfloat4(inf);
  500. const vfloat4 r1 = (rightExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]+2)) : vfloat4(inf);
  501. const vfloat4 r2 = (rightExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]+2)) : vfloat4(inf);
  502. const vfloat4 r3 = (rightExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]+2)) : vfloat4(inf);
  503. const vfloat4 r4 = (rightExists & (1<<4)) ? vfloat4::loadu(geom->vertexPtr(v0[4]+2)) : vfloat4(inf);
  504. const vfloat4 r5 = (rightExists & (1<<5)) ? vfloat4::loadu(geom->vertexPtr(v0[5]+2)) : vfloat4(inf);
  505. const vfloat4 r6 = (rightExists & (1<<6)) ? vfloat4::loadu(geom->vertexPtr(v0[6]+2)) : vfloat4(inf);
  506. const vfloat4 r7 = (rightExists & (1<<7)) ? vfloat4::loadu(geom->vertexPtr(v0[7]+2)) : vfloat4(inf);
  507. transpose(r0,r1,r2,r3,r4,r5,r6,r7,pR.x,pR.y,pR.z,pR.w);
  508. }
  509. template<>
  510. __forceinline void LineMi<8>::gatheri(Vec4vf8& p0,
  511. Vec4vf8& p1,
  512. Vec4vf8& pL,
  513. Vec4vf8& pR,
  514. const LineSegments* geom,
  515. const int itime) const
  516. {
  517. const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0],itime));
  518. const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1],itime));
  519. const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2],itime));
  520. const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3],itime));
  521. const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(v0[4],itime));
  522. const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(v0[5],itime));
  523. const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(v0[6],itime));
  524. const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(v0[7],itime));
  525. transpose(a0,a1,a2,a3,a4,a5,a6,a7,p0.x,p0.y,p0.z,p0.w);
  526. const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1,itime));
  527. const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1,itime));
  528. const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1,itime));
  529. const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1,itime));
  530. const vfloat4 b4 = vfloat4::loadu(geom->vertexPtr(v0[4]+1,itime));
  531. const vfloat4 b5 = vfloat4::loadu(geom->vertexPtr(v0[5]+1,itime));
  532. const vfloat4 b6 = vfloat4::loadu(geom->vertexPtr(v0[6]+1,itime));
  533. const vfloat4 b7 = vfloat4::loadu(geom->vertexPtr(v0[7]+1,itime));
  534. transpose(b0,b1,b2,b3,b4,b5,b6,b7,p1.x,p1.y,p1.z,p1.w);
  535. const vfloat4 l0 = (leftExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]-1,itime)) : vfloat4(inf);
  536. const vfloat4 l1 = (leftExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]-1,itime)) : vfloat4(inf);
  537. const vfloat4 l2 = (leftExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]-1,itime)) : vfloat4(inf);
  538. const vfloat4 l3 = (leftExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]-1,itime)) : vfloat4(inf);
  539. const vfloat4 l4 = (leftExists & (1<<4)) ? vfloat4::loadu(geom->vertexPtr(v0[4]-1,itime)) : vfloat4(inf);
  540. const vfloat4 l5 = (leftExists & (1<<5)) ? vfloat4::loadu(geom->vertexPtr(v0[5]-1,itime)) : vfloat4(inf);
  541. const vfloat4 l6 = (leftExists & (1<<6)) ? vfloat4::loadu(geom->vertexPtr(v0[6]-1,itime)) : vfloat4(inf);
  542. const vfloat4 l7 = (leftExists & (1<<7)) ? vfloat4::loadu(geom->vertexPtr(v0[7]-1,itime)) : vfloat4(inf);
  543. transpose(l0,l1,l2,l3,l4,l5,l6,l7,pL.x,pL.y,pL.z,pL.w);
  544. const vfloat4 r0 = (rightExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]+2,itime)) : vfloat4(inf);
  545. const vfloat4 r1 = (rightExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]+2,itime)) : vfloat4(inf);
  546. const vfloat4 r2 = (rightExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]+2,itime)) : vfloat4(inf);
  547. const vfloat4 r3 = (rightExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]+2,itime)) : vfloat4(inf);
  548. const vfloat4 r4 = (rightExists & (1<<4)) ? vfloat4::loadu(geom->vertexPtr(v0[4]+2,itime)) : vfloat4(inf);
  549. const vfloat4 r5 = (rightExists & (1<<5)) ? vfloat4::loadu(geom->vertexPtr(v0[5]+2,itime)) : vfloat4(inf);
  550. const vfloat4 r6 = (rightExists & (1<<6)) ? vfloat4::loadu(geom->vertexPtr(v0[6]+2,itime)) : vfloat4(inf);
  551. const vfloat4 r7 = (rightExists & (1<<7)) ? vfloat4::loadu(geom->vertexPtr(v0[7]+2,itime)) : vfloat4(inf);
  552. transpose(r0,r1,r2,r3,r4,r5,r6,r7,pR.x,pR.y,pR.z,pR.w);
  553. }
  554. template<>
  555. __forceinline void LineMi<8>::gather(Vec4vf8& p0,
  556. Vec4vf8& p1,
  557. Vec4vf8& pL,
  558. Vec4vf8& pR,
  559. const LineSegments* geom,
  560. float time) const
  561. {
  562. float ftime;
  563. const int itime = geom->timeSegment(time, ftime);
  564. Vec4vf8 a0,a1,aL,aR;
  565. gatheri(a0,a1,aL,aR,geom,itime);
  566. Vec4vf8 b0,b1,bL,bR;
  567. gatheri(b0,b1,bL,bR,geom,itime+1);
  568. p0 = lerp(a0,b0,vfloat8(ftime));
  569. p1 = lerp(a1,b1,vfloat8(ftime));
  570. pL = lerp(aL,bL,vfloat8(ftime));
  571. pR = lerp(aR,bR,vfloat8(ftime));
  572. }
  573. template<>
  574. __forceinline void LineMi<8>::gather(Vec4vf8& p0,
  575. Vec4vf8& p1,
  576. vbool8& cL,
  577. vbool8& cR,
  578. const LineSegments* geom) const
  579. {
  580. gather(p0,p1,geom);
  581. cL = !vbool8(leftExists);
  582. cR = !vbool8(rightExists);
  583. }
  584. template<>
  585. __forceinline void LineMi<8>::gatheri(Vec4vf8& p0,
  586. Vec4vf8& p1,
  587. vbool8& cL,
  588. vbool8& cR,
  589. const LineSegments* geom,
  590. const int itime) const
  591. {
  592. gatheri(p0,p1,geom,itime);
  593. cL = !vbool8(leftExists);
  594. cR = !vbool8(rightExists);
  595. }
  596. template<>
  597. __forceinline void LineMi<8>::gather(Vec4vf8& p0,
  598. Vec4vf8& p1,
  599. vbool8& cL,
  600. vbool8& cR,
  601. const LineSegments* geom,
  602. float time) const
  603. {
  604. float ftime;
  605. const int itime = geom->timeSegment(time, ftime);
  606. Vec4vf8 a0,a1;
  607. gatheri(a0,a1,geom,itime);
  608. Vec4vf8 b0,b1;
  609. gatheri(b0,b1,geom,itime+1);
  610. p0 = lerp(a0,b0,vfloat8(ftime));
  611. p1 = lerp(a1,b1,vfloat8(ftime));
  612. cL = !vbool8(leftExists);
  613. cR = !vbool8(rightExists);
  614. }
  615. #endif
  616. template<int M>
  617. typename LineMi<M>::Type LineMi<M>::type;
  618. typedef LineMi<4> Line4i;
  619. typedef LineMi<8> Line8i;
  620. }