ray.h 60 KB


  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "default.h"
  5. #include "instance_stack.h"
  6. // FIXME: if ray gets separated into ray* and hit, uload4 needs to be adjusted
  7. namespace embree
  8. {
  /* Size limit used for internal ray streams.
   * NOTE(review): meaning inferred from the name; its uses are outside this view. */
  static const size_t MAX_INTERNAL_STREAM_SIZE = 32;
  10. /* Ray structure for K rays */
  11. template<int K>
  12. struct RayK
  13. {
  14. /* Default construction does nothing */
  15. __forceinline RayK() {}
  16. /* Constructs a ray from origin, direction, and ray segment. Near
  17. * has to be smaller than far */
  18. __forceinline RayK(const Vec3vf<K>& org, const Vec3vf<K>& dir,
  19. const vfloat<K>& tnear = zero, const vfloat<K>& tfar = inf,
  20. const vfloat<K>& time = zero, const vint<K>& mask = -1, const vint<K>& id = 0, const vint<K>& flags = 0)
  21. : org(org), dir(dir), _tnear(tnear), tfar(tfar), _time(time), mask(mask), id(id), flags(flags) {}
  22. /* Returns the size of the ray */
  23. static __forceinline size_t size() { return K; }
  24. /* Calculates if this is a valid ray that does not cause issues during traversal */
  25. __forceinline vbool<K> valid() const
  26. {
  27. const vbool<K> vx = (abs(org.x) <= vfloat<K>(FLT_LARGE)) & (abs(dir.x) <= vfloat<K>(FLT_LARGE));
  28. const vbool<K> vy = (abs(org.y) <= vfloat<K>(FLT_LARGE)) & (abs(dir.y) <= vfloat<K>(FLT_LARGE));
  29. const vbool<K> vz = (abs(org.z) <= vfloat<K>(FLT_LARGE)) & (abs(dir.z) <= vfloat<K>(FLT_LARGE));
  30. const vbool<K> vn = abs(tnear()) <= vfloat<K>(inf);
  31. const vbool<K> vf = abs(tfar) <= vfloat<K>(inf);
  32. return vx & vy & vz & vn & vf;
  33. }
  34. __forceinline void get(RayK<1>* ray) const;
  35. __forceinline void get(size_t i, RayK<1>& ray) const;
  36. __forceinline void set(const RayK<1>* ray);
  37. __forceinline void set(size_t i, const RayK<1>& ray);
  38. __forceinline void copy(size_t dest, size_t source);
  39. __forceinline vint<K> octant() const
  40. {
  41. return select(dir.x < 0.0f, vint<K>(1), vint<K>(zero)) |
  42. select(dir.y < 0.0f, vint<K>(2), vint<K>(zero)) |
  43. select(dir.z < 0.0f, vint<K>(4), vint<K>(zero));
  44. }
  45. /* Ray data */
  46. Vec3vf<K> org; // ray origin
  47. vfloat<K> _tnear; // start of ray segment
  48. Vec3vf<K> dir; // ray direction
  49. vfloat<K> _time; // time of this ray for motion blur
  50. vfloat<K> tfar; // end of ray segment
  51. vint<K> mask; // used to mask out objects during traversal
  52. vint<K> id;
  53. vint<K> flags;
  54. __forceinline vfloat<K>& tnear() { return _tnear; }
  55. __forceinline vfloat<K>& time() { return _time; }
  56. __forceinline const vfloat<K>& tnear() const { return _tnear; }
  57. __forceinline const vfloat<K>& time() const { return _time; }
  58. };
  59. /* Ray+hit structure for K rays */
  60. template<int K>
  61. struct RayHitK : RayK<K>
  62. {
  63. using RayK<K>::org;
  64. using RayK<K>::_tnear;
  65. using RayK<K>::dir;
  66. using RayK<K>::_time;
  67. using RayK<K>::tfar;
  68. using RayK<K>::mask;
  69. using RayK<K>::id;
  70. using RayK<K>::flags;
  71. using RayK<K>::tnear;
  72. using RayK<K>::time;
  73. /* Default construction does nothing */
  74. __forceinline RayHitK() {}
  75. /* Constructs a ray from origin, direction, and ray segment. Near
  76. * has to be smaller than far */
  77. __forceinline RayHitK(const Vec3vf<K>& org, const Vec3vf<K>& dir,
  78. const vfloat<K>& tnear = zero, const vfloat<K>& tfar = inf,
  79. const vfloat<K>& time = zero, const vint<K>& mask = -1, const vint<K>& id = 0, const vint<K>& flags = 0)
  80. : RayK<K>(org, dir, tnear, tfar, time, mask, id, flags),
  81. geomID(RTC_INVALID_GEOMETRY_ID)
  82. {
  83. for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
  84. instID[l] = RTC_INVALID_GEOMETRY_ID;
  85. }
  86. __forceinline RayHitK(const RayK<K>& ray)
  87. : RayK<K>(ray),
  88. geomID(RTC_INVALID_GEOMETRY_ID)
  89. {
  90. for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
  91. instID[l] = RTC_INVALID_GEOMETRY_ID;
  92. }
  93. __forceinline RayHitK<K>& operator =(const RayK<K>& ray)
  94. {
  95. org = ray.org;
  96. _tnear = ray._tnear;
  97. dir = ray.dir;
  98. _time = ray._time;
  99. tfar = ray.tfar;
  100. mask = ray.mask;
  101. id = ray.id;
  102. flags = ray.flags;
  103. geomID = RTC_INVALID_GEOMETRY_ID;
  104. for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
  105. instID[l] = RTC_INVALID_GEOMETRY_ID;
  106. return *this;
  107. }
  108. /* Calculates if the hit is valid */
  109. __forceinline void verifyHit(const vbool<K>& valid0) const
  110. {
  111. vbool<K> valid = valid0 & geomID != vuint<K>(RTC_INVALID_GEOMETRY_ID);
  112. const vbool<K> vt = (abs(tfar) <= vfloat<K>(FLT_LARGE)) | (tfar == vfloat<K>(neg_inf));
  113. const vbool<K> vu = (abs(u) <= vfloat<K>(FLT_LARGE));
  114. const vbool<K> vv = (abs(u) <= vfloat<K>(FLT_LARGE));
  115. const vbool<K> vnx = abs(Ng.x) <= vfloat<K>(FLT_LARGE);
  116. const vbool<K> vny = abs(Ng.y) <= vfloat<K>(FLT_LARGE);
  117. const vbool<K> vnz = abs(Ng.z) <= vfloat<K>(FLT_LARGE);
  118. if (any(valid & !vt)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid t");
  119. if (any(valid & !vu)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid u");
  120. if (any(valid & !vv)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid v");
  121. if (any(valid & !vnx)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid Ng.x");
  122. if (any(valid & !vny)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid Ng.y");
  123. if (any(valid & !vnz)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid Ng.z");
  124. }
  125. __forceinline void get(RayHitK<1>* ray) const;
  126. __forceinline void get(size_t i, RayHitK<1>& ray) const;
  127. __forceinline void set(const RayHitK<1>* ray);
  128. __forceinline void set(size_t i, const RayHitK<1>& ray);
  129. __forceinline void copy(size_t dest, size_t source);
  130. /* Hit data */
  131. Vec3vf<K> Ng; // geometry normal
  132. vfloat<K> u; // barycentric u coordinate of hit
  133. vfloat<K> v; // barycentric v coordinate of hit
  134. vuint<K> primID; // primitive ID
  135. vuint<K> geomID; // geometry ID
  136. vuint<K> instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID
  137. };
  138. /* Specialization for a single ray */
  139. template<>
  140. struct RayK<1>
  141. {
  142. /* Default construction does nothing */
  143. __forceinline RayK() {}
  144. /* Constructs a ray from origin, direction, and ray segment. Near
  145. * has to be smaller than far */
  146. __forceinline RayK(const Vec3fa& org, const Vec3fa& dir, float tnear = zero, float tfar = inf, float time = zero, int mask = -1, int id = 0, int flags = 0)
  147. : org(org,tnear), dir(dir,time), tfar(tfar), mask(mask), id(id), flags(flags) {}
  148. /* Calculates if this is a valid ray that does not cause issues during traversal */
  149. __forceinline bool valid() const {
  150. return all(le_mask(abs(Vec3fa(org)), Vec3fa(FLT_LARGE)) & le_mask(abs(Vec3fa(dir)), Vec3fa(FLT_LARGE))) && abs(tnear()) <= float(inf) && abs(tfar) <= float(inf);
  151. }
  152. /* Ray data */
  153. Vec3ff org; // 3 floats for ray origin, 1 float for tnear
  154. //float tnear; // start of ray segment
  155. Vec3ff dir; // 3 floats for ray direction, 1 float for time
  156. // float time;
  157. float tfar; // end of ray segment
  158. int mask; // used to mask out objects during traversal
  159. int id; // ray ID
  160. int flags; // ray flags
  161. __forceinline float& tnear() { return org.w; };
  162. __forceinline const float& tnear() const { return org.w; };
  163. __forceinline float& time() { return dir.w; };
  164. __forceinline const float& time() const { return dir.w; };
  165. };
  166. template<>
  167. struct RayHitK<1> : RayK<1>
  168. {
  169. /* Default construction does nothing */
  170. __forceinline RayHitK() {}
  171. /* Constructs a ray from origin, direction, and ray segment. Near
  172. * has to be smaller than far */
  173. __forceinline RayHitK(const Vec3fa& org, const Vec3fa& dir, float tnear = zero, float tfar = inf, float time = zero, int mask = -1, int id = 0, int flags = 0)
  174. : RayK<1>(org, dir, tnear, tfar, time, mask, id, flags),
  175. geomID(RTC_INVALID_GEOMETRY_ID) {}
  176. __forceinline RayHitK(const RayK<1>& ray)
  177. : RayK<1>(ray),
  178. geomID(RTC_INVALID_GEOMETRY_ID) {}
  179. __forceinline RayHitK<1>& operator =(const RayK<1>& ray)
  180. {
  181. org = ray.org;
  182. dir = ray.dir;
  183. tfar = ray.tfar;
  184. mask = ray.mask;
  185. id = ray.id;
  186. flags = ray.flags;
  187. geomID = RTC_INVALID_GEOMETRY_ID;
  188. return *this;
  189. }
  190. /* Calculates if the hit is valid */
  191. __forceinline void verifyHit() const
  192. {
  193. if (geomID == RTC_INVALID_GEOMETRY_ID) return;
  194. const bool vt = (abs(tfar) <= FLT_LARGE) || (tfar == float(neg_inf));
  195. const bool vu = (abs(u) <= FLT_LARGE);
  196. const bool vv = (abs(u) <= FLT_LARGE);
  197. const bool vnx = abs(Ng.x) <= FLT_LARGE;
  198. const bool vny = abs(Ng.y) <= FLT_LARGE;
  199. const bool vnz = abs(Ng.z) <= FLT_LARGE;
  200. if (!vt) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid t");
  201. if (!vu) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid u");
  202. if (!vv) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid v");
  203. if (!vnx) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid Ng.x");
  204. if (!vny) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid Ng.y");
  205. if (!vnz) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid Ng.z");
  206. }
  207. /* Hit data */
  208. Vec3f Ng; // not normalized geometry normal
  209. float u; // barycentric u coordinate of hit
  210. float v; // barycentric v coordinate of hit
  211. unsigned int primID; // primitive ID
  212. unsigned int geomID; // geometry ID
  213. unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID
  214. };
  215. /* Converts ray packet to single rays */
  216. template<int K>
  217. __forceinline void RayK<K>::get(RayK<1>* ray) const
  218. {
  219. for (size_t i = 0; i < K; i++) // FIXME: use SIMD transpose
  220. {
  221. ray[i].org.x = org.x[i]; ray[i].org.y = org.y[i]; ray[i].org.z = org.z[i]; ray[i].tnear() = tnear()[i];
  222. ray[i].dir.x = dir.x[i]; ray[i].dir.y = dir.y[i]; ray[i].dir.z = dir.z[i]; ray[i].time() = time()[i];
  223. ray[i].tfar = tfar[i]; ray[i].mask = mask[i]; ray[i].id = id[i]; ray[i].flags = flags[i];
  224. }
  225. }
  226. template<int K>
  227. __forceinline void RayHitK<K>::get(RayHitK<1>* ray) const
  228. {
  229. // FIXME: use SIMD transpose
  230. for (size_t i = 0; i < K; i++)
  231. get(i, ray[i]);
  232. }
  233. /* Extracts a single ray out of a ray packet*/
  234. template<int K>
  235. __forceinline void RayK<K>::get(size_t i, RayK<1>& ray) const
  236. {
  237. ray.org.x = org.x[i]; ray.org.y = org.y[i]; ray.org.z = org.z[i]; ray.tnear() = tnear()[i];
  238. ray.dir.x = dir.x[i]; ray.dir.y = dir.y[i]; ray.dir.z = dir.z[i]; ray.time() = time()[i];
  239. ray.tfar = tfar[i]; ray.mask = mask[i]; ray.id = id[i]; ray.flags = flags[i];
  240. }
  241. template<int K>
  242. __forceinline void RayHitK<K>::get(size_t i, RayHitK<1>& ray) const
  243. {
  244. ray.org.x = org.x[i]; ray.org.y = org.y[i]; ray.org.z = org.z[i]; ray.tnear() = tnear()[i];
  245. ray.dir.x = dir.x[i]; ray.dir.y = dir.y[i]; ray.dir.z = dir.z[i]; ray.tfar = tfar[i]; ray.time() = time()[i];
  246. ray.mask = mask[i]; ray.id = id[i]; ray.flags = flags[i];
  247. ray.Ng.x = Ng.x[i]; ray.Ng.y = Ng.y[i]; ray.Ng.z = Ng.z[i];
  248. ray.u = u[i]; ray.v = v[i];
  249. ray.primID = primID[i]; ray.geomID = geomID[i];
  250. instance_id_stack::copy_VU<K>(instID, ray.instID, i);
  251. }
  252. /* Converts single rays to ray packet */
  253. template<int K>
  254. __forceinline void RayK<K>::set(const RayK<1>* ray)
  255. {
  256. // FIXME: use SIMD transpose
  257. for (size_t i = 0; i < K; i++)
  258. set(i, ray[i]);
  259. }
  260. template<int K>
  261. __forceinline void RayHitK<K>::set(const RayHitK<1>* ray)
  262. {
  263. // FIXME: use SIMD transpose
  264. for (size_t i = 0; i < K; i++)
  265. set(i, ray[i]);
  266. }
  267. /* inserts a single ray into a ray packet element */
  268. template<int K>
  269. __forceinline void RayK<K>::set(size_t i, const RayK<1>& ray)
  270. {
  271. org.x[i] = ray.org.x; org.y[i] = ray.org.y; org.z[i] = ray.org.z; tnear()[i] = ray.tnear();
  272. dir.x[i] = ray.dir.x; dir.y[i] = ray.dir.y; dir.z[i] = ray.dir.z; time()[i] = ray.time();
  273. tfar[i] = ray.tfar; mask[i] = ray.mask; id[i] = ray.id; flags[i] = ray.flags;
  274. }
  275. template<int K>
  276. __forceinline void RayHitK<K>::set(size_t i, const RayHitK<1>& ray)
  277. {
  278. org.x[i] = ray.org.x; org.y[i] = ray.org.y; org.z[i] = ray.org.z; tnear()[i] = ray.tnear();
  279. dir.x[i] = ray.dir.x; dir.y[i] = ray.dir.y; dir.z[i] = ray.dir.z; time()[i] = ray.time();
  280. tfar[i] = ray.tfar; mask[i] = ray.mask; id[i] = ray.id; flags[i] = ray.flags;
  281. Ng.x[i] = ray.Ng.x; Ng.y[i] = ray.Ng.y; Ng.z[i] = ray.Ng.z;
  282. u[i] = ray.u; v[i] = ray.v;
  283. primID[i] = ray.primID; geomID[i] = ray.geomID;
  284. instance_id_stack::copy_UV<K>(ray.instID, instID, i);
  285. }
  286. /* copies a ray packet element into another element*/
  287. template<int K>
  288. __forceinline void RayK<K>::copy(size_t dest, size_t source)
  289. {
  290. org.x[dest] = org.x[source]; org.y[dest] = org.y[source]; org.z[dest] = org.z[source]; tnear()[dest] = tnear()[source];
  291. dir.x[dest] = dir.x[source]; dir.y[dest] = dir.y[source]; dir.z[dest] = dir.z[source]; time()[dest] = time()[source];
  292. tfar [dest] = tfar[source]; mask[dest] = mask[source]; id[dest] = id[source]; flags[dest] = flags[source];
  293. }
  294. template<int K>
  295. __forceinline void RayHitK<K>::copy(size_t dest, size_t source)
  296. {
  297. org.x[dest] = org.x[source]; org.y[dest] = org.y[source]; org.z[dest] = org.z[source]; tnear()[dest] = tnear()[source];
  298. dir.x[dest] = dir.x[source]; dir.y[dest] = dir.y[source]; dir.z[dest] = dir.z[source]; time()[dest] = time()[source];
  299. tfar [dest] = tfar[source]; mask[dest] = mask[source]; id[dest] = id[source]; flags[dest] = flags[source];
  300. Ng.x[dest] = Ng.x[source]; Ng.y[dest] = Ng.y[source]; Ng.z[dest] = Ng.z[source];
  301. u[dest] = u[source]; v[dest] = v[source];
  302. primID[dest] = primID[source]; geomID[dest] = geomID[source];
  303. instance_id_stack::copy_VV<K>(instID, instID, source, dest);
  304. }
  305. /* Shortcuts */
  306. typedef RayK<1> Ray;
  307. typedef RayK<4> Ray4;
  308. typedef RayK<8> Ray8;
  309. typedef RayK<16> Ray16;
  310. struct RayN;
  311. typedef RayHitK<1> RayHit;
  312. typedef RayHitK<4> RayHit4;
  313. typedef RayHitK<8> RayHit8;
  314. typedef RayHitK<16> RayHit16;
  315. struct RayHitN;
  316. template<int K, bool intersect>
  317. struct RayTypeHelper;
  318. template<int K>
  319. struct RayTypeHelper<K, true>
  320. {
  321. typedef RayHitK<K> Ty;
  322. };
  323. template<int K>
  324. struct RayTypeHelper<K, false>
  325. {
  326. typedef RayK<K> Ty;
  327. };
  328. template<bool intersect>
  329. using RayType = typename RayTypeHelper<1, intersect>::Ty;
  330. template<int K, bool intersect>
  331. using RayTypeK = typename RayTypeHelper<K, intersect>::Ty;
  332. /* Outputs ray to stream */
  333. template<int K>
  334. __forceinline embree_ostream operator <<(embree_ostream cout, const RayK<K>& ray)
  335. {
  336. return cout << "{ " << embree_endl
  337. << " org = " << ray.org << embree_endl
  338. << " dir = " << ray.dir << embree_endl
  339. << " near = " << ray.tnear() << embree_endl
  340. << " far = " << ray.tfar << embree_endl
  341. << " time = " << ray.time() << embree_endl
  342. << " mask = " << ray.mask << embree_endl
  343. << " id = " << ray.id << embree_endl
  344. << " flags = " << ray.flags << embree_endl
  345. << "}";
  346. }
  347. template<int K>
  348. __forceinline embree_ostream operator <<(embree_ostream cout, const RayHitK<K>& ray)
  349. {
  350. cout << "{ " << embree_endl
  351. << " org = " << ray.org << embree_endl
  352. << " dir = " << ray.dir << embree_endl
  353. << " near = " << ray.tnear() << embree_endl
  354. << " far = " << ray.tfar << embree_endl
  355. << " time = " << ray.time() << embree_endl
  356. << " mask = " << ray.mask << embree_endl
  357. << " id = " << ray.id << embree_endl
  358. << " flags = " << ray.flags << embree_endl
  359. << " Ng = " << ray.Ng
  360. << " u = " << ray.u << embree_endl
  361. << " v = " << ray.v << embree_endl
  362. << " primID = " << ray.primID << embree_endl
  363. << " geomID = " << ray.geomID << embree_endl
  364. << " instID =";
  365. for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
  366. {
  367. cout << " " << ray.instID[l];
  368. }
  369. cout << embree_endl;
  370. return cout << "}";
  371. }
  372. struct RayStreamSOA
  373. {
  374. __forceinline RayStreamSOA(void* rays, size_t N)
  375. : ptr((char*)rays), N(N) {}
  376. /* ray data access functions */
  377. __forceinline float* org_x(size_t offset = 0) { return (float*)&ptr[0*4*N+offset]; } // x coordinate of ray origin
  378. __forceinline float* org_y(size_t offset = 0) { return (float*)&ptr[1*4*N+offset]; } // y coordinate of ray origin
  379. __forceinline float* org_z(size_t offset = 0) { return (float*)&ptr[2*4*N+offset]; }; // z coordinate of ray origin
  380. __forceinline float* tnear(size_t offset = 0) { return (float*)&ptr[3*4*N+offset]; }; // start of ray segment
  381. __forceinline float* dir_x(size_t offset = 0) { return (float*)&ptr[4*4*N+offset]; }; // x coordinate of ray direction
  382. __forceinline float* dir_y(size_t offset = 0) { return (float*)&ptr[5*4*N+offset]; }; // y coordinate of ray direction
  383. __forceinline float* dir_z(size_t offset = 0) { return (float*)&ptr[6*4*N+offset]; }; // z coordinate of ray direction
  384. __forceinline float* time (size_t offset = 0) { return (float*)&ptr[7*4*N+offset]; }; // time of this ray for motion blur
  385. __forceinline float* tfar (size_t offset = 0) { return (float*)&ptr[8*4*N+offset]; }; // end of ray segment (set to hit distance)
  386. __forceinline int* mask (size_t offset = 0) { return (int*)&ptr[9*4*N+offset]; }; // used to mask out objects during traversal (optional)
  387. __forceinline int* id (size_t offset = 0) { return (int*)&ptr[10*4*N+offset]; }; // id
  388. __forceinline int* flags(size_t offset = 0) { return (int*)&ptr[11*4*N+offset]; }; // flags
  389. /* hit data access functions */
  390. __forceinline float* Ng_x(size_t offset = 0) { return (float*)&ptr[12*4*N+offset]; }; // x coordinate of geometry normal
  391. __forceinline float* Ng_y(size_t offset = 0) { return (float*)&ptr[13*4*N+offset]; }; // y coordinate of geometry normal
  392. __forceinline float* Ng_z(size_t offset = 0) { return (float*)&ptr[14*4*N+offset]; }; // z coordinate of geometry normal
  393. __forceinline float* u(size_t offset = 0) { return (float*)&ptr[15*4*N+offset]; }; // barycentric u coordinate of hit
  394. __forceinline float* v(size_t offset = 0) { return (float*)&ptr[16*4*N+offset]; }; // barycentric v coordinate of hit
  395. __forceinline unsigned int* primID(size_t offset = 0) { return (unsigned int*)&ptr[17*4*N+offset]; }; // primitive ID
  396. __forceinline unsigned int* geomID(size_t offset = 0) { return (unsigned int*)&ptr[18*4*N+offset]; }; // geometry ID
  397. __forceinline unsigned int* instID(size_t level, size_t offset = 0) { return (unsigned int*)&ptr[19*4*N+level*4*N+offset]; }; // instance ID
  398. __forceinline Ray getRayByOffset(size_t offset)
  399. {
  400. Ray ray;
  401. ray.org.x = org_x(offset)[0];
  402. ray.org.y = org_y(offset)[0];
  403. ray.org.z = org_z(offset)[0];
  404. ray.tnear() = tnear(offset)[0];
  405. ray.dir.x = dir_x(offset)[0];
  406. ray.dir.y = dir_y(offset)[0];
  407. ray.dir.z = dir_z(offset)[0];
  408. ray.time() = time(offset)[0];
  409. ray.tfar = tfar(offset)[0];
  410. ray.mask = mask(offset)[0];
  411. ray.id = id(offset)[0];
  412. ray.flags = flags(offset)[0];
  413. return ray;
  414. }
  415. template<int K>
  416. __forceinline RayK<K> getRayByOffset(size_t offset)
  417. {
  418. RayK<K> ray;
  419. ray.org.x = vfloat<K>::loadu(org_x(offset));
  420. ray.org.y = vfloat<K>::loadu(org_y(offset));
  421. ray.org.z = vfloat<K>::loadu(org_z(offset));
  422. ray.tnear = vfloat<K>::loadu(tnear(offset));
  423. ray.dir.x = vfloat<K>::loadu(dir_x(offset));
  424. ray.dir.y = vfloat<K>::loadu(dir_y(offset));
  425. ray.dir.z = vfloat<K>::loadu(dir_z(offset));
  426. ray.time = vfloat<K>::loadu(time(offset));
  427. ray.tfar = vfloat<K>::loadu(tfar(offset));
  428. ray.mask = vint<K>::loadu(mask(offset));
  429. ray.id = vint<K>::loadu(id(offset));
  430. ray.flags = vint<K>::loadu(flags(offset));
  431. return ray;
  432. }
  433. template<int K>
  434. __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, size_t offset)
  435. {
  436. RayK<K> ray;
  437. ray.org.x = vfloat<K>::loadu(valid, org_x(offset));
  438. ray.org.y = vfloat<K>::loadu(valid, org_y(offset));
  439. ray.org.z = vfloat<K>::loadu(valid, org_z(offset));
  440. ray.tnear() = vfloat<K>::loadu(valid, tnear(offset));
  441. ray.dir.x = vfloat<K>::loadu(valid, dir_x(offset));
  442. ray.dir.y = vfloat<K>::loadu(valid, dir_y(offset));
  443. ray.dir.z = vfloat<K>::loadu(valid, dir_z(offset));
  444. ray.time() = vfloat<K>::loadu(valid, time(offset));
  445. ray.tfar = vfloat<K>::loadu(valid, tfar(offset));
  446. #if !defined(__AVX__)
  447. /* SSE: some ray members must be loaded with scalar instructions to ensure that we don't cause memory faults,
  448. because the SSE masked loads always access the entire vector */
  449. if (unlikely(!all(valid)))
  450. {
  451. ray.mask = zero;
  452. ray.id = zero;
  453. ray.flags = zero;
  454. for (size_t k = 0; k < K; k++)
  455. {
  456. if (likely(valid[k]))
  457. {
  458. ray.mask[k] = mask(offset)[k];
  459. ray.id[k] = id(offset)[k];
  460. ray.flags[k] = flags(offset)[k];
  461. }
  462. }
  463. }
  464. else
  465. #endif
  466. {
  467. ray.mask = vint<K>::loadu(valid, mask(offset));
  468. ray.id = vint<K>::loadu(valid, id(offset));
  469. ray.flags = vint<K>::loadu(valid, flags(offset));
  470. }
  471. return ray;
  472. }
  473. template<int K>
  474. __forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayHitK<K>& ray)
  475. {
  476. /*
  477. * valid_i: stores which of the input rays exist (do not access nonexistent rays!)
  478. * valid: stores which of the rays actually hit something.
  479. */
  480. vbool<K> valid = valid_i;
  481. valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
  482. if (likely(any(valid)))
  483. {
  484. vfloat<K>::storeu(valid, tfar(offset), ray.tfar);
  485. vfloat<K>::storeu(valid, Ng_x(offset), ray.Ng.x);
  486. vfloat<K>::storeu(valid, Ng_y(offset), ray.Ng.y);
  487. vfloat<K>::storeu(valid, Ng_z(offset), ray.Ng.z);
  488. vfloat<K>::storeu(valid, u(offset), ray.u);
  489. vfloat<K>::storeu(valid, v(offset), ray.v);
  490. #if !defined(__AVX__)
  491. /* SSE: some ray members must be stored with scalar instructions to ensure that we don't cause memory faults,
  492. because the SSE masked stores always access the entire vector */
  493. if (unlikely(!all(valid_i)))
  494. {
  495. for (size_t k = 0; k < K; k++)
  496. {
  497. if (likely(valid[k]))
  498. {
  499. primID(offset)[k] = ray.primID[k];
  500. geomID(offset)[k] = ray.geomID[k];
  501. instID(0, offset)[k] = ray.instID[0][k];
  502. #if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
  503. for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1][k] != RTC_INVALID_GEOMETRY_ID; ++l)
  504. instID(l, offset)[k] = ray.instID[l][k];
  505. #endif
  506. }
  507. }
  508. }
  509. else
  510. #endif
  511. {
  512. vuint<K>::storeu(valid, primID(offset), ray.primID);
  513. vuint<K>::storeu(valid, geomID(offset), ray.geomID);
  514. vuint<K>::storeu(valid, instID(0, offset), ray.instID[0]);
  515. #if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
  516. for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l)
  517. vuint<K>::storeu(valid, instID(l, offset), ray.instID[l]);
  518. #endif
  519. }
  520. }
  521. }
  522. template<int K>
  523. __forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayK<K>& ray)
  524. {
  525. vbool<K> valid = valid_i;
  526. valid &= (ray.tfar < 0.0f);
  527. if (likely(any(valid)))
  528. vfloat<K>::storeu(valid, tfar(offset), ray.tfar);
  529. }
  530. __forceinline size_t getOctantByOffset(size_t offset)
  531. {
  532. const float dx = dir_x(offset)[0];
  533. const float dy = dir_y(offset)[0];
  534. const float dz = dir_z(offset)[0];
  535. const size_t octantID = (dx < 0.0f ? 1 : 0) + (dy < 0.0f ? 2 : 0) + (dz < 0.0f ? 4 : 0);
  536. return octantID;
  537. }
  538. __forceinline bool isValidByOffset(size_t offset)
  539. {
  540. const float nnear = tnear(offset)[0];
  541. const float ffar = tfar(offset)[0];
  542. return nnear <= ffar;
  543. }
  544. template<int K>
  545. __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, const vint<K>& offset)
  546. {
  547. RayK<K> ray;
  548. #if defined(__AVX2__)
  549. ray.org.x = vfloat<K>::template gather<1>(valid, org_x(), offset);
  550. ray.org.y = vfloat<K>::template gather<1>(valid, org_y(), offset);
  551. ray.org.z = vfloat<K>::template gather<1>(valid, org_z(), offset);
  552. ray.tnear() = vfloat<K>::template gather<1>(valid, tnear(), offset);
  553. ray.dir.x = vfloat<K>::template gather<1>(valid, dir_x(), offset);
  554. ray.dir.y = vfloat<K>::template gather<1>(valid, dir_y(), offset);
  555. ray.dir.z = vfloat<K>::template gather<1>(valid, dir_z(), offset);
  556. ray.time() = vfloat<K>::template gather<1>(valid, time(), offset);
  557. ray.tfar = vfloat<K>::template gather<1>(valid, tfar(), offset);
  558. ray.mask = vint<K>::template gather<1>(valid, mask(), offset);
  559. ray.id = vint<K>::template gather<1>(valid, id(), offset);
  560. ray.flags = vint<K>::template gather<1>(valid, flags(), offset);
  561. #else
  562. ray.org = zero;
  563. ray.tnear() = zero;
  564. ray.dir = zero;
  565. ray.time() = zero;
  566. ray.tfar = zero;
  567. ray.mask = zero;
  568. ray.id = zero;
  569. ray.flags = zero;
  570. for (size_t k = 0; k < K; k++)
  571. {
  572. if (likely(valid[k]))
  573. {
  574. const size_t ofs = offset[k];
  575. ray.org.x[k] = *org_x(ofs);
  576. ray.org.y[k] = *org_y(ofs);
  577. ray.org.z[k] = *org_z(ofs);
  578. ray.tnear()[k] = *tnear(ofs);
  579. ray.dir.x[k] = *dir_x(ofs);
  580. ray.dir.y[k] = *dir_y(ofs);
  581. ray.dir.z[k] = *dir_z(ofs);
  582. ray.time()[k] = *time(ofs);
  583. ray.tfar[k] = *tfar(ofs);
  584. ray.mask[k] = *mask(ofs);
  585. ray.id[k] = *id(ofs);
  586. ray.flags[k] = *flags(ofs);
  587. }
  588. }
  589. #endif
  590. return ray;
  591. }
  592. template<int K>
  593. __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayHitK<K>& ray)
  594. {
  595. vbool<K> valid = valid_i;
  596. valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
  597. if (likely(any(valid)))
  598. {
  599. #if defined(__AVX512F__)
  600. vfloat<K>::template scatter<1>(valid, tfar(), offset, ray.tfar);
  601. vfloat<K>::template scatter<1>(valid, Ng_x(), offset, ray.Ng.x);
  602. vfloat<K>::template scatter<1>(valid, Ng_y(), offset, ray.Ng.y);
  603. vfloat<K>::template scatter<1>(valid, Ng_z(), offset, ray.Ng.z);
  604. vfloat<K>::template scatter<1>(valid, u(), offset, ray.u);
  605. vfloat<K>::template scatter<1>(valid, v(), offset, ray.v);
  606. vuint<K>::template scatter<1>(valid, primID(), offset, ray.primID);
  607. vuint<K>::template scatter<1>(valid, geomID(), offset, ray.geomID);
  608. vuint<K>::template scatter<1>(valid, instID(0), offset, ray.instID[0]);
  609. #if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
  610. for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l)
  611. vuint<K>::template scatter<1>(valid, instID(l), offset, ray.instID[l]);
  612. #endif
  613. #else
  614. size_t valid_bits = movemask(valid);
  615. while (valid_bits != 0)
  616. {
  617. const size_t k = bscf(valid_bits);
  618. const size_t ofs = offset[k];
  619. *tfar(ofs) = ray.tfar[k];
  620. *Ng_x(ofs) = ray.Ng.x[k];
  621. *Ng_y(ofs) = ray.Ng.y[k];
  622. *Ng_z(ofs) = ray.Ng.z[k];
  623. *u(ofs) = ray.u[k];
  624. *v(ofs) = ray.v[k];
  625. *primID(ofs) = ray.primID[k];
  626. *geomID(ofs) = ray.geomID[k];
  627. *instID(0, ofs) = ray.instID[0][k];
  628. #if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
  629. for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1][k] != RTC_INVALID_GEOMETRY_ID; ++l)
  630. *instID(l, ofs) = ray.instID[l][k];
  631. #endif
  632. }
  633. #endif
  634. }
  635. }
  636. template<int K>
  637. __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayK<K>& ray)
  638. {
  639. vbool<K> valid = valid_i;
  640. valid &= (ray.tfar < 0.0f);
  641. if (likely(any(valid)))
  642. {
  643. #if defined(__AVX512F__)
  644. vfloat<K>::template scatter<1>(valid, tfar(), offset, ray.tfar);
  645. #else
  646. size_t valid_bits = movemask(valid);
  647. while (valid_bits != 0)
  648. {
  649. const size_t k = bscf(valid_bits);
  650. const size_t ofs = offset[k];
  651. *tfar(ofs) = ray.tfar[k];
  652. }
  653. #endif
  654. }
  655. }
  656. char* __restrict__ ptr;
  657. size_t N;
  658. };
  659. template<size_t MAX_K>
  660. struct StackRayStreamSOA : public RayStreamSOA
  661. {
  662. __forceinline StackRayStreamSOA(size_t K)
  663. : RayStreamSOA(data, K) { assert(K <= MAX_K); }
  664. char data[MAX_K / 4 * sizeof(RayHit4)];
  665. };
  666. struct RayStreamSOP
  667. {
  668. template<class T>
  669. __forceinline void init(T& t)
  670. {
  671. org_x = (float*)&t.org.x;
  672. org_y = (float*)&t.org.y;
  673. org_z = (float*)&t.org.z;
  674. tnear = (float*)&t.tnear;
  675. dir_x = (float*)&t.dir.x;
  676. dir_y = (float*)&t.dir.y;
  677. dir_z = (float*)&t.dir.z;
  678. time = (float*)&t.time;
  679. tfar = (float*)&t.tfar;
  680. mask = (unsigned int*)&t.mask;
  681. id = (unsigned int*)&t.id;
  682. flags = (unsigned int*)&t.flags;
  683. Ng_x = (float*)&t.Ng.x;
  684. Ng_y = (float*)&t.Ng.y;
  685. Ng_z = (float*)&t.Ng.z;
  686. u = (float*)&t.u;
  687. v = (float*)&t.v;
  688. primID = (unsigned int*)&t.primID;
  689. geomID = (unsigned int*)&t.geomID;
  690. for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
  691. instID[l] = (unsigned int*)&t.instID[l];
  692. }
  693. __forceinline Ray getRayByOffset(size_t offset)
  694. {
  695. Ray ray;
  696. ray.org.x = *(float* __restrict__)((char*)org_x + offset);
  697. ray.org.y = *(float* __restrict__)((char*)org_y + offset);
  698. ray.org.z = *(float* __restrict__)((char*)org_z + offset);
  699. ray.dir.x = *(float* __restrict__)((char*)dir_x + offset);
  700. ray.dir.y = *(float* __restrict__)((char*)dir_y + offset);
  701. ray.dir.z = *(float* __restrict__)((char*)dir_z + offset);
  702. ray.tfar = *(float* __restrict__)((char*)tfar + offset);
  703. ray.tnear() = tnear ? *(float* __restrict__)((char*)tnear + offset) : 0.0f;
  704. ray.time() = time ? *(float* __restrict__)((char*)time + offset) : 0.0f;
  705. ray.mask = mask ? *(unsigned int* __restrict__)((char*)mask + offset) : -1;
  706. ray.id = id ? *(unsigned int* __restrict__)((char*)id + offset) : -1;
  707. ray.flags = flags ? *(unsigned int* __restrict__)((char*)flags + offset) : -1;
  708. return ray;
  709. }
  710. template<int K>
  711. __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, size_t offset)
  712. {
  713. RayK<K> ray;
  714. ray.org.x = vfloat<K>::loadu(valid, (float* __restrict__)((char*)org_x + offset));
  715. ray.org.y = vfloat<K>::loadu(valid, (float* __restrict__)((char*)org_y + offset));
  716. ray.org.z = vfloat<K>::loadu(valid, (float* __restrict__)((char*)org_z + offset));
  717. ray.dir.x = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_x + offset));
  718. ray.dir.y = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_y + offset));
  719. ray.dir.z = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_z + offset));
  720. ray.tfar = vfloat<K>::loadu(valid, (float* __restrict__)((char*)tfar + offset));
  721. ray.tnear() = tnear ? vfloat<K>::loadu(valid, (float* __restrict__)((char*)tnear + offset)) : 0.0f;
  722. ray.time() = time ? vfloat<K>::loadu(valid, (float* __restrict__)((char*)time + offset)) : 0.0f;
  723. ray.mask = mask ? vint<K>::loadu(valid, (const void* __restrict__)((char*)mask + offset)) : -1;
  724. ray.id = id ? vint<K>::loadu(valid, (const void* __restrict__)((char*)id + offset)) : -1;
  725. ray.flags = flags ? vint<K>::loadu(valid, (const void* __restrict__)((char*)flags + offset)) : -1;
  726. return ray;
  727. }
  728. template<int K>
  729. __forceinline Vec3vf<K> getDirByOffset(const vbool<K>& valid, size_t offset)
  730. {
  731. Vec3vf<K> dir;
  732. dir.x = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_x + offset));
  733. dir.y = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_y + offset));
  734. dir.z = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_z + offset));
  735. return dir;
  736. }
  737. __forceinline void setHitByOffset(size_t offset, const RayHit& ray)
  738. {
  739. if (ray.geomID != RTC_INVALID_GEOMETRY_ID)
  740. {
  741. *(float* __restrict__)((char*)tfar + offset) = ray.tfar;
  742. if (likely(Ng_x)) *(float* __restrict__)((char*)Ng_x + offset) = ray.Ng.x;
  743. if (likely(Ng_y)) *(float* __restrict__)((char*)Ng_y + offset) = ray.Ng.y;
  744. if (likely(Ng_z)) *(float* __restrict__)((char*)Ng_z + offset) = ray.Ng.z;
  745. *(float* __restrict__)((char*)u + offset) = ray.u;
  746. *(float* __restrict__)((char*)v + offset) = ray.v;
  747. *(unsigned int* __restrict__)((char*)geomID + offset) = ray.geomID;
  748. *(unsigned int* __restrict__)((char*)primID + offset) = ray.primID;
  749. if (likely(instID[0])) {
  750. *(unsigned int* __restrict__)((char*)instID[0] + offset) = ray.instID[0];
  751. #if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
  752. for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID; ++l)
  753. *(unsigned int* __restrict__)((char*)instID[l] + offset) = ray.instID[l];
  754. #endif
  755. }
  756. }
  757. }
  758. __forceinline void setHitByOffset(size_t offset, const Ray& ray)
  759. {
  760. *(float* __restrict__)((char*)tfar + offset) = ray.tfar;
  761. }
  762. template<int K>
  763. __forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayHitK<K>& ray)
  764. {
  765. vbool<K> valid = valid_i;
  766. valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
  767. if (likely(any(valid)))
  768. {
  769. vfloat<K>::storeu(valid, (float* __restrict__)((char*)tfar + offset), ray.tfar);
  770. if (likely(Ng_x)) vfloat<K>::storeu(valid, (float* __restrict__)((char*)Ng_x + offset), ray.Ng.x);
  771. if (likely(Ng_y)) vfloat<K>::storeu(valid, (float* __restrict__)((char*)Ng_y + offset), ray.Ng.y);
  772. if (likely(Ng_z)) vfloat<K>::storeu(valid, (float* __restrict__)((char*)Ng_z + offset), ray.Ng.z);
  773. vfloat<K>::storeu(valid, (float* __restrict__)((char*)u + offset), ray.u);
  774. vfloat<K>::storeu(valid, (float* __restrict__)((char*)v + offset), ray.v);
  775. vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)primID + offset), ray.primID);
  776. vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)geomID + offset), ray.geomID);
  777. if (likely(instID[0])) {
  778. vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)instID[0] + offset), ray.instID[0]);
  779. #if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
  780. for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l)
  781. vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)instID[l] + offset), ray.instID[l]);
  782. #endif
  783. }
  784. }
  785. }
  786. template<int K>
  787. __forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayK<K>& ray)
  788. {
  789. vbool<K> valid = valid_i;
  790. valid &= (ray.tfar < 0.0f);
  791. if (likely(any(valid)))
  792. vfloat<K>::storeu(valid, (float* __restrict__)((char*)tfar + offset), ray.tfar);
  793. }
  794. __forceinline size_t getOctantByOffset(size_t offset)
  795. {
  796. const float dx = *(float* __restrict__)((char*)dir_x + offset);
  797. const float dy = *(float* __restrict__)((char*)dir_y + offset);
  798. const float dz = *(float* __restrict__)((char*)dir_z + offset);
  799. const size_t octantID = (dx < 0.0f ? 1 : 0) + (dy < 0.0f ? 2 : 0) + (dz < 0.0f ? 4 : 0);
  800. return octantID;
  801. }
  802. __forceinline bool isValidByOffset(size_t offset)
  803. {
  804. const float nnear = tnear ? *(float* __restrict__)((char*)tnear + offset) : 0.0f;
  805. const float ffar = *(float* __restrict__)((char*)tfar + offset);
  806. return nnear <= ffar;
  807. }
  808. template<int K>
  809. __forceinline vbool<K> isValidByOffset(const vbool<K>& valid, size_t offset)
  810. {
  811. const vfloat<K> nnear = tnear ? vfloat<K>::loadu(valid, (float* __restrict__)((char*)tnear + offset)) : 0.0f;
  812. const vfloat<K> ffar = vfloat<K>::loadu(valid, (float* __restrict__)((char*)tfar + offset));
  813. return nnear <= ffar;
  814. }
  815. template<int K>
  816. __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, const vint<K>& offset)
  817. {
  818. RayK<K> ray;
  819. #if defined(__AVX2__)
  820. ray.org.x = vfloat<K>::template gather<1>(valid, org_x, offset);
  821. ray.org.y = vfloat<K>::template gather<1>(valid, org_y, offset);
  822. ray.org.z = vfloat<K>::template gather<1>(valid, org_z, offset);
  823. ray.dir.x = vfloat<K>::template gather<1>(valid, dir_x, offset);
  824. ray.dir.y = vfloat<K>::template gather<1>(valid, dir_y, offset);
  825. ray.dir.z = vfloat<K>::template gather<1>(valid, dir_z, offset);
  826. ray.tfar = vfloat<K>::template gather<1>(valid, tfar, offset);
  827. ray.tnear() = tnear ? vfloat<K>::template gather<1>(valid, tnear, offset) : vfloat<K>(zero);
  828. ray.time() = time ? vfloat<K>::template gather<1>(valid, time, offset) : vfloat<K>(zero);
  829. ray.mask = mask ? vint<K>::template gather<1>(valid, (int*)mask, offset) : vint<K>(-1);
  830. ray.id = id ? vint<K>::template gather<1>(valid, (int*)id, offset) : vint<K>(-1);
  831. ray.flags = flags ? vint<K>::template gather<1>(valid, (int*)flags, offset) : vint<K>(-1);
  832. #else
  833. ray.org = zero;
  834. ray.tnear() = zero;
  835. ray.dir = zero;
  836. ray.tfar = zero;
  837. ray.time() = zero;
  838. ray.mask = zero;
  839. ray.id = zero;
  840. ray.flags = zero;
  841. for (size_t k = 0; k < K; k++)
  842. {
  843. if (likely(valid[k]))
  844. {
  845. const size_t ofs = offset[k];
  846. ray.org.x[k] = *(float* __restrict__)((char*)org_x + ofs);
  847. ray.org.y[k] = *(float* __restrict__)((char*)org_y + ofs);
  848. ray.org.z[k] = *(float* __restrict__)((char*)org_z + ofs);
  849. ray.dir.x[k] = *(float* __restrict__)((char*)dir_x + ofs);
  850. ray.dir.y[k] = *(float* __restrict__)((char*)dir_y + ofs);
  851. ray.dir.z[k] = *(float* __restrict__)((char*)dir_z + ofs);
  852. ray.tfar[k] = *(float* __restrict__)((char*)tfar + ofs);
  853. ray.tnear()[k] = tnear ? *(float* __restrict__)((char*)tnear + ofs) : 0.0f;
  854. ray.time()[k] = time ? *(float* __restrict__)((char*)time + ofs) : 0.0f;
  855. ray.mask[k] = mask ? *(int* __restrict__)((char*)mask + ofs) : -1;
  856. ray.id[k] = id ? *(int* __restrict__)((char*)id + ofs) : -1;
  857. ray.flags[k] = flags ? *(int* __restrict__)((char*)flags + ofs) : -1;
  858. }
  859. }
  860. #endif
  861. return ray;
  862. }
  863. template<int K>
  864. __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayHitK<K>& ray)
  865. {
  866. vbool<K> valid = valid_i;
  867. valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
  868. if (likely(any(valid)))
  869. {
  870. #if defined(__AVX512F__)
  871. vfloat<K>::template scatter<1>(valid, tfar, offset, ray.tfar);
  872. if (likely(Ng_x)) vfloat<K>::template scatter<1>(valid, Ng_x, offset, ray.Ng.x);
  873. if (likely(Ng_y)) vfloat<K>::template scatter<1>(valid, Ng_y, offset, ray.Ng.y);
  874. if (likely(Ng_z)) vfloat<K>::template scatter<1>(valid, Ng_z, offset, ray.Ng.z);
  875. vfloat<K>::template scatter<1>(valid, u, offset, ray.u);
  876. vfloat<K>::template scatter<1>(valid, v, offset, ray.v);
  877. vuint<K>::template scatter<1>(valid, (unsigned int*)geomID, offset, ray.geomID);
  878. vuint<K>::template scatter<1>(valid, (unsigned int*)primID, offset, ray.primID);
  879. if (likely(instID[0])) {
  880. vuint<K>::template scatter<1>(valid, (unsigned int*)instID[0], offset, ray.instID[0]);
  881. #if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
  882. for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l)
  883. vuint<K>::template scatter<1>(valid, (unsigned int*)instID[l], offset, ray.instID[l]);
  884. #endif
  885. }
  886. #else
  887. size_t valid_bits = movemask(valid);
  888. while (valid_bits != 0)
  889. {
  890. const size_t k = bscf(valid_bits);
  891. const size_t ofs = offset[k];
  892. *(float* __restrict__)((char*)tfar + ofs) = ray.tfar[k];
  893. if (likely(Ng_x)) *(float* __restrict__)((char*)Ng_x + ofs) = ray.Ng.x[k];
  894. if (likely(Ng_y)) *(float* __restrict__)((char*)Ng_y + ofs) = ray.Ng.y[k];
  895. if (likely(Ng_z)) *(float* __restrict__)((char*)Ng_z + ofs) = ray.Ng.z[k];
  896. *(float* __restrict__)((char*)u + ofs) = ray.u[k];
  897. *(float* __restrict__)((char*)v + ofs) = ray.v[k];
  898. *(unsigned int* __restrict__)((char*)primID + ofs) = ray.primID[k];
  899. *(unsigned int* __restrict__)((char*)geomID + ofs) = ray.geomID[k];
  900. if (likely(instID[0])) {
  901. *(unsigned int* __restrict__)((char*)instID[0] + ofs) = ray.instID[0][k];
  902. #if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
  903. for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1][k] != RTC_INVALID_GEOMETRY_ID; ++l)
  904. *(unsigned int* __restrict__)((char*)instID[l] + ofs) = ray.instID[l][k];
  905. #endif
  906. }
  907. }
  908. #endif
  909. }
  910. }
  911. template<int K>
  912. __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayK<K>& ray)
  913. {
  914. vbool<K> valid = valid_i;
  915. valid &= (ray.tfar < 0.0f);
  916. if (likely(any(valid)))
  917. {
  918. #if defined(__AVX512F__)
  919. vfloat<K>::template scatter<1>(valid, tfar, offset, ray.tfar);
  920. #else
  921. size_t valid_bits = movemask(valid);
  922. while (valid_bits != 0)
  923. {
  924. const size_t k = bscf(valid_bits);
  925. const size_t ofs = offset[k];
  926. *(float* __restrict__)((char*)tfar + ofs) = ray.tfar[k];
  927. }
  928. #endif
  929. }
  930. }
  931. /* ray data */
  932. float* __restrict__ org_x; // x coordinate of ray origin
  933. float* __restrict__ org_y; // y coordinate of ray origin
  934. float* __restrict__ org_z; // z coordinate of ray origin
  935. float* __restrict__ tnear; // start of ray segment (optional: the accessors above use 0 when this pointer is null)
  936. float* __restrict__ dir_x; // x coordinate of ray direction
  937. float* __restrict__ dir_y; // y coordinate of ray direction
  938. float* __restrict__ dir_z; // z coordinate of ray direction
  939. float* __restrict__ time; // time of this ray for motion blur (optional: getRayByOffset uses 0 when null)
  940. float* __restrict__ tfar; // end of ray segment (overwritten by setHitByOffset with the tfar of the traced ray)
  941. unsigned int* __restrict__ mask; // used to mask out objects during traversal (optional: getRayByOffset uses -1 when null)
  942. unsigned int* __restrict__ id; // ray ID (null-checked by getRayByOffset, which uses -1 when null)
  943. unsigned int* __restrict__ flags; // ray flags (null-checked by getRayByOffset, which uses -1 when null)
  944. /* hit data */
  945. float* __restrict__ Ng_x; // x coordinate of geometry normal (optional: not written when null)
  946. float* __restrict__ Ng_y; // y coordinate of geometry normal (optional: not written when null)
  947. float* __restrict__ Ng_z; // z coordinate of geometry normal (optional: not written when null)
  948. float* __restrict__ u; // barycentric u coordinate of hit (written without a null check)
  949. float* __restrict__ v; // barycentric v coordinate of hit (written without a null check)
  950. unsigned int* __restrict__ primID; // primitive ID (written without a null check)
  951. unsigned int* __restrict__ geomID; // geometry ID (written without a null check)
  952. unsigned int* __restrict__ instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID per level (optional: only instID[0] is null-checked before any level is written)
  953. };
  954. struct RayStreamAOS
  955. {
  956. __forceinline RayStreamAOS(void* rays)
  957. : ptr((Ray*)rays) {}
  958. __forceinline Ray& getRayByOffset(size_t offset)
  959. {
  960. return *(Ray*)((char*)ptr + offset);
  961. }
  962. template<int K>
  963. __forceinline RayK<K> getRayByOffset(const vint<K>& offset);
  964. template<int K>
  965. __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, const vint<K>& offset)
  966. {
  967. const vint<K> valid_offset = select(valid, offset, vintx(zero));
  968. return getRayByOffset<K>(valid_offset);
  969. }
  970. template<int K>
  971. __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayHitK<K>& ray)
  972. {
  973. vbool<K> valid = valid_i;
  974. valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
  975. if (likely(any(valid)))
  976. {
  977. #if defined(__AVX512F__)
  978. vfloat<K>::template scatter<1>(valid, &ptr->tfar, offset, ray.tfar);
  979. vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->Ng.x, offset, ray.Ng.x);
  980. vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->Ng.y, offset, ray.Ng.y);
  981. vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->Ng.z, offset, ray.Ng.z);
  982. vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->u, offset, ray.u);
  983. vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->v, offset, ray.v);
  984. vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->primID, offset, ray.primID);
  985. vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->geomID, offset, ray.geomID);
  986. vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->instID[0], offset, ray.instID[0]);
  987. #if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
  988. for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l)
  989. vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->instID[l], offset, ray.instID[l]);
  990. #endif
  991. #else
  992. size_t valid_bits = movemask(valid);
  993. while (valid_bits != 0)
  994. {
  995. const size_t k = bscf(valid_bits);
  996. RayHit* __restrict__ ray_k = (RayHit*)((char*)ptr + offset[k]);
  997. ray_k->tfar = ray.tfar[k];
  998. ray_k->Ng.x = ray.Ng.x[k];
  999. ray_k->Ng.y = ray.Ng.y[k];
  1000. ray_k->Ng.z = ray.Ng.z[k];
  1001. ray_k->u = ray.u[k];
  1002. ray_k->v = ray.v[k];
  1003. ray_k->primID = ray.primID[k];
  1004. ray_k->geomID = ray.geomID[k];
  1005. instance_id_stack::copy_VU<K>(ray.instID, ray_k->instID, k);
  1006. }
  1007. #endif
  1008. }
  1009. }
  1010. template<int K>
  1011. __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayK<K>& ray)
  1012. {
  1013. vbool<K> valid = valid_i;
  1014. valid &= (ray.tfar < 0.0f);
  1015. if (likely(any(valid)))
  1016. {
  1017. #if defined(__AVX512F__)
  1018. vfloat<K>::template scatter<1>(valid, &ptr->tfar, offset, ray.tfar);
  1019. #else
  1020. size_t valid_bits = movemask(valid);
  1021. while (valid_bits != 0)
  1022. {
  1023. const size_t k = bscf(valid_bits);
  1024. Ray* __restrict__ ray_k = (Ray*)((char*)ptr + offset[k]);
  1025. ray_k->tfar = ray.tfar[k];
  1026. }
  1027. #endif
  1028. }
  1029. }
  1030. Ray* __restrict__ ptr;
  1031. };
  1032. template<>
  1033. __forceinline Ray4 RayStreamAOS::getRayByOffset<4>(const vint4& offset)
  1034. {
  1035. Ray4 ray;
  1036. /* load and transpose: org.x, org.y, org.z, tnear */
  1037. const vfloat4 a0 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[0]))->org);
  1038. const vfloat4 a1 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[1]))->org);
  1039. const vfloat4 a2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[2]))->org);
  1040. const vfloat4 a3 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[3]))->org);
  1041. transpose(a0,a1,a2,a3, ray.org.x, ray.org.y, ray.org.z, ray.tnear());
  1042. /* load and transpose: dir.x, dir.y, dir.z, time */
  1043. const vfloat4 b0 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[0]))->dir);
  1044. const vfloat4 b1 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[1]))->dir);
  1045. const vfloat4 b2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[2]))->dir);
  1046. const vfloat4 b3 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[3]))->dir);
  1047. transpose(b0,b1,b2,b3, ray.dir.x, ray.dir.y, ray.dir.z, ray.time());
  1048. /* load and transpose: tfar, mask, id, flags */
  1049. const vfloat4 c0 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[0]))->tfar);
  1050. const vfloat4 c1 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[1]))->tfar);
  1051. const vfloat4 c2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[2]))->tfar);
  1052. const vfloat4 c3 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[3]))->tfar);
  1053. vfloat4 maskf, idf, flagsf;
  1054. transpose(c0,c1,c2,c3, ray.tfar, maskf, idf, flagsf);
  1055. ray.mask = asInt(maskf);
  1056. ray.id = asInt(idf);
  1057. ray.flags = asInt(flagsf);
  1058. return ray;
  1059. }
  1060. #if defined(__AVX__)
  1061. template<>
  1062. __forceinline Ray8 RayStreamAOS::getRayByOffset<8>(const vint8& offset)
  1063. {
  1064. Ray8 ray;
  1065. /* load and transpose: org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */
  1066. const vfloat8 ab0 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[0]))->org);
  1067. const vfloat8 ab1 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[1]))->org);
  1068. const vfloat8 ab2 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[2]))->org);
  1069. const vfloat8 ab3 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[3]))->org);
  1070. const vfloat8 ab4 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[4]))->org);
  1071. const vfloat8 ab5 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[5]))->org);
  1072. const vfloat8 ab6 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[6]))->org);
  1073. const vfloat8 ab7 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[7]))->org);
  1074. transpose(ab0,ab1,ab2,ab3,ab4,ab5,ab6,ab7, ray.org.x, ray.org.y, ray.org.z, ray.tnear(), ray.dir.x, ray.dir.y, ray.dir.z, ray.time());
  1075. /* load and transpose: tfar, mask, id, flags */
  1076. const vfloat4 c0 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[0]))->tfar);
  1077. const vfloat4 c1 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[1]))->tfar);
  1078. const vfloat4 c2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[2]))->tfar);
  1079. const vfloat4 c3 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[3]))->tfar);
  1080. const vfloat4 c4 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[4]))->tfar);
  1081. const vfloat4 c5 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[5]))->tfar);
  1082. const vfloat4 c6 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[6]))->tfar);
  1083. const vfloat4 c7 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[7]))->tfar);
  1084. vfloat8 maskf, idf, flagsf;
  1085. transpose(c0,c1,c2,c3,c4,c5,c6,c7, ray.tfar, maskf, idf, flagsf);
  1086. ray.mask = asInt(maskf);
  1087. ray.id = asInt(idf);
  1088. ray.flags = asInt(flagsf);
  1089. return ray;
  1090. }
  1091. #endif
  1092. #if defined(__AVX512F__)
  1093. template<>
  1094. __forceinline Ray16 RayStreamAOS::getRayByOffset<16>(const vint16& offset)
  1095. {
  1096. Ray16 ray;
  1097. /* load and transpose: org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */
  1098. const vfloat8 ab0 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 0]))->org);
  1099. const vfloat8 ab1 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 1]))->org);
  1100. const vfloat8 ab2 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 2]))->org);
  1101. const vfloat8 ab3 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 3]))->org);
  1102. const vfloat8 ab4 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 4]))->org);
  1103. const vfloat8 ab5 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 5]))->org);
  1104. const vfloat8 ab6 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 6]))->org);
  1105. const vfloat8 ab7 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 7]))->org);
  1106. const vfloat8 ab8 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 8]))->org);
  1107. const vfloat8 ab9 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 9]))->org);
  1108. const vfloat8 ab10 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[10]))->org);
  1109. const vfloat8 ab11 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[11]))->org);
  1110. const vfloat8 ab12 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[12]))->org);
  1111. const vfloat8 ab13 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[13]))->org);
  1112. const vfloat8 ab14 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[14]))->org);
  1113. const vfloat8 ab15 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[15]))->org);
  1114. transpose(ab0,ab1,ab2,ab3,ab4,ab5,ab6,ab7,ab8,ab9,ab10,ab11,ab12,ab13,ab14,ab15,
  1115. ray.org.x, ray.org.y, ray.org.z, ray.tnear(), ray.dir.x, ray.dir.y, ray.dir.z, ray.time());
  1116. /* load and transpose: tfar, mask, id, flags */
  1117. const vfloat4 c0 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 0]))->tfar);
  1118. const vfloat4 c1 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 1]))->tfar);
  1119. const vfloat4 c2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 2]))->tfar);
  1120. const vfloat4 c3 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 3]))->tfar);
  1121. const vfloat4 c4 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 4]))->tfar);
  1122. const vfloat4 c5 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 5]))->tfar);
  1123. const vfloat4 c6 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 6]))->tfar);
  1124. const vfloat4 c7 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 7]))->tfar);
  1125. const vfloat4 c8 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 8]))->tfar);
  1126. const vfloat4 c9 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 9]))->tfar);
  1127. const vfloat4 c10 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[10]))->tfar);
  1128. const vfloat4 c11 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[11]))->tfar);
  1129. const vfloat4 c12 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[12]))->tfar);
  1130. const vfloat4 c13 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[13]))->tfar);
  1131. const vfloat4 c14 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[14]))->tfar);
  1132. const vfloat4 c15 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[15]))->tfar);
  1133. vfloat16 maskf, idf, flagsf;
  1134. transpose(c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,
  1135. ray.tfar, maskf, idf, flagsf);
  1136. ray.mask = asInt(maskf);
  1137. ray.id = asInt(idf);
  1138. ray.flags = asInt(flagsf);
  1139. return ray;
  1140. }
  1141. #endif
  1142. struct RayStreamAOP
  1143. {
  1144. __forceinline RayStreamAOP(void* rays)
  1145. : ptr((Ray**)rays) {}
  1146. __forceinline Ray& getRayByIndex(size_t index)
  1147. {
  1148. return *ptr[index];
  1149. }
  1150. template<int K>
  1151. __forceinline RayK<K> getRayByIndex(const vint<K>& index);
  1152. template<int K>
  1153. __forceinline RayK<K> getRayByIndex(const vbool<K>& valid, const vint<K>& index)
  1154. {
  1155. const vint<K> valid_index = select(valid, index, vintx(zero));
  1156. return getRayByIndex<K>(valid_index);
  1157. }
  1158. template<int K>
  1159. __forceinline void setHitByIndex(const vbool<K>& valid_i, const vint<K>& index, const RayHitK<K>& ray)
  1160. {
  1161. vbool<K> valid = valid_i;
  1162. valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
  1163. if (likely(any(valid)))
  1164. {
  1165. size_t valid_bits = movemask(valid);
  1166. while (valid_bits != 0)
  1167. {
  1168. const size_t k = bscf(valid_bits);
  1169. RayHit* __restrict__ ray_k = (RayHit*)ptr[index[k]];
  1170. ray_k->tfar = ray.tfar[k];
  1171. ray_k->Ng.x = ray.Ng.x[k];
  1172. ray_k->Ng.y = ray.Ng.y[k];
  1173. ray_k->Ng.z = ray.Ng.z[k];
  1174. ray_k->u = ray.u[k];
  1175. ray_k->v = ray.v[k];
  1176. ray_k->primID = ray.primID[k];
  1177. ray_k->geomID = ray.geomID[k];
  1178. instance_id_stack::copy_VU<K>(ray.instID, ray_k->instID, k);
  1179. }
  1180. }
  1181. }
  1182. template<int K>
  1183. __forceinline void setHitByIndex(const vbool<K>& valid_i, const vint<K>& index, const RayK<K>& ray)
  1184. {
  1185. vbool<K> valid = valid_i;
  1186. valid &= (ray.tfar < 0.0f);
  1187. if (likely(any(valid)))
  1188. {
  1189. size_t valid_bits = movemask(valid);
  1190. while (valid_bits != 0)
  1191. {
  1192. const size_t k = bscf(valid_bits);
  1193. Ray* __restrict__ ray_k = ptr[index[k]];
  1194. ray_k->tfar = ray.tfar[k];
  1195. }
  1196. }
  1197. }
  1198. Ray** __restrict__ ptr;
  1199. };
  1200. template<>
  1201. __forceinline Ray4 RayStreamAOP::getRayByIndex<4>(const vint4& index)
  1202. {
  1203. Ray4 ray;
  1204. /* load and transpose: org.x, org.y, org.z, tnear */
  1205. const vfloat4 a0 = vfloat4::loadu(&ptr[index[0]]->org);
  1206. const vfloat4 a1 = vfloat4::loadu(&ptr[index[1]]->org);
  1207. const vfloat4 a2 = vfloat4::loadu(&ptr[index[2]]->org);
  1208. const vfloat4 a3 = vfloat4::loadu(&ptr[index[3]]->org);
  1209. transpose(a0,a1,a2,a3, ray.org.x, ray.org.y, ray.org.z, ray.tnear());
  1210. /* load and transpose: dir.x, dir.y, dir.z, time */
  1211. const vfloat4 b0 = vfloat4::loadu(&ptr[index[0]]->dir);
  1212. const vfloat4 b1 = vfloat4::loadu(&ptr[index[1]]->dir);
  1213. const vfloat4 b2 = vfloat4::loadu(&ptr[index[2]]->dir);
  1214. const vfloat4 b3 = vfloat4::loadu(&ptr[index[3]]->dir);
  1215. transpose(b0,b1,b2,b3, ray.dir.x, ray.dir.y, ray.dir.z, ray.time());
  1216. /* load and transpose: tfar, mask, id, flags */
  1217. const vfloat4 c0 = vfloat4::loadu(&ptr[index[0]]->tfar);
  1218. const vfloat4 c1 = vfloat4::loadu(&ptr[index[1]]->tfar);
  1219. const vfloat4 c2 = vfloat4::loadu(&ptr[index[2]]->tfar);
  1220. const vfloat4 c3 = vfloat4::loadu(&ptr[index[3]]->tfar);
  1221. vfloat4 maskf, idf, flagsf;
  1222. transpose(c0,c1,c2,c3, ray.tfar, maskf, idf, flagsf);
  1223. ray.mask = asInt(maskf);
  1224. ray.id = asInt(idf);
  1225. ray.flags = asInt(flagsf);
  1226. return ray;
  1227. }
  1228. #if defined(__AVX__)
  1229. template<>
  1230. __forceinline Ray8 RayStreamAOP::getRayByIndex<8>(const vint8& index)
  1231. {
  1232. Ray8 ray;
  1233. /* load and transpose: org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */
  1234. const vfloat8 ab0 = vfloat8::loadu(&ptr[index[0]]->org);
  1235. const vfloat8 ab1 = vfloat8::loadu(&ptr[index[1]]->org);
  1236. const vfloat8 ab2 = vfloat8::loadu(&ptr[index[2]]->org);
  1237. const vfloat8 ab3 = vfloat8::loadu(&ptr[index[3]]->org);
  1238. const vfloat8 ab4 = vfloat8::loadu(&ptr[index[4]]->org);
  1239. const vfloat8 ab5 = vfloat8::loadu(&ptr[index[5]]->org);
  1240. const vfloat8 ab6 = vfloat8::loadu(&ptr[index[6]]->org);
  1241. const vfloat8 ab7 = vfloat8::loadu(&ptr[index[7]]->org);
  1242. transpose(ab0,ab1,ab2,ab3,ab4,ab5,ab6,ab7, ray.org.x, ray.org.y, ray.org.z, ray.tnear(), ray.dir.x, ray.dir.y, ray.dir.z, ray.time());
  1243. /* load and transpose: tfar, mask, id, flags */
  1244. const vfloat4 c0 = vfloat4::loadu(&ptr[index[0]]->tfar);
  1245. const vfloat4 c1 = vfloat4::loadu(&ptr[index[1]]->tfar);
  1246. const vfloat4 c2 = vfloat4::loadu(&ptr[index[2]]->tfar);
  1247. const vfloat4 c3 = vfloat4::loadu(&ptr[index[3]]->tfar);
  1248. const vfloat4 c4 = vfloat4::loadu(&ptr[index[4]]->tfar);
  1249. const vfloat4 c5 = vfloat4::loadu(&ptr[index[5]]->tfar);
  1250. const vfloat4 c6 = vfloat4::loadu(&ptr[index[6]]->tfar);
  1251. const vfloat4 c7 = vfloat4::loadu(&ptr[index[7]]->tfar);
  1252. vfloat8 maskf, idf, flagsf;
  1253. transpose(c0,c1,c2,c3,c4,c5,c6,c7, ray.tfar, maskf, idf, flagsf);
  1254. ray.mask = asInt(maskf);
  1255. ray.id = asInt(idf);
  1256. ray.flags = asInt(flagsf);
  1257. return ray;
  1258. }
  1259. #endif
  1260. #if defined(__AVX512F__)
  1261. template<>
  1262. __forceinline Ray16 RayStreamAOP::getRayByIndex<16>(const vint16& index)
  1263. {
  1264. Ray16 ray;
  1265. /* load and transpose: org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */
  1266. const vfloat8 ab0 = vfloat8::loadu(&ptr[index[0]]->org);
  1267. const vfloat8 ab1 = vfloat8::loadu(&ptr[index[1]]->org);
  1268. const vfloat8 ab2 = vfloat8::loadu(&ptr[index[2]]->org);
  1269. const vfloat8 ab3 = vfloat8::loadu(&ptr[index[3]]->org);
  1270. const vfloat8 ab4 = vfloat8::loadu(&ptr[index[4]]->org);
  1271. const vfloat8 ab5 = vfloat8::loadu(&ptr[index[5]]->org);
  1272. const vfloat8 ab6 = vfloat8::loadu(&ptr[index[6]]->org);
  1273. const vfloat8 ab7 = vfloat8::loadu(&ptr[index[7]]->org);
  1274. const vfloat8 ab8 = vfloat8::loadu(&ptr[index[8]]->org);
  1275. const vfloat8 ab9 = vfloat8::loadu(&ptr[index[9]]->org);
  1276. const vfloat8 ab10 = vfloat8::loadu(&ptr[index[10]]->org);
  1277. const vfloat8 ab11 = vfloat8::loadu(&ptr[index[11]]->org);
  1278. const vfloat8 ab12 = vfloat8::loadu(&ptr[index[12]]->org);
  1279. const vfloat8 ab13 = vfloat8::loadu(&ptr[index[13]]->org);
  1280. const vfloat8 ab14 = vfloat8::loadu(&ptr[index[14]]->org);
  1281. const vfloat8 ab15 = vfloat8::loadu(&ptr[index[15]]->org);
  1282. transpose(ab0,ab1,ab2,ab3,ab4,ab5,ab6,ab7,ab8,ab9,ab10,ab11,ab12,ab13,ab14,ab15,
  1283. ray.org.x, ray.org.y, ray.org.z, ray.tnear(), ray.dir.x, ray.dir.y, ray.dir.z, ray.time());
  1284. /* load and transpose: tfar, mask, id, flags */
  1285. const vfloat4 c0 = vfloat4::loadu(&ptr[index[0]]->tfar);
  1286. const vfloat4 c1 = vfloat4::loadu(&ptr[index[1]]->tfar);
  1287. const vfloat4 c2 = vfloat4::loadu(&ptr[index[2]]->tfar);
  1288. const vfloat4 c3 = vfloat4::loadu(&ptr[index[3]]->tfar);
  1289. const vfloat4 c4 = vfloat4::loadu(&ptr[index[4]]->tfar);
  1290. const vfloat4 c5 = vfloat4::loadu(&ptr[index[5]]->tfar);
  1291. const vfloat4 c6 = vfloat4::loadu(&ptr[index[6]]->tfar);
  1292. const vfloat4 c7 = vfloat4::loadu(&ptr[index[7]]->tfar);
  1293. const vfloat4 c8 = vfloat4::loadu(&ptr[index[8]]->tfar);
  1294. const vfloat4 c9 = vfloat4::loadu(&ptr[index[9]]->tfar);
  1295. const vfloat4 c10 = vfloat4::loadu(&ptr[index[10]]->tfar);
  1296. const vfloat4 c11 = vfloat4::loadu(&ptr[index[11]]->tfar);
  1297. const vfloat4 c12 = vfloat4::loadu(&ptr[index[12]]->tfar);
  1298. const vfloat4 c13 = vfloat4::loadu(&ptr[index[13]]->tfar);
  1299. const vfloat4 c14 = vfloat4::loadu(&ptr[index[14]]->tfar);
  1300. const vfloat4 c15 = vfloat4::loadu(&ptr[index[15]]->tfar);
  1301. vfloat16 maskf, idf, flagsf;
  1302. transpose(c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,
  1303. ray.tfar, maskf, idf, flagsf);
  1304. ray.mask = asInt(maskf);
  1305. ray.id = asInt(idf);
  1306. ray.flags = asInt(flagsf);
  1307. return ray;
  1308. }
  1309. #endif
  1310. }