node_intersector_packet_stream.h 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "node_intersector.h"
  5. namespace embree
  6. {
  7. namespace isa
  8. {
  9. //////////////////////////////////////////////////////////////////////////////////////
  10. // Ray packet structure used in stream traversal
  11. //////////////////////////////////////////////////////////////////////////////////////
  12. template<int K, bool robust>
  13. struct TravRayKStream;
  14. /* Fast variant */
  15. template<int K>
  16. struct TravRayKStream<K, false>
  17. {
  18. __forceinline TravRayKStream() {}
  19. __forceinline TravRayKStream(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar)
  20. {
  21. init(ray_org, ray_dir);
  22. tnear = ray_tnear;
  23. tfar = ray_tfar;
  24. }
  25. __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir)
  26. {
  27. rdir = rcp_safe(ray_dir);
  28. #if defined(__aarch64__)
  29. neg_org_rdir = -(ray_org * rdir);
  30. #else
  31. org_rdir = ray_org * rdir;
  32. #endif
  33. }
  34. Vec3vf<K> rdir;
  35. #if defined(__aarch64__)
  36. Vec3vf<K> neg_org_rdir;
  37. #else
  38. Vec3vf<K> org_rdir;
  39. #endif
  40. vfloat<K> tnear;
  41. vfloat<K> tfar;
  42. };
  43. template<int K>
  44. using TravRayKStreamFast = TravRayKStream<K, false>;
  45. /* Robust variant */
  46. template<int K>
  47. struct TravRayKStream<K, true>
  48. {
  49. __forceinline TravRayKStream() {}
  50. __forceinline TravRayKStream(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar)
  51. {
  52. init(ray_org, ray_dir);
  53. tnear = ray_tnear;
  54. tfar = ray_tfar;
  55. }
  56. __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir)
  57. {
  58. rdir = vfloat<K>(1.0f)/(zero_fix(ray_dir));
  59. org = ray_org;
  60. }
  61. Vec3vf<K> rdir;
  62. Vec3vf<K> org;
  63. vfloat<K> tnear;
  64. vfloat<K> tfar;
  65. };
  66. template<int K>
  67. using TravRayKStreamRobust = TravRayKStream<K, true>;
  68. //////////////////////////////////////////////////////////////////////////////////////
  69. // Fast AABBNode intersection
  70. //////////////////////////////////////////////////////////////////////////////////////
  71. template<int N, int K>
  72. __forceinline size_t intersectNode1(const typename BVHN<N>::AABBNode* __restrict__ node,
  73. const TravRayKStreamFast<K>& ray, size_t k, const NearFarPrecalculations& nf)
  74. {
  75. const vfloat<N> bminX = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearX));
  76. const vfloat<N> bminY = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearY));
  77. const vfloat<N> bminZ = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearZ));
  78. const vfloat<N> bmaxX = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farX));
  79. const vfloat<N> bmaxY = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farY));
  80. const vfloat<N> bmaxZ = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farZ));
  81. #if defined (__aarch64__)
  82. const vfloat<N> rminX = madd(bminX, vfloat<N>(ray.rdir.x[k]), vfloat<N>(ray.neg_org_rdir.x[k]));
  83. const vfloat<N> rminY = madd(bminY, vfloat<N>(ray.rdir.y[k]), vfloat<N>(ray.neg_org_rdir.y[k]));
  84. const vfloat<N> rminZ = madd(bminZ, vfloat<N>(ray.rdir.z[k]), vfloat<N>(ray.neg_org_rdir.z[k]));
  85. const vfloat<N> rmaxX = madd(bmaxX, vfloat<N>(ray.rdir.x[k]), vfloat<N>(ray.neg_org_rdir.x[k]));
  86. const vfloat<N> rmaxY = madd(bmaxY, vfloat<N>(ray.rdir.y[k]), vfloat<N>(ray.neg_org_rdir.y[k]));
  87. const vfloat<N> rmaxZ = madd(bmaxZ, vfloat<N>(ray.rdir.z[k]), vfloat<N>(ray.neg_org_rdir.z[k]));
  88. #else
  89. const vfloat<N> rminX = msub(bminX, vfloat<N>(ray.rdir.x[k]), vfloat<N>(ray.org_rdir.x[k]));
  90. const vfloat<N> rminY = msub(bminY, vfloat<N>(ray.rdir.y[k]), vfloat<N>(ray.org_rdir.y[k]));
  91. const vfloat<N> rminZ = msub(bminZ, vfloat<N>(ray.rdir.z[k]), vfloat<N>(ray.org_rdir.z[k]));
  92. const vfloat<N> rmaxX = msub(bmaxX, vfloat<N>(ray.rdir.x[k]), vfloat<N>(ray.org_rdir.x[k]));
  93. const vfloat<N> rmaxY = msub(bmaxY, vfloat<N>(ray.rdir.y[k]), vfloat<N>(ray.org_rdir.y[k]));
  94. const vfloat<N> rmaxZ = msub(bmaxZ, vfloat<N>(ray.rdir.z[k]), vfloat<N>(ray.org_rdir.z[k]));
  95. #endif
  96. const vfloat<N> rmin = maxi(rminX, rminY, rminZ, vfloat<N>(ray.tnear[k]));
  97. const vfloat<N> rmax = mini(rmaxX, rmaxY, rmaxZ, vfloat<N>(ray.tfar[k]));
  98. const vbool<N> vmask_first_hit = rmin <= rmax;
  99. return movemask(vmask_first_hit) & (((size_t)1 << N)-1);
  100. }
  101. template<int N, int K>
  102. __forceinline size_t intersectNodeK(const typename BVHN<N>::AABBNode* __restrict__ node, size_t i,
  103. const TravRayKStreamFast<K>& ray, const NearFarPrecalculations& nf)
  104. {
  105. char* ptr = (char*)&node->lower_x + i*sizeof(float);
  106. const vfloat<K> bminX = *(const float*)(ptr + nf.nearX);
  107. const vfloat<K> bminY = *(const float*)(ptr + nf.nearY);
  108. const vfloat<K> bminZ = *(const float*)(ptr + nf.nearZ);
  109. const vfloat<K> bmaxX = *(const float*)(ptr + nf.farX);
  110. const vfloat<K> bmaxY = *(const float*)(ptr + nf.farY);
  111. const vfloat<K> bmaxZ = *(const float*)(ptr + nf.farZ);
  112. #if defined (__aarch64__)
  113. const vfloat<K> rminX = madd(bminX, ray.rdir.x, ray.neg_org_rdir.x);
  114. const vfloat<K> rminY = madd(bminY, ray.rdir.y, ray.neg_org_rdir.y);
  115. const vfloat<K> rminZ = madd(bminZ, ray.rdir.z, ray.neg_org_rdir.z);
  116. const vfloat<K> rmaxX = madd(bmaxX, ray.rdir.x, ray.neg_org_rdir.x);
  117. const vfloat<K> rmaxY = madd(bmaxY, ray.rdir.y, ray.neg_org_rdir.y);
  118. const vfloat<K> rmaxZ = madd(bmaxZ, ray.rdir.z, ray.neg_org_rdir.z);
  119. #else
  120. const vfloat<K> rminX = msub(bminX, ray.rdir.x, ray.org_rdir.x);
  121. const vfloat<K> rminY = msub(bminY, ray.rdir.y, ray.org_rdir.y);
  122. const vfloat<K> rminZ = msub(bminZ, ray.rdir.z, ray.org_rdir.z);
  123. const vfloat<K> rmaxX = msub(bmaxX, ray.rdir.x, ray.org_rdir.x);
  124. const vfloat<K> rmaxY = msub(bmaxY, ray.rdir.y, ray.org_rdir.y);
  125. const vfloat<K> rmaxZ = msub(bmaxZ, ray.rdir.z, ray.org_rdir.z);
  126. #endif
  127. const vfloat<K> rmin = maxi(rminX, rminY, rminZ, ray.tnear);
  128. const vfloat<K> rmax = mini(rmaxX, rmaxY, rmaxZ, ray.tfar);
  129. const vbool<K> vmask_first_hit = rmin <= rmax;
  130. return movemask(vmask_first_hit);
  131. }
  132. //////////////////////////////////////////////////////////////////////////////////////
  133. // Robust AABBNode intersection
  134. //////////////////////////////////////////////////////////////////////////////////////
  135. template<int N, int K>
  136. __forceinline size_t intersectNode1(const typename BVHN<N>::AABBNode* __restrict__ node,
  137. const TravRayKStreamRobust<K>& ray, size_t k, const NearFarPrecalculations& nf)
  138. {
  139. const vfloat<N> bminX = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearX));
  140. const vfloat<N> bminY = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearY));
  141. const vfloat<N> bminZ = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearZ));
  142. const vfloat<N> bmaxX = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farX));
  143. const vfloat<N> bmaxY = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farY));
  144. const vfloat<N> bmaxZ = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farZ));
  145. const vfloat<N> rminX = (bminX - vfloat<N>(ray.org.x[k])) * vfloat<N>(ray.rdir.x[k]);
  146. const vfloat<N> rminY = (bminY - vfloat<N>(ray.org.y[k])) * vfloat<N>(ray.rdir.y[k]);
  147. const vfloat<N> rminZ = (bminZ - vfloat<N>(ray.org.z[k])) * vfloat<N>(ray.rdir.z[k]);
  148. const vfloat<N> rmaxX = (bmaxX - vfloat<N>(ray.org.x[k])) * vfloat<N>(ray.rdir.x[k]);
  149. const vfloat<N> rmaxY = (bmaxY - vfloat<N>(ray.org.y[k])) * vfloat<N>(ray.rdir.y[k]);
  150. const vfloat<N> rmaxZ = (bmaxZ - vfloat<N>(ray.org.z[k])) * vfloat<N>(ray.rdir.z[k]);
  151. const float round_up = 1.0f+3.0f*float(ulp); // FIXME: use per instruction rounding for AVX512
  152. const vfloat<N> rmin = max(rminX, rminY, rminZ, vfloat<N>(ray.tnear[k]));
  153. const vfloat<N> rmax = round_up *min(rmaxX, rmaxY, rmaxZ, vfloat<N>(ray.tfar[k]));
  154. const vbool<N> vmask_first_hit = rmin <= rmax;
  155. return movemask(vmask_first_hit) & (((size_t)1 << N)-1);
  156. }
  157. template<int N, int K>
  158. __forceinline size_t intersectNodeK(const typename BVHN<N>::AABBNode* __restrict__ node, size_t i,
  159. const TravRayKStreamRobust<K>& ray, const NearFarPrecalculations& nf)
  160. {
  161. char *ptr = (char*)&node->lower_x + i*sizeof(float);
  162. const vfloat<K> bminX = *(const float*)(ptr + nf.nearX);
  163. const vfloat<K> bminY = *(const float*)(ptr + nf.nearY);
  164. const vfloat<K> bminZ = *(const float*)(ptr + nf.nearZ);
  165. const vfloat<K> bmaxX = *(const float*)(ptr + nf.farX);
  166. const vfloat<K> bmaxY = *(const float*)(ptr + nf.farY);
  167. const vfloat<K> bmaxZ = *(const float*)(ptr + nf.farZ);
  168. const vfloat<K> rminX = (bminX - ray.org.x) * ray.rdir.x;
  169. const vfloat<K> rminY = (bminY - ray.org.y) * ray.rdir.y;
  170. const vfloat<K> rminZ = (bminZ - ray.org.z) * ray.rdir.z;
  171. const vfloat<K> rmaxX = (bmaxX - ray.org.x) * ray.rdir.x;
  172. const vfloat<K> rmaxY = (bmaxY - ray.org.y) * ray.rdir.y;
  173. const vfloat<K> rmaxZ = (bmaxZ - ray.org.z) * ray.rdir.z;
  174. const float round_up = 1.0f+3.0f*float(ulp);
  175. const vfloat<K> rmin = max(rminX, rminY, rminZ, vfloat<K>(ray.tnear));
  176. const vfloat<K> rmax = round_up * min(rmaxX, rmaxY, rmaxZ, vfloat<K>(ray.tfar));
  177. const vbool<K> vmask_first_hit = rmin <= rmax;
  178. return movemask(vmask_first_hit);
  179. }
  180. }
  181. }