stack_item.h 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "default.h"
  5. namespace embree
  6. {
  7. /*! An item on the stack holds the node ID and distance of that node. */
  8. template<typename T>
  9. struct __aligned(16) StackItemT
  10. {
  11. /*! assert that the xchg function works */
  12. static_assert(sizeof(T) <= 12, "sizeof(T) <= 12 failed");
  13. __forceinline StackItemT() {}
  14. __forceinline StackItemT(T &ptr, unsigned &dist) : ptr(ptr), dist(dist) {}
  15. /*! use SSE instructions to swap stack items */
  16. __forceinline static void xchg(StackItemT& a, StackItemT& b)
  17. {
  18. const vfloat4 sse_a = vfloat4::load((float*)&a);
  19. const vfloat4 sse_b = vfloat4::load((float*)&b);
  20. vfloat4::store(&a,sse_b);
  21. vfloat4::store(&b,sse_a);
  22. }
  23. /*! Sort 2 stack items. */
  24. __forceinline friend void sort(StackItemT& s1, StackItemT& s2) {
  25. if (s2.dist < s1.dist) xchg(s2,s1);
  26. }
  27. /*! Sort 3 stack items. */
  28. __forceinline friend void sort(StackItemT& s1, StackItemT& s2, StackItemT& s3)
  29. {
  30. if (s2.dist < s1.dist) xchg(s2,s1);
  31. if (s3.dist < s2.dist) xchg(s3,s2);
  32. if (s2.dist < s1.dist) xchg(s2,s1);
  33. }
  34. /*! Sort 4 stack items. */
  35. __forceinline friend void sort(StackItemT& s1, StackItemT& s2, StackItemT& s3, StackItemT& s4)
  36. {
  37. if (s2.dist < s1.dist) xchg(s2,s1);
  38. if (s4.dist < s3.dist) xchg(s4,s3);
  39. if (s3.dist < s1.dist) xchg(s3,s1);
  40. if (s4.dist < s2.dist) xchg(s4,s2);
  41. if (s3.dist < s2.dist) xchg(s3,s2);
  42. }
  43. /*! use SSE instructions to swap stack items */
  44. __forceinline static void cmp_xchg(vint4& a, vint4& b)
  45. {
  46. #if defined(__AVX512VL__)
  47. const vboolf4 mask(shuffle<2,2,2,2>(b) < shuffle<2,2,2,2>(a));
  48. #else
  49. const vboolf4 mask0(b < a);
  50. const vboolf4 mask(shuffle<2,2,2,2>(mask0));
  51. #endif
  52. const vint4 c = select(mask,b,a);
  53. const vint4 d = select(mask,a,b);
  54. a = c;
  55. b = d;
  56. }
  57. /*! Sort 3 stack items. */
  58. __forceinline static void sort3(vint4& s1, vint4& s2, vint4& s3)
  59. {
  60. cmp_xchg(s2,s1);
  61. cmp_xchg(s3,s2);
  62. cmp_xchg(s2,s1);
  63. }
  64. /*! Sort 4 stack items. */
  65. __forceinline static void sort4(vint4& s1, vint4& s2, vint4& s3, vint4& s4)
  66. {
  67. cmp_xchg(s2,s1);
  68. cmp_xchg(s4,s3);
  69. cmp_xchg(s3,s1);
  70. cmp_xchg(s4,s2);
  71. cmp_xchg(s3,s2);
  72. }
  73. /*! Sort N stack items. */
  74. __forceinline friend void sort(StackItemT* begin, StackItemT* end)
  75. {
  76. for (StackItemT* i = begin+1; i != end; ++i)
  77. {
  78. const vfloat4 item = vfloat4::load((float*)i);
  79. const unsigned dist = i->dist;
  80. StackItemT* j = i;
  81. while ((j != begin) && ((j-1)->dist < dist))
  82. {
  83. vfloat4::store(j, vfloat4::load((float*)(j-1)));
  84. --j;
  85. }
  86. vfloat4::store(j, item);
  87. }
  88. }
  89. public:
  90. T ptr;
  91. unsigned dist;
  92. };
  93. /*! An item on the stack holds the node ID and active ray mask. */
  94. template<typename T>
  95. struct __aligned(8) StackItemMaskT
  96. {
  97. T ptr;
  98. size_t mask;
  99. };
  100. struct __aligned(8) StackItemMaskCoherent
  101. {
  102. size_t mask;
  103. size_t parent;
  104. size_t child;
  105. };
  106. }