123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126 |
- // Copyright 2009-2021 Intel Corporation
- // SPDX-License-Identifier: Apache-2.0
- #pragma once
- #include "default.h"
- namespace embree
- {
- /*! An item on the stack holds the node ID and distance of that node. */
- template<typename T>
- struct __aligned(16) StackItemT
- {
- /*! assert that the xchg function works */
- static_assert(sizeof(T) <= 12, "sizeof(T) <= 12 failed");
- __forceinline StackItemT() {}
- __forceinline StackItemT(T &ptr, unsigned &dist) : ptr(ptr), dist(dist) {}
- /*! use SSE instructions to swap stack items */
- __forceinline static void xchg(StackItemT& a, StackItemT& b)
- {
- const vfloat4 sse_a = vfloat4::load((float*)&a);
- const vfloat4 sse_b = vfloat4::load((float*)&b);
- vfloat4::store(&a,sse_b);
- vfloat4::store(&b,sse_a);
- }
- /*! Sort 2 stack items. */
- __forceinline friend void sort(StackItemT& s1, StackItemT& s2) {
- if (s2.dist < s1.dist) xchg(s2,s1);
- }
-
- /*! Sort 3 stack items. */
- __forceinline friend void sort(StackItemT& s1, StackItemT& s2, StackItemT& s3)
- {
- if (s2.dist < s1.dist) xchg(s2,s1);
- if (s3.dist < s2.dist) xchg(s3,s2);
- if (s2.dist < s1.dist) xchg(s2,s1);
- }
-
- /*! Sort 4 stack items. */
- __forceinline friend void sort(StackItemT& s1, StackItemT& s2, StackItemT& s3, StackItemT& s4)
- {
- if (s2.dist < s1.dist) xchg(s2,s1);
- if (s4.dist < s3.dist) xchg(s4,s3);
- if (s3.dist < s1.dist) xchg(s3,s1);
- if (s4.dist < s2.dist) xchg(s4,s2);
- if (s3.dist < s2.dist) xchg(s3,s2);
- }
- /*! use SSE instructions to swap stack items */
- __forceinline static void cmp_xchg(vint4& a, vint4& b)
- {
- #if defined(__AVX512VL__)
- const vboolf4 mask(shuffle<2,2,2,2>(b) < shuffle<2,2,2,2>(a));
- #else
- const vboolf4 mask0(b < a);
- const vboolf4 mask(shuffle<2,2,2,2>(mask0));
- #endif
- const vint4 c = select(mask,b,a);
- const vint4 d = select(mask,a,b);
- a = c;
- b = d;
- }
-
- /*! Sort 3 stack items. */
- __forceinline static void sort3(vint4& s1, vint4& s2, vint4& s3)
- {
- cmp_xchg(s2,s1);
- cmp_xchg(s3,s2);
- cmp_xchg(s2,s1);
- }
-
- /*! Sort 4 stack items. */
- __forceinline static void sort4(vint4& s1, vint4& s2, vint4& s3, vint4& s4)
- {
- cmp_xchg(s2,s1);
- cmp_xchg(s4,s3);
- cmp_xchg(s3,s1);
- cmp_xchg(s4,s2);
- cmp_xchg(s3,s2);
- }
- /*! Sort N stack items. */
- __forceinline friend void sort(StackItemT* begin, StackItemT* end)
- {
- for (StackItemT* i = begin+1; i != end; ++i)
- {
- const vfloat4 item = vfloat4::load((float*)i);
- const unsigned dist = i->dist;
- StackItemT* j = i;
- while ((j != begin) && ((j-1)->dist < dist))
- {
- vfloat4::store(j, vfloat4::load((float*)(j-1)));
- --j;
- }
- vfloat4::store(j, item);
- }
- }
-
- public:
- T ptr;
- unsigned dist;
- };
- /*! An item on the stack holds the node ID and active ray mask. */
- template<typename T>
- struct __aligned(8) StackItemMaskT
- {
- T ptr;
- size_t mask;
- };
- struct __aligned(8) StackItemMaskCoherent
- {
- size_t mask;
- size_t parent;
- size_t child;
- };
- }
|