// simd.h
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "../math/math.h"
  5. /* include SSE wrapper classes */
  6. #if defined(__SSE__) || defined(__ARM_NEON)
  7. # include "sse.h"
  8. #endif
  9. /* include AVX wrapper classes */
  10. #if defined(__AVX__)
  11. # include "avx.h"
  12. #endif
  13. /* include AVX512 wrapper classes */
  14. #if defined (__AVX512F__)
  15. # include "avx512.h"
  16. #endif
  17. namespace embree
  18. {
  19. template <int N>
  20. __forceinline vbool<N> isfinite(const vfloat<N>& v)
  21. {
  22. return (v >= vfloat<N>(-std::numeric_limits<float>::max()))
  23. & (v <= vfloat<N>( std::numeric_limits<float>::max()));
  24. }
  25. /* foreach unique */
  26. template<typename vbool, typename vint, typename Closure>
  27. __forceinline void foreach_unique(const vbool& valid0, const vint& vi, const Closure& closure)
  28. {
  29. vbool valid1 = valid0;
  30. while (any(valid1)) {
  31. const int j = int(bsf(movemask(valid1)));
  32. const int i = vi[j];
  33. const vbool valid2 = valid1 & (i == vi);
  34. valid1 = andn(valid1, valid2);
  35. closure(valid2, i);
  36. }
  37. }
  38. /* returns the next unique value i in vi and the corresponding valid_i mask */
  39. template<typename vbool, typename vint>
  40. __forceinline int next_unique(vbool& valid, const vint& vi, /*out*/ vbool& valid_i)
  41. {
  42. assert(any(valid));
  43. const int j = int(bsf(movemask(valid)));
  44. const int i = vi[j];
  45. valid_i = valid & (i == vi);
  46. valid = andn(valid, valid_i);
  47. return i;
  48. }
  49. /* foreach unique index */
  50. template<typename vbool, typename vint, typename Closure>
  51. __forceinline void foreach_unique_index(const vbool& valid0, const vint& vi, const Closure& closure)
  52. {
  53. vbool valid1 = valid0;
  54. while (any(valid1)) {
  55. const int j = int(bsf(movemask(valid1)));
  56. const int i = vi[j];
  57. const vbool valid2 = valid1 & (i == vi);
  58. valid1 = andn(valid1, valid2);
  59. closure(valid2, i, j);
  60. }
  61. }
  62. /* returns the index of the next unique value i in vi and the corresponding valid_i mask */
  63. template<typename vbool, typename vint>
  64. __forceinline int next_unique_index(vbool& valid, const vint& vi, /*out*/ vbool& valid_i)
  65. {
  66. assert(any(valid));
  67. const int j = int(bsf(movemask(valid)));
  68. const int i = vi[j];
  69. valid_i = valid & (i == vi);
  70. valid = andn(valid, valid_i);
  71. return j;
  72. }
  73. template<typename Closure>
  74. __forceinline void foreach2(int x0, int x1, int y0, int y1, const Closure& closure)
  75. {
  76. __aligned(64) int U[2*VSIZEX];
  77. __aligned(64) int V[2*VSIZEX];
  78. int index = 0;
  79. for (int y=y0; y<y1; y++) {
  80. const bool lasty = y+1>=y1;
  81. const vintx vy = y;
  82. for (int x=x0; x<x1; ) { //x+=VSIZEX) {
  83. const bool lastx = x+VSIZEX >= x1;
  84. vintx vx = x+vintx(step);
  85. vintx::storeu(&U[index], vx);
  86. vintx::storeu(&V[index], vy);
  87. const int dx = min(x1-x,VSIZEX);
  88. index += dx;
  89. x += dx;
  90. if (index >= VSIZEX || (lastx && lasty)) {
  91. const vboolx valid = vintx(step) < vintx(index);
  92. closure(valid, vintx::load(U), vintx::load(V));
  93. x-= max(0, index-VSIZEX);
  94. index = 0;
  95. }
  96. }
  97. }
  98. }
  99. }