MaskedOcclusionCullingAVX512.cpp

////////////////////////////////////////////////////////////////////////////////
// Copyright 2017 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
////////////////////////////////////////////////////////////////////////////////
#include <string.h>
#include <assert.h>
#include <float.h>
#include "MaskedOcclusionCulling.h"
#include "CompilerSpecific.inl"

#if MOC_RECORDER_ENABLE
#include "FrameRecorder.h"
#endif

// Make sure the compiler supports AVX-512 intrinsics: Visual Studio 2017 (Update 3) || Intel C++ Compiler 16.0 || Clang 4.0 || GCC 5.0
#if USE_AVX512 != 0 && ((defined(_MSC_VER) && _MSC_VER >= 1911) || (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1600) || (defined(__clang__) && __clang_major__ >= 4) || (defined(__GNUC__) && __GNUC__ >= 5))

// The MaskedOcclusionCullingAVX512.cpp file should be compiled with the AVX2/AVX-512 architecture options turned on in the compiler. However, the SSE
// version in MaskedOcclusionCulling.cpp _must_ be compiled with the SSE2 architecture to allow backwards compatibility. Best practice is to
// use the lowest supported target platform (e.g. /arch:SSE2) as the project default, and elevate only the MaskedOcclusionCullingAVX2/512.cpp files.
#ifndef __AVX2__
#error For best performance, MaskedOcclusionCullingAVX512.cpp should be compiled with /arch:AVX2
#endif
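
// Illustrative only (not part of the original file): a per-file architecture setup along those
// lines might use command lines like the following; the exact flags are an assumption and depend
// on the toolchain and project configuration.
//
//   MSVC:       cl /c /arch:SSE2 MaskedOcclusionCulling.cpp
//               cl /c /arch:AVX2 MaskedOcclusionCullingAVX2.cpp MaskedOcclusionCullingAVX512.cpp
//   GCC/Clang:  g++ -msse2 -c MaskedOcclusionCulling.cpp
//               g++ -mavx2 -mfma -mavx512f -mavx512bw -mavx512dq -c MaskedOcclusionCullingAVX512.cpp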
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// AVX-512 specific defines and constants
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

#define SIMD_LANES 16
#define TILE_HEIGHT_SHIFT 4

#define SIMD_LANE_IDX _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)

#define SIMD_SUB_TILE_COL_OFFSET _mm512_setr_epi32(0, SUB_TILE_WIDTH, SUB_TILE_WIDTH * 2, SUB_TILE_WIDTH * 3, 0, SUB_TILE_WIDTH, SUB_TILE_WIDTH * 2, SUB_TILE_WIDTH * 3, 0, SUB_TILE_WIDTH, SUB_TILE_WIDTH * 2, SUB_TILE_WIDTH * 3, 0, SUB_TILE_WIDTH, SUB_TILE_WIDTH * 2, SUB_TILE_WIDTH * 3)
#define SIMD_SUB_TILE_ROW_OFFSET _mm512_setr_epi32(0, 0, 0, 0, SUB_TILE_HEIGHT, SUB_TILE_HEIGHT, SUB_TILE_HEIGHT, SUB_TILE_HEIGHT, SUB_TILE_HEIGHT * 2, SUB_TILE_HEIGHT * 2, SUB_TILE_HEIGHT * 2, SUB_TILE_HEIGHT * 2, SUB_TILE_HEIGHT * 3, SUB_TILE_HEIGHT * 3, SUB_TILE_HEIGHT * 3, SUB_TILE_HEIGHT * 3)
#define SIMD_SUB_TILE_COL_OFFSET_F _mm512_setr_ps(0, SUB_TILE_WIDTH, SUB_TILE_WIDTH * 2, SUB_TILE_WIDTH * 3, 0, SUB_TILE_WIDTH, SUB_TILE_WIDTH * 2, SUB_TILE_WIDTH * 3, 0, SUB_TILE_WIDTH, SUB_TILE_WIDTH * 2, SUB_TILE_WIDTH * 3, 0, SUB_TILE_WIDTH, SUB_TILE_WIDTH * 2, SUB_TILE_WIDTH * 3)
#define SIMD_SUB_TILE_ROW_OFFSET_F _mm512_setr_ps(0, 0, 0, 0, SUB_TILE_HEIGHT, SUB_TILE_HEIGHT, SUB_TILE_HEIGHT, SUB_TILE_HEIGHT, SUB_TILE_HEIGHT * 2, SUB_TILE_HEIGHT * 2, SUB_TILE_HEIGHT * 2, SUB_TILE_HEIGHT * 2, SUB_TILE_HEIGHT * 3, SUB_TILE_HEIGHT * 3, SUB_TILE_HEIGHT * 3, SUB_TILE_HEIGHT * 3)

#define SIMD_SHUFFLE_SCANLINE_TO_SUBTILES _mm512_set_epi32(0x0F0B0703, 0x0E0A0602, 0x0D090501, 0x0C080400, 0x0F0B0703, 0x0E0A0602, 0x0D090501, 0x0C080400, 0x0F0B0703, 0x0E0A0602, 0x0D090501, 0x0C080400, 0x0F0B0703, 0x0E0A0602, 0x0D090501, 0x0C080400)

#define SIMD_LANE_YCOORD_I _mm512_setr_epi32(128, 384, 640, 896, 1152, 1408, 1664, 1920, 2176, 2432, 2688, 2944, 3200, 3456, 3712, 3968)
#define SIMD_LANE_YCOORD_F _mm512_setr_ps(128.0f, 384.0f, 640.0f, 896.0f, 1152.0f, 1408.0f, 1664.0f, 1920.0f, 2176.0f, 2432.0f, 2688.0f, 2944.0f, 3200.0f, 3456.0f, 3712.0f, 3968.0f)
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// AVX-512 specific typedefs and functions
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

typedef __m512 __mw;
typedef __m512i __mwi;

#define _mmw_set1_ps _mm512_set1_ps
#define _mmw_setzero_ps _mm512_setzero_ps
#define _mmw_and_ps _mm512_and_ps
#define _mmw_or_ps _mm512_or_ps
#define _mmw_xor_ps _mm512_xor_ps
#define _mmw_not_ps(a) _mm512_xor_ps((a), _mm512_castsi512_ps(_mm512_set1_epi32(~0)))
#define _mmw_andnot_ps _mm512_andnot_ps
#define _mmw_neg_ps(a) _mm512_xor_ps((a), _mm512_set1_ps(-0.0f))
#define _mmw_abs_ps(a) _mm512_and_ps((a), _mm512_castsi512_ps(_mm512_set1_epi32(0x7FFFFFFF)))
#define _mmw_add_ps _mm512_add_ps
#define _mmw_sub_ps _mm512_sub_ps
#define _mmw_mul_ps _mm512_mul_ps
#define _mmw_div_ps _mm512_div_ps
#define _mmw_min_ps _mm512_min_ps
#define _mmw_max_ps _mm512_max_ps
#define _mmw_fmadd_ps _mm512_fmadd_ps
#define _mmw_fmsub_ps _mm512_fmsub_ps
#define _mmw_shuffle_ps _mm512_shuffle_ps
#define _mmw_insertf32x4_ps _mm512_insertf32x4
#define _mmw_cvtepi32_ps _mm512_cvtepi32_ps
#define _mmw_blendv_epi32(a,b,c) simd_cast<__mwi>(_mmw_blendv_ps(simd_cast<__mw>(a), simd_cast<__mw>(b), simd_cast<__mw>(c)))

#define _mmw_set1_epi32 _mm512_set1_epi32
#define _mmw_setzero_epi32 _mm512_setzero_si512
#define _mmw_and_epi32 _mm512_and_si512
#define _mmw_or_epi32 _mm512_or_si512
#define _mmw_xor_epi32 _mm512_xor_si512
#define _mmw_not_epi32(a) _mm512_xor_si512((a), _mm512_set1_epi32(~0))
#define _mmw_andnot_epi32 _mm512_andnot_si512
#define _mmw_neg_epi32(a) _mm512_sub_epi32(_mm512_set1_epi32(0), (a))
#define _mmw_add_epi32 _mm512_add_epi32
#define _mmw_sub_epi32 _mm512_sub_epi32
#define _mmw_min_epi32 _mm512_min_epi32
#define _mmw_max_epi32 _mm512_max_epi32
#define _mmw_subs_epu16 _mm512_subs_epu16
#define _mmw_mullo_epi32 _mm512_mullo_epi32
#define _mmw_srai_epi32 _mm512_srai_epi32
#define _mmw_srli_epi32 _mm512_srli_epi32
#define _mmw_slli_epi32 _mm512_slli_epi32
#define _mmw_sllv_ones(x) _mm512_sllv_epi32(SIMD_BITS_ONE, x)
#define _mmw_transpose_epi8(x) _mm512_shuffle_epi8(x, SIMD_SHUFFLE_SCANLINE_TO_SUBTILES)
#define _mmw_abs_epi32 _mm512_abs_epi32
#define _mmw_cvtps_epi32 _mm512_cvtps_epi32
#define _mmw_cvttps_epi32 _mm512_cvttps_epi32

#define _mmx_dp4_ps(a, b) _mm_dp_ps(a, b, 0xFF)
#define _mmx_fmadd_ps _mm_fmadd_ps
#define _mmx_max_epi32 _mm_max_epi32
#define _mmx_min_epi32 _mm_min_epi32
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// SIMD casting functions
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

template<typename T, typename Y> FORCE_INLINE T simd_cast(Y A);
template<> FORCE_INLINE __m128 simd_cast<__m128>(float A) { return _mm_set1_ps(A); }
template<> FORCE_INLINE __m128 simd_cast<__m128>(__m128i A) { return _mm_castsi128_ps(A); }
template<> FORCE_INLINE __m128 simd_cast<__m128>(__m128 A) { return A; }
template<> FORCE_INLINE __m128i simd_cast<__m128i>(int A) { return _mm_set1_epi32(A); }
template<> FORCE_INLINE __m128i simd_cast<__m128i>(__m128 A) { return _mm_castps_si128(A); }
template<> FORCE_INLINE __m128i simd_cast<__m128i>(__m128i A) { return A; }
template<> FORCE_INLINE __m256 simd_cast<__m256>(float A) { return _mm256_set1_ps(A); }
template<> FORCE_INLINE __m256 simd_cast<__m256>(__m256i A) { return _mm256_castsi256_ps(A); }
template<> FORCE_INLINE __m256 simd_cast<__m256>(__m256 A) { return A; }
template<> FORCE_INLINE __m256i simd_cast<__m256i>(int A) { return _mm256_set1_epi32(A); }
template<> FORCE_INLINE __m256i simd_cast<__m256i>(__m256 A) { return _mm256_castps_si256(A); }
template<> FORCE_INLINE __m256i simd_cast<__m256i>(__m256i A) { return A; }
template<> FORCE_INLINE __m512 simd_cast<__m512>(float A) { return _mm512_set1_ps(A); }
template<> FORCE_INLINE __m512 simd_cast<__m512>(__m512i A) { return _mm512_castsi512_ps(A); }
template<> FORCE_INLINE __m512 simd_cast<__m512>(__m512 A) { return A; }
template<> FORCE_INLINE __m512i simd_cast<__m512i>(int A) { return _mm512_set1_epi32(A); }
template<> FORCE_INLINE __m512i simd_cast<__m512i>(__m512 A) { return _mm512_castps_si512(A); }
template<> FORCE_INLINE __m512i simd_cast<__m512i>(__m512i A) { return A; }

#define MAKE_ACCESSOR(name, simd_type, base_type, is_const, elements) \
    FORCE_INLINE is_const base_type * name(is_const simd_type &a) { \
        union accessor { simd_type m_native; base_type m_array[elements]; }; \
        is_const accessor *acs = reinterpret_cast<is_const accessor*>(&a); \
        return acs->m_array; \
    }

MAKE_ACCESSOR(simd_f32, __m128, float, , 4)
MAKE_ACCESSOR(simd_f32, __m128, float, const, 4)
MAKE_ACCESSOR(simd_i32, __m128i, int, , 4)
MAKE_ACCESSOR(simd_i32, __m128i, int, const, 4)
MAKE_ACCESSOR(simd_f32, __m256, float, , 8)
MAKE_ACCESSOR(simd_f32, __m256, float, const, 8)
MAKE_ACCESSOR(simd_i32, __m256i, int, , 8)
MAKE_ACCESSOR(simd_i32, __m256i, int, const, 8)
MAKE_ACCESSOR(simd_f32, __m512, float, , 16)
MAKE_ACCESSOR(simd_f32, __m512, float, const, 16)
MAKE_ACCESSOR(simd_i32, __m512i, int, , 16)
MAKE_ACCESSOR(simd_i32, __m512i, int, const, 16)
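
// Illustrative only (not part of the original source): the accessors generated above provide
// scalar, per-lane access to a SIMD register, e.g.
//
//   __m512 v = _mm512_set1_ps(1.0f);
//   simd_f32(v)[3] = 2.0f;            // write lane 3
//   float lane7 = simd_f32(v)[7];     // read lane 7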
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Specialized AVX-512 input assembly function for general vertex gather
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

typedef MaskedOcclusionCulling::VertexLayout VertexLayout;

FORCE_INLINE void GatherVertices(__m512 *vtxX, __m512 *vtxY, __m512 *vtxW, const float *inVtx, const unsigned int *inTrisPtr, int numLanes, const VertexLayout &vtxLayout)
{
    assert(numLanes >= 1);

    const __m512i SIMD_TRI_IDX_OFFSET = _mm512_setr_epi32(0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45);
    static const __m512i SIMD_LANE_MASK[17] = {
        _mm512_setr_epi32( 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0),
        _mm512_setr_epi32(~0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0),
        _mm512_setr_epi32(~0, ~0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0),
        _mm512_setr_epi32(~0, ~0, ~0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0),
        _mm512_setr_epi32(~0, ~0, ~0, ~0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0),
        _mm512_setr_epi32(~0, ~0, ~0, ~0, ~0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0),
        _mm512_setr_epi32(~0, ~0, ~0, ~0, ~0, ~0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0),
        _mm512_setr_epi32(~0, ~0, ~0, ~0, ~0, ~0, ~0,  0,  0,  0,  0,  0,  0,  0,  0,  0),
        _mm512_setr_epi32(~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,  0,  0,  0,  0,  0,  0,  0,  0),
        _mm512_setr_epi32(~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,  0,  0,  0,  0,  0,  0,  0),
        _mm512_setr_epi32(~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,  0,  0,  0,  0,  0,  0),
        _mm512_setr_epi32(~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,  0,  0,  0,  0,  0),
        _mm512_setr_epi32(~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,  0,  0,  0,  0),
        _mm512_setr_epi32(~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,  0,  0,  0),
        _mm512_setr_epi32(~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,  0,  0),
        _mm512_setr_epi32(~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,  0),
        _mm512_setr_epi32(~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0)
    };

    // Compute per-lane index list offset that guards against out of bounds memory accesses
    __m512i safeTriIdxOffset = _mm512_and_si512(SIMD_TRI_IDX_OFFSET, SIMD_LANE_MASK[numLanes]);

    // Fetch triangle indices.
    __m512i vtxIdx[3];
    vtxIdx[0] = _mmw_mullo_epi32(_mm512_i32gather_epi32(safeTriIdxOffset, (const int*)inTrisPtr + 0, 4), _mmw_set1_epi32(vtxLayout.mStride));
    vtxIdx[1] = _mmw_mullo_epi32(_mm512_i32gather_epi32(safeTriIdxOffset, (const int*)inTrisPtr + 1, 4), _mmw_set1_epi32(vtxLayout.mStride));
    vtxIdx[2] = _mmw_mullo_epi32(_mm512_i32gather_epi32(safeTriIdxOffset, (const int*)inTrisPtr + 2, 4), _mmw_set1_epi32(vtxLayout.mStride));

    char *vPtr = (char *)inVtx;

    // Fetch triangle vertices
    for (int i = 0; i < 3; i++)
    {
        vtxX[i] = _mm512_i32gather_ps(vtxIdx[i], (float *)vPtr, 1);
        vtxY[i] = _mm512_i32gather_ps(vtxIdx[i], (float *)(vPtr + vtxLayout.mOffsetY), 1);
        vtxW[i] = _mm512_i32gather_ps(vtxIdx[i], (float *)(vPtr + vtxLayout.mOffsetW), 1);
    }
}
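
// Illustrative usage sketch (hypothetical values, not part of the original file), assuming the
// three-argument VertexLayout constructor and a tightly packed (x, y, w) float vertex stream:
//
//   VertexLayout layout(12, 4, 8);    // 12-byte stride, y at byte offset 4, w at byte offset 8
//   __m512 vtxX[3], vtxY[3], vtxW[3];
//   GatherVertices(vtxX, vtxY, vtxW, vertexBuffer, triangleIndexList, 16, layout); // 16 triangles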
namespace MaskedOcclusionCullingAVX512
{
    /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    // Poorly implemented functions. TODO: fix common (maskedOcclusionCullingCommon.inl) code to improve perf
    /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
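    // These wrappers emulate SSE/AVX-style comparison semantics on top of AVX-512: native AVX-512
    // comparisons return compact __mmask16 registers, which are expanded back into full-width
    // 0 / ~0 per-lane vectors so the shared code in MaskedOcclusionCullingCommon.inl can consume
    // them unchanged. The extra mask <-> vector conversions are the reason for the note above.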
    FORCE_INLINE __m512 _mmw_floor_ps(__m512 x)
    {
        return _mm512_roundscale_ps(x, 1); // 1 = floor
    }

    FORCE_INLINE __m512 _mmw_ceil_ps(__m512 x)
    {
        return _mm512_roundscale_ps(x, 2); // 2 = ceil
    }

    FORCE_INLINE __m512i _mmw_cmpeq_epi32(__m512i a, __m512i b)
    {
        __mmask16 mask = _mm512_cmpeq_epi32_mask(a, b);
        return _mm512_mask_mov_epi32(_mm512_set1_epi32(0), mask, _mm512_set1_epi32(~0));
    }

    FORCE_INLINE __m512i _mmw_cmpgt_epi32(__m512i a, __m512i b)
    {
        __mmask16 mask = _mm512_cmpgt_epi32_mask(a, b);
        return _mm512_mask_mov_epi32(_mm512_set1_epi32(0), mask, _mm512_set1_epi32(~0));
    }

    FORCE_INLINE bool _mmw_testz_epi32(__m512i a, __m512i b)
    {
        __mmask16 mask = _mm512_cmpeq_epi32_mask(_mm512_and_si512(a, b), _mm512_set1_epi32(0));
        return mask == 0xFFFF;
    }

    FORCE_INLINE __m512 _mmw_cmpge_ps(__m512 a, __m512 b)
    {
        __mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_GE_OQ);
        return _mm512_castsi512_ps(_mm512_mask_mov_epi32(_mm512_set1_epi32(0), mask, _mm512_set1_epi32(~0)));
    }

    FORCE_INLINE __m512 _mmw_cmpgt_ps(__m512 a, __m512 b)
    {
        __mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_GT_OQ);
        return _mm512_castsi512_ps(_mm512_mask_mov_epi32(_mm512_set1_epi32(0), mask, _mm512_set1_epi32(~0)));
    }

    FORCE_INLINE __m512 _mmw_cmpeq_ps(__m512 a, __m512 b)
    {
        __mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ);
        return _mm512_castsi512_ps(_mm512_mask_mov_epi32(_mm512_set1_epi32(0), mask, _mm512_set1_epi32(~0)));
    }
    FORCE_INLINE __mmask16 _mmw_movemask_ps(const __m512 &a)
    {
        __mmask16 mask = _mm512_cmp_epi32_mask(_mm512_and_si512(_mm512_castps_si512(a), _mm512_set1_epi32(0x80000000)), _mm512_set1_epi32(0), 4); // (a & 0x80000000) != 0
        return mask;
    }
    FORCE_INLINE __m512 _mmw_blendv_ps(const __m512 &a, const __m512 &b, const __m512 &c)
    {
        __mmask16 mask = _mmw_movemask_ps(c);
        return _mm512_mask_mov_ps(a, mask, b);
    }
    static MaskedOcclusionCulling::Implementation gInstructionSet = MaskedOcclusionCulling::AVX512;

    /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    // Include common algorithm implementation (general, SIMD independent code)
    /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

    #include "MaskedOcclusionCullingCommon.inl"

    /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    // Utility function to create a new object using the allocator callbacks
    /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

    typedef MaskedOcclusionCulling::pfnAlignedAlloc pfnAlignedAlloc;
    typedef MaskedOcclusionCulling::pfnAlignedFree pfnAlignedFree;

    MaskedOcclusionCulling *CreateMaskedOcclusionCulling(pfnAlignedAlloc alignedAlloc, pfnAlignedFree alignedFree)
    {
        MaskedOcclusionCullingPrivate *object = (MaskedOcclusionCullingPrivate *)alignedAlloc(64, sizeof(MaskedOcclusionCullingPrivate));
        new (object) MaskedOcclusionCullingPrivate(alignedAlloc, alignedFree);
        return object;
    }
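
    // Illustrative only (not part of the original source): client code typically obtains an
    // instance through the dispatching factory in MaskedOcclusionCulling.cpp rather than calling
    // this function directly, e.g.
    //
    //   MaskedOcclusionCulling *moc = MaskedOcclusionCulling::Create(); // selects AVX-512 when the CPU supports it
    //   // ... SetResolution / ClearBuffer / RenderTriangles / TestRect ...
    //   MaskedOcclusionCulling::Destroy(moc);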
};

#else

namespace MaskedOcclusionCullingAVX512
{
    typedef MaskedOcclusionCulling::pfnAlignedAlloc pfnAlignedAlloc;
    typedef MaskedOcclusionCulling::pfnAlignedFree pfnAlignedFree;

    MaskedOcclusionCulling *CreateMaskedOcclusionCulling(pfnAlignedAlloc alignedAlloc, pfnAlignedFree alignedFree)
    {
        return nullptr;
    }
};

#endif