// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "../common/default.h"

/* force a complete cache invalidation when running out of allocation space */
#define FORCE_SIMPLE_FLUSH 0

/* lock-counter values at or above this threshold signal that a cache sync/flush
   phase is in progress (see SharedLazyTessellationCache::lockThreadLoop) */
#define THREAD_BLOCK_ATOMIC_ADD 4

/* cache statistics are compiled out in both configurations; both branches expand to nothing */
#if defined(DEBUG)
#define CACHE_STATS(x)
#else
#define CACHE_STATS(x)
#endif

namespace embree
{
  class SharedTessellationCacheStats
  {
  public:
    /* stats */
    static std::atomic<size_t> cache_accesses;
    static std::atomic<size_t> cache_hits;
    static std::atomic<size_t> cache_misses;
    static std::atomic<size_t> cache_flushes;
    static size_t cache_num_patches;
    __aligned(64) static SpinLock mtx;

    /* print stats for debugging */
    static void printStats();
    static void clearStats();
  };

  void resizeTessellationCache(size_t new_size);
  void resetTessellationCache();

  ////////////////////////////////////////////////////////////////////////////////
  ////////////////////////////////////////////////////////////////////////////////
  ////////////////////////////////////////////////////////////////////////////////
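  /* Per-thread work state: 'counter' serves as a reference-counted read lock that a
     render thread holds while it dereferences pointers into the cache, and 'next'
     links the per-thread states into a list so the cache can reach every thread when
     it needs exclusive access (see lockThreadLoop() and waitForUsersLessEqual()). */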
  struct __aligned(64) ThreadWorkState
  {
    ALIGNED_STRUCT_(64);

    std::atomic<size_t> counter;
    ThreadWorkState* next;
    bool allocated;

    __forceinline ThreadWorkState(bool allocated = false)
      : counter(0), next(nullptr), allocated(allocated)
    {
      assert( ((size_t)this % 64) == 0 );
    }
  };

  class __aligned(64) SharedLazyTessellationCache
  {
  public:

    static const size_t NUM_CACHE_SEGMENTS              = 8;
    static const size_t NUM_PREALLOC_THREAD_WORK_STATES = 512;
    static const size_t COMMIT_INDEX_SHIFT              = 32+8;
#if defined(__64BIT__)
    static const size_t REF_TAG_MASK                    = 0xffffffffff;
#else
    static const size_t REF_TAG_MASK                    = 0x7FFFFFFF;
#endif
    static const size_t MAX_TESSELLATION_CACHE_SIZE     = REF_TAG_MASK+1;
    static const size_t BLOCK_SIZE                      = 64;

    /*! Per-thread tessellation reference cache */
    static __thread ThreadWorkState* init_t_state;
    static ThreadWorkState* current_t_state;

    static __forceinline ThreadWorkState *threadState()
    {
      if (unlikely(!init_t_state))
        /* sets init_t_state; cannot return the pointer directly due to a macOS ICC bug */
        SharedLazyTessellationCache::sharedLazyTessellationCache.getNextRenderThreadWorkState();
      return init_t_state;
    }
    struct Tag
    {
      __forceinline Tag() : data(0) {}

      __forceinline Tag(void* ptr, size_t combinedTime) {
        init(ptr,combinedTime);
      }

      __forceinline Tag(size_t ptr, size_t combinedTime) {
        init((void*)ptr,combinedTime);
      }

      __forceinline void init(void* ptr, size_t combinedTime)
      {
        if (ptr == nullptr) {
          data = 0;
          return;
        }
        int64_t new_root_ref = (int64_t) ptr;
        new_root_ref -= (int64_t)SharedLazyTessellationCache::sharedLazyTessellationCache.getDataPtr();
        assert( new_root_ref <= (int64_t)REF_TAG_MASK );
        new_root_ref |= (int64_t)combinedTime << COMMIT_INDEX_SHIFT;
        data = new_root_ref;
      }

      __forceinline int64_t get() const { return data.load(); }
      __forceinline void set( int64_t v ) { data.store(v); }
      __forceinline void reset() { data.store(0); }

    private:
      atomic<int64_t> data;
    };

    static __forceinline size_t extractCommitIndex(const int64_t v) { return v >> SharedLazyTessellationCache::COMMIT_INDEX_SHIFT; }
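    /* A Tag packs two values into one atomic 64-bit word: the low bits covered by
       REF_TAG_MASK (40 bits on 64-bit builds, matching COMMIT_INDEX_SHIFT = 32+8)
       store the byte offset of the cached data relative to getDataPtr(), and the
       bits above the shift store the combined time at which the entry was written.
       Decoding mirrors what lookup() does:

         int64_t ref    = tag.get();
         size_t  offset = ref & REF_TAG_MASK;                                    // offset into the cache
         size_t  time   = extractCommitIndex(ref);                               // commit/combined time
         void*   ptr    = (char*)sharedLazyTessellationCache.getDataPtr() + offset;
    */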
    struct CacheEntry
    {
      Tag tag;
      SpinLock mutex;
    };

  private:

    float *data;
    bool hugepages;
    size_t size;
    size_t maxBlocks;
    ThreadWorkState *threadWorkState;

    __aligned(64) std::atomic<size_t> localTime;
    __aligned(64) std::atomic<size_t> next_block;
    __aligned(64) SpinLock reset_state;
    __aligned(64) SpinLock linkedlist_mtx;
    __aligned(64) std::atomic<size_t> switch_block_threshold;
    __aligned(64) std::atomic<size_t> numRenderThreads;

  public:

    SharedLazyTessellationCache();
    ~SharedLazyTessellationCache();

    void getNextRenderThreadWorkState();

    __forceinline size_t maxAllocSize() const {
      return switch_block_threshold;
    }

    __forceinline size_t getCurrentIndex() { return localTime.load(); }
    __forceinline void   addCurrentIndex(const size_t i=1) { localTime.fetch_add(i); }

    __forceinline size_t getTime(const size_t globalTime) {
      return localTime.load()+NUM_CACHE_SEGMENTS*globalTime;
    }

    __forceinline size_t lockThread  (ThreadWorkState *const t_state, const ssize_t plus=1) { return t_state->counter.fetch_add(plus); }
    __forceinline size_t unlockThread(ThreadWorkState *const t_state, const ssize_t plus=-1) { assert(isLocked(t_state)); return t_state->counter.fetch_add(plus); }

    __forceinline bool isLocked(ThreadWorkState *const t_state) { return t_state->counter.load() != 0; }

    static __forceinline void lock  () { sharedLazyTessellationCache.lockThread(threadState()); }
    static __forceinline void unlock() { sharedLazyTessellationCache.unlockThread(threadState()); }
    static __forceinline bool isLocked() { return sharedLazyTessellationCache.isLocked(threadState()); }
    static __forceinline size_t getState() { return threadState()->counter.load(); }
    static __forceinline void lockThreadLoop() { sharedLazyTessellationCache.lockThreadLoop(threadState()); }

    static __forceinline size_t getTCacheTime(const size_t globalTime) {
      return sharedLazyTessellationCache.getTime(globalTime);
    }
    /* per-thread lock */
    __forceinline void lockThreadLoop (ThreadWorkState *const t_state)
    {
      while(1)
      {
        size_t lock = SharedLazyTessellationCache::sharedLazyTessellationCache.lockThread(t_state,1);
        if (unlikely(lock >= THREAD_BLOCK_ATOMIC_ADD))
        {
          /* lock failed, wait until the sync phase is over */
          sharedLazyTessellationCache.unlockThread(t_state,-1);
          sharedLazyTessellationCache.waitForUsersLessEqual(t_state,0);
        }
        else
          break;
      }
    }
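    /* Locking protocol implied by the counters: lockThread() takes a per-thread read
       lock by adding 1 to the thread's counter. When the cache needs exclusive access
       (segment switch, resize, reset), the counters are raised by
       THREAD_BLOCK_ATOMIC_ADD, so a reader that observes a value at or above that
       threshold backs off and blocks in waitForUsersLessEqual() until the flush has
       completed. */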
    static __forceinline void* lookup(CacheEntry& entry, size_t globalTime)
    {
      const int64_t subdiv_patch_root_ref = entry.tag.get();
      CACHE_STATS(SharedTessellationCacheStats::cache_accesses++);

      if (likely(subdiv_patch_root_ref != 0))
      {
        const size_t subdiv_patch_root = (subdiv_patch_root_ref & REF_TAG_MASK) + (size_t)sharedLazyTessellationCache.getDataPtr();
        const size_t subdiv_patch_cache_index = extractCommitIndex(subdiv_patch_root_ref);

        if (likely( sharedLazyTessellationCache.validCacheIndex(subdiv_patch_cache_index,globalTime) ))
        {
          CACHE_STATS(SharedTessellationCacheStats::cache_hits++);
          return (void*) subdiv_patch_root;
        }
      }
      CACHE_STATS(SharedTessellationCacheStats::cache_misses++);
      return nullptr;
    }
    template<typename Constructor>
    static __forceinline auto lookup (CacheEntry& entry, size_t globalTime, const Constructor constructor, const bool before=false) -> decltype(constructor())
    {
      ThreadWorkState *t_state = SharedLazyTessellationCache::threadState();

      while (true)
      {
        sharedLazyTessellationCache.lockThreadLoop(t_state);
        void* patch = SharedLazyTessellationCache::lookup(entry,globalTime);
        if (patch) return (decltype(constructor())) patch;

        if (entry.mutex.try_lock())
        {
          if (!validTag(entry.tag,globalTime))
          {
            auto timeBefore = sharedLazyTessellationCache.getTime(globalTime);
            auto ret = constructor(); // thread is locked here!
            assert(ret);              // the constructor must never return nullptr
            auto timeAfter = sharedLazyTessellationCache.getTime(globalTime);
            auto time = before ? timeBefore : timeAfter;
            __memory_barrier();
            entry.tag = SharedLazyTessellationCache::Tag(ret,time);
            __memory_barrier();
            entry.mutex.unlock();
            return ret;
          }
          entry.mutex.unlock();
        }
        SharedLazyTessellationCache::sharedLazyTessellationCache.unlockThread(t_state);
      }
    }
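    /* Illustrative usage sketch (names, grid size, and payload are placeholders, not
       additional API): a render thread resolves a CacheEntry and lazily builds its
       data on a miss. The constructor runs with the per-thread lock held and must
       allocate its result inside the cache via malloc(); the caller keeps the read
       lock until it is done with the returned pointer.

         float useEntry(SharedLazyTessellationCache::CacheEntry& entry, size_t globalTime)
         {
           float* grid = SharedLazyTessellationCache::lookup(entry, globalTime, [&] () -> float* {
             const size_t numFloats = 16;                                                              // placeholder size
             float* const ptr = (float*) SharedLazyTessellationCache::malloc(numFloats*sizeof(float)); // allocate inside the cache
             for (size_t i = 0; i < numFloats; i++) ptr[i] = float(i);                                 // placeholder payload
             return ptr;                                                                               // must not be nullptr
           });
           const float v = grid[0];               // cache data may only be used while the lock is held
           SharedLazyTessellationCache::unlock(); // release the per-thread read lock
           return v;
         }
    */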

    __forceinline bool validCacheIndex(const size_t i, const size_t globalTime)
    {
#if FORCE_SIMPLE_FLUSH == 1
      return i == getTime(globalTime);
#else
      return i+(NUM_CACHE_SEGMENTS-1) >= getTime(globalTime);
#endif
    }

    static __forceinline bool validTime(const size_t oldtime, const size_t newTime)
    {
      return oldtime+(NUM_CACHE_SEGMENTS-1) >= newTime;
    }
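    /* An entry written at combined time i stays valid for the following
       NUM_CACHE_SEGMENTS-1 segment switches (with FORCE_SIMPLE_FLUSH enabled it is
       only valid at exactly the time it was written). With NUM_CACHE_SEGMENTS == 8,
       an entry tagged at time 10 is accepted while getTime() <= 17 and treated as a
       miss afterwards. */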
    static __forceinline bool validTag(const Tag& tag, size_t globalTime)
    {
      const int64_t subdiv_patch_root_ref = tag.get();
      if (subdiv_patch_root_ref == 0) return false;
      const size_t subdiv_patch_cache_index = extractCommitIndex(subdiv_patch_root_ref);
      return sharedLazyTessellationCache.validCacheIndex(subdiv_patch_cache_index,globalTime);
    }

    void waitForUsersLessEqual(ThreadWorkState *const t_state,
                               const unsigned int users);

    __forceinline size_t alloc(const size_t blocks)
    {
      if (unlikely(blocks >= switch_block_threshold))
        throw_RTCError(RTC_ERROR_INVALID_OPERATION,"allocation exceeds size of tessellation cache segment");

      assert(blocks < switch_block_threshold);
      size_t index = next_block.fetch_add(blocks);
      if (unlikely(index + blocks >= switch_block_threshold)) return (size_t)-1;
      return index;
    }
    static __forceinline void* malloc(const size_t bytes)
    {
      size_t block_index = -1;
      ThreadWorkState *const t_state = threadState();
      while (true)
      {
        block_index = sharedLazyTessellationCache.alloc((bytes+BLOCK_SIZE-1)/BLOCK_SIZE);
        if (block_index == (size_t)-1)
        {
          sharedLazyTessellationCache.unlockThread(t_state);
          sharedLazyTessellationCache.allocNextSegment();
          sharedLazyTessellationCache.lockThread(t_state);
          continue;
        }
        break;
      }
      return sharedLazyTessellationCache.getBlockPtr(block_index);
    }
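    /* Allocation strategy: malloc() rounds the request up to 64-byte blocks and
       alloc() hands out blocks with a single atomic bump of next_block. When the
       current segment is exhausted (alloc() returns (size_t)-1), the calling thread
       temporarily drops its read lock, lets allocNextSegment() advance to the next
       cache segment, re-acquires the lock, and retries. */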
    __forceinline void *getBlockPtr(const size_t block_index)
    {
      assert(block_index < maxBlocks);
      assert(data);
      assert(block_index*16 <= size);
      return (void*)&data[block_index*16]; // one 64-byte block corresponds to 16 floats
    }

    __forceinline void*  getDataPtr()      { return data; }
    __forceinline size_t getNumUsedBytes() { return next_block * BLOCK_SIZE; }
    __forceinline size_t getMaxBlocks()    { return maxBlocks; }
    __forceinline size_t getSize()         { return size; }

    void allocNextSegment();
    void realloc(const size_t newSize);

    void reset();

    static SharedLazyTessellationCache sharedLazyTessellationCache;
  };
}