// vfetchanalyzer.cpp
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
  2. #include "meshoptimizer.h"
  3. #include <assert.h>
  4. #include <string.h>
  5. meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size)
  6. {
  7. assert(index_count % 3 == 0);
  8. assert(vertex_size > 0 && vertex_size <= 256);
  9. meshopt_Allocator allocator;
  10. meshopt_VertexFetchStatistics result = {};
  11. unsigned char* vertex_visited = allocator.allocate<unsigned char>(vertex_count);
  12. memset(vertex_visited, 0, vertex_count);
  13. const size_t kCacheLine = 64;
  14. const size_t kCacheSize = 128 * 1024;
  15. // simple direct mapped cache; on typical mesh data this is close to 4-way cache, and this model is a gross approximation anyway
  16. size_t cache[kCacheSize / kCacheLine] = {};
  17. for (size_t i = 0; i < index_count; ++i)
  18. {
  19. unsigned int index = indices[i];
  20. assert(index < vertex_count);
  21. vertex_visited[index] = 1;
  22. size_t start_address = index * vertex_size;
  23. size_t end_address = start_address + vertex_size;
  24. size_t start_tag = start_address / kCacheLine;
  25. size_t end_tag = (end_address + kCacheLine - 1) / kCacheLine;
  26. assert(start_tag < end_tag);
  27. for (size_t tag = start_tag; tag < end_tag; ++tag)
  28. {
  29. size_t line = tag % (sizeof(cache) / sizeof(cache[0]));
  30. // we store +1 since cache is filled with 0 by default
  31. result.bytes_fetched += (cache[line] != tag + 1) * kCacheLine;
  32. cache[line] = tag + 1;
  33. }
  34. }
  35. size_t unique_vertex_count = 0;
  36. for (size_t i = 0; i < vertex_count; ++i)
  37. unique_vertex_count += vertex_visited[i];
  38. result.overfetch = unique_vertex_count == 0 ? 0 : float(result.bytes_fetched) / float(unique_vertex_count * vertex_size);
  39. return result;
  40. }