VertexLoaderBase.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267
  1. // Copyright 2014 Dolphin Emulator Project
  2. // SPDX-License-Identifier: GPL-2.0-or-later
  3. #include "VideoCommon/VertexLoaderBase.h"
  4. #include <array>
  5. #include <bit>
  6. #include <cstring>
  7. #include <memory>
  8. #include <string>
  9. #include <vector>
  10. #include <fmt/format.h>
  11. #include <fmt/ranges.h>
  12. #include "Common/Assert.h"
  13. #include "Common/BitUtils.h"
  14. #include "Common/CommonTypes.h"
  15. #include "Common/Logging/Log.h"
  16. #include "Common/MsgHandler.h"
  17. #include "VideoCommon/VertexLoader.h"
  18. #include "VideoCommon/VertexLoaderManager.h"
  19. #include "VideoCommon/VertexLoader_Color.h"
  20. #include "VideoCommon/VertexLoader_Normal.h"
  21. #include "VideoCommon/VertexLoader_Position.h"
  22. #include "VideoCommon/VertexLoader_TextCoord.h"
  23. #ifdef _M_X86_64
  24. #include "VideoCommon/VertexLoaderX64.h"
  25. #elif defined(_M_ARM_64)
  26. #include "VideoCommon/VertexLoaderARM64.h"
  27. #endif
  28. // a hacky implementation to compare two vertex loaders
  29. class VertexLoaderTester : public VertexLoaderBase
  30. {
  31. public:
  32. VertexLoaderTester(std::unique_ptr<VertexLoaderBase> a_, std::unique_ptr<VertexLoaderBase> b_,
  33. const TVtxDesc& vtx_desc, const VAT& vtx_attr)
  34. : VertexLoaderBase(vtx_desc, vtx_attr), a(std::move(a_)), b(std::move(b_))
  35. {
  36. ASSERT(a && b);
  37. if (a->m_vertex_size == b->m_vertex_size && a->m_native_components == b->m_native_components &&
  38. a->m_native_vtx_decl.stride == b->m_native_vtx_decl.stride)
  39. {
  40. // These are generated from the VAT and vertex desc, so they should match.
  41. // m_native_vtx_decl.stride isn't set yet, though.
  42. ASSERT(m_vertex_size == a->m_vertex_size && m_native_components == a->m_native_components);
  43. memcpy(&m_native_vtx_decl, &a->m_native_vtx_decl, sizeof(PortableVertexDeclaration));
  44. }
  45. else
  46. {
  47. PanicAlertFmt("Can't compare vertex loaders that expect different vertex formats!\n"
  48. "a: m_vertex_size {}, m_native_components {:#010x}, stride {}\n"
  49. "b: m_vertex_size {}, m_native_components {:#010x}, stride {}",
  50. a->m_vertex_size, a->m_native_components, a->m_native_vtx_decl.stride,
  51. b->m_vertex_size, b->m_native_components, b->m_native_vtx_decl.stride);
  52. }
  53. }
  54. int RunVertices(const u8* src, u8* dst, int count) override
  55. {
  56. buffer_a.resize(count * a->m_native_vtx_decl.stride + 4);
  57. buffer_b.resize(count * b->m_native_vtx_decl.stride + 4);
  58. const std::array<u32, 3> old_position_matrix_index_cache =
  59. VertexLoaderManager::position_matrix_index_cache;
  60. const std::array<std::array<float, 4>, 3> old_position_cache =
  61. VertexLoaderManager::position_cache;
  62. const std::array<float, 4> old_normal_cache = VertexLoaderManager::normal_cache;
  63. const std::array<float, 4> old_tangent_cache = VertexLoaderManager::tangent_cache;
  64. const std::array<float, 4> old_binormal_cache = VertexLoaderManager::binormal_cache;
  65. const int count_a = a->RunVertices(src, buffer_a.data(), count);
  66. const std::array<u32, 3> a_position_matrix_index_cache =
  67. VertexLoaderManager::position_matrix_index_cache;
  68. const std::array<std::array<float, 4>, 3> a_position_cache =
  69. VertexLoaderManager::position_cache;
  70. const std::array<float, 4> a_normal_cache = VertexLoaderManager::normal_cache;
  71. const std::array<float, 4> a_tangent_cache = VertexLoaderManager::tangent_cache;
  72. const std::array<float, 4> a_binormal_cache = VertexLoaderManager::binormal_cache;
  73. // Reset state before running b
  74. VertexLoaderManager::position_matrix_index_cache = old_position_matrix_index_cache;
  75. VertexLoaderManager::position_cache = old_position_cache;
  76. VertexLoaderManager::normal_cache = old_normal_cache;
  77. VertexLoaderManager::tangent_cache = old_tangent_cache;
  78. VertexLoaderManager::binormal_cache = old_binormal_cache;
  79. const int count_b = b->RunVertices(src, buffer_b.data(), count);
  80. const std::array<u32, 3> b_position_matrix_index_cache =
  81. VertexLoaderManager::position_matrix_index_cache;
  82. const std::array<std::array<float, 4>, 3> b_position_cache =
  83. VertexLoaderManager::position_cache;
  84. const std::array<float, 4> b_normal_cache = VertexLoaderManager::normal_cache;
  85. const std::array<float, 4> b_tangent_cache = VertexLoaderManager::tangent_cache;
  86. const std::array<float, 4> b_binormal_cache = VertexLoaderManager::binormal_cache;
  87. ASSERT_MSG(VIDEO, count_a == count_b,
  88. "The two vertex loaders have loaded a different amount of vertices (a: {}, b: {}).",
  89. count_a, count_b);
  90. ASSERT_MSG(VIDEO,
  91. memcmp(buffer_a.data(), buffer_b.data(),
  92. std::min(count_a, count_b) * m_native_vtx_decl.stride) == 0,
  93. "The two vertex loaders have loaded different data. Configuration:"
  94. "\nVertex desc:\n{}\n\nVertex attr:\n{}",
  95. m_VtxDesc, m_VtxAttr);
  96. ASSERT_MSG(VIDEO, a_position_matrix_index_cache == b_position_matrix_index_cache,
  97. "Expected matching position matrix caches after loading (a: {}; b: {})",
  98. fmt::join(a_position_matrix_index_cache, ", "),
  99. fmt::join(b_position_matrix_index_cache, ", "));
  100. // Some games (e.g. Donkey Kong Country Returns) have a few draws that contain NaN.
  101. // Since NaN != NaN, we need to compare the bits instead.
  102. const auto bit_equal = [](float val_a, float val_b) {
  103. return std::bit_cast<u32>(val_a) == std::bit_cast<u32>(val_b);
  104. };
  105. // The last element is allowed to be garbage for SIMD overwrites.
  106. // For XY, the last 2 are garbage.
  107. const bool positions_match = [&] {
  108. const size_t max_component = m_VtxAttr.g0.PosElements == CoordComponentCount::XYZ ? 3 : 2;
  109. for (size_t vertex = 0; vertex < 3; vertex++)
  110. {
  111. if (!std::equal(a_position_cache[vertex].begin(),
  112. a_position_cache[vertex].begin() + max_component,
  113. b_position_cache[vertex].begin(), bit_equal))
  114. {
  115. return false;
  116. }
  117. }
  118. return true;
  119. }();
  120. ASSERT_MSG(VIDEO, positions_match,
  121. "Expected matching position caches after loading (a: {} / {} / {}; b: {} / {} / {})",
  122. fmt::join(a_position_cache[0], ", "), fmt::join(a_position_cache[1], ", "),
  123. fmt::join(a_position_cache[2], ", "), fmt::join(b_position_cache[0], ", "),
  124. fmt::join(b_position_cache[1], ", "), fmt::join(b_position_cache[2], ", "));
  125. // The last element is allowed to be garbage for SIMD overwrites
  126. ASSERT_MSG(VIDEO,
  127. std::equal(a_normal_cache.begin(), a_normal_cache.begin() + 3,
  128. b_normal_cache.begin(), b_normal_cache.begin() + 3, bit_equal),
  129. "Expected matching normal caches after loading (a: {}; b: {})",
  130. fmt::join(a_normal_cache, ", "), fmt::join(b_normal_cache, ", "));
  131. ASSERT_MSG(VIDEO,
  132. std::equal(a_tangent_cache.begin(), a_tangent_cache.begin() + 3,
  133. b_tangent_cache.begin(), b_tangent_cache.begin() + 3, bit_equal),
  134. "Expected matching tangent caches after loading (a: {}; b: {})",
  135. fmt::join(a_tangent_cache, ", "), fmt::join(b_tangent_cache, ", "));
  136. ASSERT_MSG(VIDEO,
  137. std::equal(a_binormal_cache.begin(), a_binormal_cache.begin() + 3,
  138. b_binormal_cache.begin(), b_binormal_cache.begin() + 3, bit_equal),
  139. "Expected matching binormal caches after loading (a: {}; b: {})",
  140. fmt::join(a_binormal_cache, ", "), fmt::join(b_binormal_cache, ", "));
  141. memcpy(dst, buffer_a.data(), count_a * m_native_vtx_decl.stride);
  142. m_numLoadedVertices += count;
  143. return count_a;
  144. }
  145. private:
  146. std::unique_ptr<VertexLoaderBase> a;
  147. std::unique_ptr<VertexLoaderBase> b;
  148. std::vector<u8> buffer_a;
  149. std::vector<u8> buffer_b;
  150. };
  151. u32 VertexLoaderBase::GetVertexSize(const TVtxDesc& vtx_desc, const VAT& vtx_attr)
  152. {
  153. u32 size = 0;
  154. // Each enabled TexMatIdx adds one byte, as does PosMatIdx
  155. size += std::popcount(vtx_desc.low.Hex & 0x1FF);
  156. const u32 pos_size = VertexLoader_Position::GetSize(vtx_desc.low.Position, vtx_attr.g0.PosFormat,
  157. vtx_attr.g0.PosElements);
  158. size += pos_size;
  159. const u32 norm_size =
  160. VertexLoader_Normal::GetSize(vtx_desc.low.Normal, vtx_attr.g0.NormalFormat,
  161. vtx_attr.g0.NormalElements, vtx_attr.g0.NormalIndex3);
  162. size += norm_size;
  163. for (u32 i = 0; i < vtx_desc.low.Color.Size(); i++)
  164. {
  165. const u32 color_size =
  166. VertexLoader_Color::GetSize(vtx_desc.low.Color[i], vtx_attr.GetColorFormat(i));
  167. size += color_size;
  168. }
  169. for (u32 i = 0; i < vtx_desc.high.TexCoord.Size(); i++)
  170. {
  171. const u32 tc_size = VertexLoader_TextCoord::GetSize(
  172. vtx_desc.high.TexCoord[i], vtx_attr.GetTexFormat(i), vtx_attr.GetTexElements(i));
  173. size += tc_size;
  174. }
  175. return size;
  176. }
  177. u32 VertexLoaderBase::GetVertexComponents(const TVtxDesc& vtx_desc, const VAT& vtx_attr)
  178. {
  179. u32 components = 0;
  180. if (vtx_desc.low.PosMatIdx)
  181. components |= VB_HAS_POSMTXIDX;
  182. for (u32 i = 0; i < vtx_desc.low.TexMatIdx.Size(); i++)
  183. {
  184. if (vtx_desc.low.TexMatIdx[i])
  185. components |= VB_HAS_TEXMTXIDX0 << i;
  186. }
  187. // Vertices always have positions; thus there is no VB_HAS_POS as it would always be set
  188. if (vtx_desc.low.Normal != VertexComponentFormat::NotPresent)
  189. {
  190. components |= VB_HAS_NORMAL;
  191. if (vtx_attr.g0.NormalElements == NormalComponentCount::NTB)
  192. components |= VB_HAS_TANGENT | VB_HAS_BINORMAL;
  193. }
  194. for (u32 i = 0; i < vtx_desc.low.Color.Size(); i++)
  195. {
  196. if (vtx_desc.low.Color[i] != VertexComponentFormat::NotPresent)
  197. components |= VB_HAS_COL0 << i;
  198. }
  199. for (u32 i = 0; i < vtx_desc.high.TexCoord.Size(); i++)
  200. {
  201. if (vtx_desc.high.TexCoord[i] != VertexComponentFormat::NotPresent)
  202. components |= VB_HAS_UV0 << i;
  203. }
  204. return components;
  205. }
  206. std::unique_ptr<VertexLoaderBase> VertexLoaderBase::CreateVertexLoader(const TVtxDesc& vtx_desc,
  207. const VAT& vtx_attr)
  208. {
  209. std::unique_ptr<VertexLoaderBase> loader = nullptr;
  210. // #define COMPARE_VERTEXLOADERS
  211. #if defined(_M_X86_64)
  212. loader = std::make_unique<VertexLoaderX64>(vtx_desc, vtx_attr);
  213. #elif defined(_M_ARM_64)
  214. loader = std::make_unique<VertexLoaderARM64>(vtx_desc, vtx_attr);
  215. #endif
  216. // Use the software loader as a fallback
  217. // (not currently applicable, as both VertexLoaderX64 and VertexLoaderARM64
  218. // are always usable, but if a loader that only works on some CPUs is created
  219. // then this fallback would be used)
  220. if (!loader)
  221. loader = std::make_unique<VertexLoader>(vtx_desc, vtx_attr);
  222. #if defined(COMPARE_VERTEXLOADERS)
  223. return std::make_unique<VertexLoaderTester>(
  224. std::make_unique<VertexLoader>(vtx_desc, vtx_attr), // the software one
  225. std::move(loader), // the new one to compare
  226. vtx_desc, vtx_attr);
  227. #else
  228. return loader;
  229. #endif
  230. }