GpuPassProfiler.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273
  1. /*
  2. * Copyright (c) Contributors to the Open 3D Engine Project.
  3. * For complete copyright and license terms please see the LICENSE at the root of this distribution.
  4. *
  5. * SPDX-License-Identifier: Apache-2.0 OR MIT
  6. *
  7. */
  8. #include <AzCore/std/containers/unordered_map.h>
  9. #include <AzCore/std/sort.h>
  10. #include <Atom/RPI.Public/Pass/ParentPass.h>
  11. #include <Atom/RPI.Public/GpuQuery/GpuPassProfiler.h>
  12. namespace AZ
  13. {
  14. namespace RPI
  15. {
  16. ///////////////////////////////////////////////////////////////////////
  17. // --- PassEntry Start---
  18. GpuPassProfiler::PassEntry::PassEntry(const RPI::Pass* pass, GpuPassProfiler::PassEntry* parent)
  19. {
  20. m_name = pass->GetName();
  21. m_path = pass->GetPathName();
  22. m_parent = parent;
  23. m_enabled = pass->IsEnabled();
  24. m_timestampEnabled = pass->IsTimestampQueryEnabled();
  25. m_pipelineStatisticsEnabled = pass->IsPipelineStatisticsQueryEnabled();
  26. m_isParent = pass->AsParent() != nullptr;
  27. // [GFX TODO][ATOM-4001] Cache the timestamp and PipelineStatistics results.
  28. // Get the query results from the passes.
  29. m_timestampResult = pass->GetLatestTimestampResult();
  30. const RPI::PipelineStatisticsResult rps = pass->GetLatestPipelineStatisticsResult();
  31. m_pipelineStatistics = { rps.m_vertexCount, rps.m_primitiveCount, rps.m_vertexShaderInvocationCount,
  32. rps.m_rasterizedPrimitiveCount, rps.m_renderedPrimitiveCount, rps.m_pixelShaderInvocationCount, rps.m_computeShaderInvocationCount };
  33. // Disable the entry if it has a parent that is also not enabled.
  34. if (m_parent)
  35. {
  36. m_enabled = pass->IsEnabled() && m_parent->m_enabled;
  37. }
  38. }
  39. void GpuPassProfiler::PassEntry::LinkChild(PassEntry* childEntry)
  40. {
  41. m_children.push_back(childEntry);
  42. if (!m_linked && m_parent)
  43. {
  44. m_linked = true;
  45. // Recursively create parent->child references for entries that aren't linked to the root entry yet.
  46. // Effectively walking the tree backwards from the leaf to the root entry, and establishing parent->child references to
  47. // entries that aren't connected to the root entry yet.
  48. m_parent->LinkChild(this);
  49. }
  50. childEntry->m_linked = true;
  51. }
  52. bool GpuPassProfiler::PassEntry::IsTimestampEnabled() const
  53. {
  54. return m_enabled && m_timestampEnabled;
  55. }
  56. bool GpuPassProfiler::PassEntry::IsPipelineStatisticsEnabled() const
  57. {
  58. return m_enabled && m_pipelineStatisticsEnabled;
  59. }
  60. // --- PassEntry End ---
  61. ///////////////////////////////////////////////////////////////////////
  62. AZStd::unordered_map<Name, GpuPassProfiler::PassEntry> GpuPassProfiler::CreatePassEntriesDatabase(RHI::Ptr<RPI::ParentPass> rootPass)
  63. {
  64. AZStd::unordered_map<Name, GpuPassProfiler::PassEntry> passEntryDatabase;
  65. const auto addPassEntry = [&passEntryDatabase](const RPI::Pass* pass, GpuPassProfiler::PassEntry* parent) -> GpuPassProfiler::PassEntry*
  66. {
  67. // If parent a nullptr, it's assumed to be the rootpass.
  68. if (parent == nullptr)
  69. {
  70. return &passEntryDatabase[pass->GetPathName()];
  71. }
  72. else
  73. {
  74. GpuPassProfiler::PassEntry entry(pass, parent);
  75. // Set the time stamp in the database.
  76. [[maybe_unused]] const auto passEntry = passEntryDatabase.find(entry.m_path);
  77. AZ_Assert(passEntry == passEntryDatabase.end(), "There already is an entry with the name \"%s\".", entry.m_path.GetCStr());
  78. // Set the entry in the map.
  79. GpuPassProfiler::PassEntry& entryRef = passEntryDatabase[entry.m_path] = entry;
  80. return &entryRef;
  81. }
  82. };
  83. // NOTE: Write it all out, can't have recursive functions for lambdas.
  84. const AZStd::function<void(const RPI::Pass*, PassEntry*)> getPassEntryRecursive = [&addPassEntry, &getPassEntryRecursive](const RPI::Pass* pass, GpuPassProfiler::PassEntry* parent) -> void
  85. {
  86. const RPI::ParentPass* passAsParent = pass->AsParent();
  87. // Add new entry to the timestamp map.
  88. GpuPassProfiler::PassEntry* entry = addPassEntry(pass, parent);
  89. // Recur if it's a parent.
  90. if (passAsParent)
  91. {
  92. for (const auto& childPass : passAsParent->GetChildren())
  93. {
  94. getPassEntryRecursive(childPass.get(), entry);
  95. }
  96. }
  97. };
  98. // Set up the root entry.
  99. GpuPassProfiler::PassEntry rootEntry(static_cast<RPI::Pass*>(rootPass.get()), nullptr);
  100. passEntryDatabase[rootPass->GetPathName()] = rootEntry;
  101. // Create an intermediate structure from the passes.
  102. // Recursively create the timestamp entries tree.
  103. getPassEntryRecursive(static_cast<RPI::Pass*>(rootPass.get()), nullptr);
  104. // Interpolate the old values.
  105. const float lerpWeight = 0.2f;
  106. InterpolatePassEntries(passEntryDatabase, lerpWeight);
  107. return passEntryDatabase;
  108. }
  109. void GpuPassProfiler::InterpolatePassEntries(AZStd::unordered_map<Name, GpuPassProfiler::PassEntry>& passEntryDatabase, float weight) const
  110. {
  111. for (auto& entry : passEntryDatabase)
  112. {
  113. const auto oldEntryIt = passEntryDatabase.find(entry.second.m_path);
  114. if (oldEntryIt != passEntryDatabase.end())
  115. {
  116. // Interpolate the timestamps.
  117. const double interpolated = Lerp(static_cast<double>(oldEntryIt->second.m_interpolatedTimestampInNanoseconds),
  118. static_cast<double>(entry.second.m_timestampResult.GetDurationInNanoseconds()),
  119. static_cast<double>(weight));
  120. entry.second.m_interpolatedTimestampInNanoseconds = static_cast<uint64_t>(interpolated);
  121. }
  122. }
  123. }
  124. AZStd::vector<GpuPassProfiler::PassEntry*> GpuPassProfiler::SortPassEntriesByTimestamps(AZStd::unordered_map<Name, PassEntry>& timestampEntryDatabase)
  125. {
  126. // pass entry grid based on its timestamp
  127. AZStd::vector<GpuPassProfiler::PassEntry*> sortedPassEntries;
  128. sortedPassEntries.reserve(timestampEntryDatabase.size());
  129. // Set the child of the parent, only if it passes the filter.
  130. for (auto& passEntryIt : timestampEntryDatabase)
  131. {
  132. PassEntry* passEntry = &passEntryIt.second;
  133. // Collect all pass entries with non-zero durations
  134. if (passEntry->m_timestampResult.GetDurationInTicks() > 0)
  135. {
  136. sortedPassEntries.push_back(passEntry);
  137. }
  138. }
  139. // Sort the pass entries based on their starting time and duration
  140. AZStd::sort(sortedPassEntries.begin(), sortedPassEntries.end(), [](const PassEntry* passEntry1, const PassEntry* passEntry2) {
  141. if (passEntry1->m_timestampResult.GetTimestampBeginInTicks() == passEntry2->m_timestampResult.GetTimestampBeginInTicks())
  142. {
  143. return passEntry1->m_timestampResult.GetDurationInTicks() < passEntry2->m_timestampResult.GetDurationInTicks();
  144. }
  145. return passEntry1->m_timestampResult.GetTimestampBeginInTicks() < passEntry2->m_timestampResult.GetTimestampBeginInTicks();
  146. });
  147. return sortedPassEntries;
  148. }
  149. uint64_t GpuPassProfiler::CalculateTotalGpuPassTime(const AZStd::vector<GpuPassProfiler::PassEntry*>& sortedPassEntries)
  150. {
  151. // calculate the total GPU duration.
  152. RPI::TimestampResult gpuTimestamp;
  153. if (sortedPassEntries.size() > 0)
  154. {
  155. gpuTimestamp = sortedPassEntries.front()->m_timestampResult;
  156. gpuTimestamp.Add(sortedPassEntries.back()->m_timestampResult);
  157. }
  158. return gpuTimestamp.GetDurationInNanoseconds();
  159. }
  160. uint64_t GpuPassProfiler::MeasureGpuTimeInNanoseconds(RHI::Ptr<RPI::ParentPass> rootPass)
  161. {
  162. if (m_measureGpuTime)
  163. {
  164. if (!rootPass->IsTimestampQueryEnabled())
  165. {
  166. rootPass->SetTimestampQueryEnabled(true);
  167. }
  168. }
  169. else
  170. {
  171. if (rootPass->IsTimestampQueryEnabled())
  172. {
  173. rootPass->SetTimestampQueryEnabled(false);
  174. }
  175. return 0;
  176. }
  177. // This would be the non-efficient way to measure GPU time per frame, but
  178. // it is what ImGuiGpuProfiler would need to do as it needs to show more detailed data.
  179. // If your FPS is at 300fps, running these three functions can make it drop to ~265fps.
  180. //auto passEntryDatabase = CreatePassEntriesDatabase(rootPass);
  181. //auto sortedPassEntries = SortPassEntriesByTimestamps(passEntryDatabase);
  182. //return CalculateTotalGpuPassTime(sortedPassEntries);
  183. AZ::RPI::TimestampResult resultBegin(AZStd::numeric_limits<uint64_t>::max(), AZStd::numeric_limits<uint64_t>::max(), RHI::HardwareQueueClass::Graphics);
  184. AZ::RPI::TimestampResult resultEnd;
  185. // NOTE: Write it all out, can't have recursive functions for lambdas.
  186. const AZStd::function<void(const RPI::Pass*)> calculateResultEndRecursive = [&resultBegin, &resultEnd, &calculateResultEndRecursive](const RPI::Pass* pass) -> void
  187. {
  188. const RPI::ParentPass* passAsParent = pass->AsParent();
  189. // Add new entry to the timestamp map.
  190. AZ::RPI::TimestampResult passTime = pass->GetLatestTimestampResult();
  191. if (passTime.GetDurationInTicks() > 0)
  192. {
  193. const auto passBeginInTicks = passTime.GetTimestampBeginInTicks();
  194. if (passBeginInTicks < resultBegin.GetTimestampBeginInTicks())
  195. {
  196. resultBegin = passTime;
  197. }
  198. if (resultEnd.GetTimestampBeginInTicks() == passBeginInTicks)
  199. {
  200. if (resultEnd.GetDurationInTicks() < passTime.GetDurationInTicks())
  201. {
  202. resultEnd = passTime;
  203. }
  204. }
  205. else if (resultEnd.GetTimestampBeginInTicks() < passBeginInTicks)
  206. {
  207. resultEnd = passTime;
  208. }
  209. }
  210. // Recur if it's a parent.
  211. if (passAsParent)
  212. {
  213. for (const auto& childPass : passAsParent->GetChildren())
  214. {
  215. calculateResultEndRecursive(childPass.get());
  216. }
  217. }
  218. };
  219. calculateResultEndRecursive(rootPass.get());
  220. if (resultBegin.GetTimestampBeginInTicks() >= resultEnd.GetTimestampBeginInTicks())
  221. {
  222. // Bogus data. This is normal for the first 3 frames.
  223. return 0;
  224. }
  225. // calculate the total GPU duration.
  226. resultBegin.Add(resultEnd);
  227. return resultBegin.GetDurationInNanoseconds();
  228. }
  229. } // namespace RPI
  230. } // namespace AZ