BloomBlurPass.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467
  1. /*
  2. * Copyright (c) Contributors to the Open 3D Engine Project.
  3. * For complete copyright and license terms please see the LICENSE at the root of this distribution.
  4. *
  5. * SPDX-License-Identifier: Apache-2.0 OR MIT
  6. *
  7. */
  8. #include <PostProcessing/BloomBlurPass.h>
  9. #include <PostProcess/Bloom/BloomSettings.h>
  10. #include <PostProcess/PostProcessFeatureProcessor.h>
  11. #include <Atom/RHI/CommandList.h>
  12. #include <Atom/RHI/Factory.h>
  13. #include <Atom/RHI/FrameScheduler.h>
  14. #include <Atom/RPI.Reflect/Pass/PassTemplate.h>
  15. #include <Atom/RPI.Public/Pass/PassUtils.h>
  16. #include <Atom/RPI.Public/Pass/PassAttachment.h>
  17. #include <Atom/RPI.Public/Pass/PassDefines.h>
  18. #include <Atom/RPI.Public/Pass/PassFactory.h>
  19. #include <Atom/RPI.Public/Pass/PassSystemInterface.h>
  20. #include <Atom/RPI.Public/RPIUtils.h>
  21. #include <Atom/RPI.Public/Scene.h>
  22. #include <Atom/RPI.Public/RenderPipeline.h>
  23. #include <Atom/RPI.Public/View.h>
  24. #include <Atom/RPI.Public/RPISystemInterface.h>
  25. #include <Atom/RPI.Public/Shader/ShaderResourceGroup.h>
  26. #include <Atom/RPI.Public/Image/AttachmentImagePool.h>
  27. #include <Atom/RPI.Public/Image/ImageSystemInterface.h>
  28. #include <Atom/RPI.Reflect/Pass/ComputePassData.h>
  29. #include <sstream>
  30. namespace AZ
  31. {
  32. namespace Render
  33. {
  namespace BloomBlurPassConstants
  {
      // Largest allowed blur radius; corresponds to a 257 x 257 smoothing kernel
      constexpr float BlurFilterMaxRadius = 128.0f;

      // Smallest allowed blur radius; corresponds to a 1 x 1 smoothing kernel
      constexpr float BlurFilterMinRadius = 0.0f;
  }
  41. RPI::Ptr<BloomBlurPass> BloomBlurPass::Create(const RPI::PassDescriptor& descriptor)
  42. {
  43. RPI::Ptr<BloomBlurPass> pass = aznew BloomBlurPass(descriptor);
  44. return AZStd::move(pass);
  45. }
  46. BloomBlurPass::BloomBlurPass(const RPI::PassDescriptor& descriptor)
  47. : ParentPass(descriptor)
  48. {
  49. // Load DownsampleMipChainPassData (shader asset)
  50. const RPI::DownsampleMipChainPassData* passData = RPI::PassUtils::GetPassData<RPI::DownsampleMipChainPassData>(descriptor);
  51. if (passData == nullptr)
  52. {
  53. AZ_Error("PassSystem", false, "[BloomBlurPass '%s']: Trying to construct without valid DownsampleMipChainPassData!",
  54. GetPathName().GetCStr());
  55. return;
  56. }
  57. m_passData = *passData;
  58. m_weightBuffer.resize(Render::Bloom::MaxStageCount);
  59. m_offsetBuffer.resize(Render::Bloom::MaxStageCount);
  60. }
  61. void BloomBlurPass::GetInputInfo()
  62. {
  63. AZ_Assert(GetInputOutputCount() > 0, "[BloomBlurPass '%s']: must have an input/output", GetPathName().GetCStr());
  64. RPI::PassAttachment* attachment = GetInputOutputBinding(0).GetAttachment().get();
  65. if (attachment != nullptr)
  66. {
  67. m_paramsUpdated |= (m_inputWidth != attachment->m_descriptor.m_image.m_size.m_width);
  68. m_paramsUpdated |= (m_inputHeight != attachment->m_descriptor.m_image.m_size.m_height);
  69. m_inputWidth = attachment->m_descriptor.m_image.m_size.m_width;
  70. m_inputHeight = attachment->m_descriptor.m_image.m_size.m_height;
  71. }
  72. else
  73. {
  74. AZ_Assert(GetInputOutputCount() > 0, "[BloomBlurPass '%s']: input/output image attachment not found", GetPathName().GetCStr());
  75. }
  76. }
  77. void BloomBlurPass::UpdateParameters()
  78. {
  79. auto UpdateIfChanged = [](float& local, float input)->bool
  80. {
  81. if (local != input)
  82. {
  83. local = input;
  84. return true;
  85. }
  86. else
  87. {
  88. return false;
  89. }
  90. };
  91. RPI::Scene* scene = GetScene();
  92. PostProcessFeatureProcessor* fp = scene->GetFeatureProcessor<PostProcessFeatureProcessor>();
  93. RPI::ViewPtr view = m_pipeline->GetFirstView(GetPipelineViewTag());
  94. if (fp)
  95. {
  96. PostProcessSettings* postProcessSettings = fp->GetLevelSettingsFromView(view);
  97. if (postProcessSettings)
  98. {
  99. BloomSettings* bloomSettings = postProcessSettings->GetBloomSettings();
  100. if (bloomSettings)
  101. {
  102. m_paramsUpdated |= UpdateIfChanged(m_kernelSizeScale,
  103. bloomSettings->GetKernelSizeScale());
  104. m_paramsUpdated |= UpdateIfChanged(m_kernelScreenPercents[0],
  105. bloomSettings->GetKernelSizeStage0());
  106. m_paramsUpdated |= UpdateIfChanged(m_kernelScreenPercents[1],
  107. bloomSettings->GetKernelSizeStage1());
  108. m_paramsUpdated |= UpdateIfChanged(m_kernelScreenPercents[2],
  109. bloomSettings->GetKernelSizeStage2());
  110. m_paramsUpdated |= UpdateIfChanged(m_kernelScreenPercents[3],
  111. bloomSettings->GetKernelSizeStage3());
  112. m_paramsUpdated |= UpdateIfChanged(m_kernelScreenPercents[4],
  113. bloomSettings->GetKernelSizeStage4());
  114. }
  115. }
  116. }
  117. }
  118. void BloomBlurPass::CreateBinding(BloomBlurChildPass* pass, uint32_t mipLevel, bool isHorizontalPass)
  119. {
  120. RPI::PassAttachmentBinding& parentInOutBinding = GetInputOutputBinding(0);
  121. const RPI::Ptr<RPI::PassAttachment>& parentInOutAttachment = parentInOutBinding.GetAttachment();
  122. RPI::PassAttachmentBinding& parentInBinding = GetInputBinding(0);
  123. const RPI::Ptr<RPI::PassAttachment>& parentWorkSpaceAttachment = parentInBinding.GetAttachment();
  124. // Create input binding, from downsampling pass
  125. RPI::PassAttachmentBinding inBinding;
  126. inBinding.m_name = "Input";
  127. inBinding.m_shaderInputName = "m_inputTexture";
  128. inBinding.m_slotType = RPI::PassSlotType::Input;
  129. inBinding.m_scopeAttachmentUsage = RHI::ScopeAttachmentUsage::Shader;
  130. inBinding.m_connectedBinding = isHorizontalPass ? &parentInOutBinding : &parentInBinding;
  131. RHI::ImageViewDescriptor viewDesc;
  132. viewDesc.m_mipSliceMin = static_cast<uint16_t>(mipLevel);
  133. viewDesc.m_mipSliceMax = static_cast<uint16_t>(mipLevel);
  134. inBinding.m_unifiedScopeDesc.SetAsImage(viewDesc);
  135. inBinding.SetAttachment(parentInOutAttachment);
  136. pass->AddAttachmentBinding(inBinding);
  137. // Create output binding, owned by current pass
  138. RPI::PassAttachmentBinding outBinding;
  139. outBinding.m_name = "Output";
  140. outBinding.m_shaderInputName = "m_outputTexture";
  141. outBinding.m_slotType = RPI::PassSlotType::Output;
  142. outBinding.m_scopeAttachmentUsage = RHI::ScopeAttachmentUsage::Shader;
  143. outBinding.m_connectedBinding = isHorizontalPass ? &parentInBinding : &parentInOutBinding;
  144. // Output to the same mip level as input downsampled texture
  145. outBinding.m_unifiedScopeDesc.SetAsImage(viewDesc);
  146. outBinding.SetAttachment(parentWorkSpaceAttachment);
  147. pass->AddAttachmentBinding(outBinding);
  148. }
  149. void BloomBlurPass::BuildChildPasses()
  150. {
  151. if (!m_children.empty())
  152. {
  153. // In this case children are still exists but attachment binding is flushed out, so we rebind them again
  154. uint32_t stageCount = Render::Bloom::MaxStageCount;
  155. for (uint32_t childIndex = 0; childIndex < m_children.size(); ++childIndex)
  156. {
  157. uint32_t stageIndex = childIndex % stageCount;
  158. bool isHorizontalPass = childIndex < stageCount;
  159. BloomBlurChildPass* blurChild = static_cast<BloomBlurChildPass*>(m_children[childIndex].get());
  160. CreateBinding(blurChild, stageIndex, isHorizontalPass);
  161. }
  162. }
  163. else
  164. {
  165. // Create children
  166. RPI::PassSystemInterface* passSystem = RPI::PassSystemInterface::Get();
  167. uint32_t stageCount = Render::Bloom::MaxStageCount;
  168. for (uint32_t childIndex = 0; childIndex < stageCount * 2; ++childIndex)
  169. {
  170. uint32_t stageIndex = childIndex % stageCount;
  171. bool isHorizontalPass = childIndex < stageCount;
  172. RPI::PassDescriptor childDesc;
  173. childDesc.m_passData = AZStd::make_shared<RPI::ComputePassData>();
  174. RPI::ComputePassData* passData = static_cast<RPI::ComputePassData*>(childDesc.m_passData.get());
  175. passData->m_shaderReference = m_passData.m_shaderReference;
  176. if (isHorizontalPass)
  177. {
  178. childDesc.m_passName = Name{ AZStd::string::format("BloomBlurHorizontal%d", stageIndex) };
  179. }
  180. else
  181. {
  182. childDesc.m_passName = Name{ AZStd::string::format("BloomBlurVertical%d", stageIndex) };
  183. }
  184. RPI::Ptr<BloomBlurChildPass> childPass = passSystem->CreatePass<BloomBlurChildPass>(childDesc);
  185. CreateBinding(childPass.get(), stageIndex, isHorizontalPass);
  186. AddChild(childPass);
  187. }
  188. }
  189. }
  190. void BloomBlurPass::UpdateChildren()
  191. {
  192. uint32_t imageWidth, imageHeight;
  193. imageWidth = m_inputWidth;
  194. imageHeight = m_inputHeight;
  195. uint32_t stageCount = Render::Bloom::MaxStageCount;
  196. for (uint32_t childIdxH = 0, childIdxV = stageCount; childIdxH < stageCount; ++childIdxH, ++childIdxV)
  197. {
  198. // Horizontal
  199. BloomBlurChildPass* blurChild = static_cast<BloomBlurChildPass*>(m_children[childIdxH].get());
  200. blurChild->UpdateParameters(
  201. m_offsetBuffer[childIdxH], m_weightBuffer[childIdxH], m_kernelRadiusData[childIdxH],
  202. true, childIdxH, imageWidth, imageHeight);
  203. // Vertical
  204. blurChild = static_cast<BloomBlurChildPass*>(m_children[childIdxV].get());
  205. blurChild->UpdateParameters(
  206. m_offsetBuffer[childIdxH], m_weightBuffer[childIdxH], m_kernelRadiusData[childIdxH],
  207. false, childIdxH, imageWidth, imageHeight);
  208. imageWidth = imageWidth / 2;
  209. imageHeight = imageHeight / 2;
  210. }
  211. }
  212. void BloomBlurPass::BuildInternal()
  213. {
  214. BuildChildPasses();
  215. ParentPass::BuildInternal();
  216. }
  217. void BloomBlurPass::FrameBeginInternal(FramePrepareParams params)
  218. {
  219. GetInputInfo();
  220. UpdateParameters();
  221. if (m_paramsUpdated)
  222. {
  223. BuildKernelData();
  224. m_paramsUpdated = false;
  225. }
  226. UpdateChildren();
  227. ParentPass::FrameBeginInternal(params);
  228. }
  229. void BloomBlurPass::BuildKernelData()
  230. {
  231. m_weightData.clear();
  232. m_offsetData.clear();
  233. m_kernelRadiusData.clear();
  234. RPI::PassAttachment* inOutAttachment = GetInputOutputBinding(0).GetAttachment().get();
  235. uint32_t imageWidth = inOutAttachment->m_descriptor.m_image.m_size.m_width;
  236. // Horizontal & vertical pass shared the same kernel
  237. for (uint32_t i = 0; i < Render::Bloom::MaxStageCount; ++i)
  238. {
  239. // (Input screen width) * (Downscale factor based on mip level) * (ratio of kernel size and screen width) *
  240. // 0.5 to convert from diameter to radius (exclude center pixel)
  241. // Result is limited to a upper/lower bound to avoid extreme cases
  242. float radius = KernelRadiusClamp(imageWidth * (1.0f / static_cast<float>(exp2(i))) * AZStd::min(1.0f, m_kernelSizeScale * m_kernelScreenPercents[i]) * 0.5f);
  243. uint32_t kernelIntegerRadius = static_cast<uint32_t>(floor(radius + 0.5));
  244. // Define the width of kernel as 6*sigma (3 sigma each side) to achieve 99.7% confidence interval
  245. float sigma = radius / 3.0f;
  246. if (kernelIntegerRadius > 0)
  247. {
  248. GenerateWeightOffset(sigma, kernelIntegerRadius);
  249. PrepareBuffer(i);
  250. }
  251. else
  252. {
  253. // If kernel radius is 0 skip kernel calculation and buffer preparation
  254. m_weightData.emplace_back();
  255. m_offsetData.emplace_back();
  256. m_kernelRadiusData.push_back(0);
  257. }
  258. }
  259. }
  260. float BloomBlurPass::KernelRadiusClamp(float radius)
  261. {
  262. return fmaxf(fminf(radius, BloomBlurPassConstants::BlurFilterMaxRadius), BloomBlurPassConstants::BlurFilterMinRadius);
  263. }
  264. float BloomBlurPass::Gaussian1D(float x, float sigma)
  265. {
  266. return (1.0f / (sqrt(Constants::TwoPi) * sigma)) * exp(-(x * x) / (2.0f * sigma * sigma));
  267. }
  268. void BloomBlurPass::GenerateWeightOffset(float sigma, uint32_t kernelRadius)
  269. {
  270. float weightSum = 0.0;
  271. float weight = 0.0;
  272. // Gaussian kernel is radially symmetric, so we only store one wing of the 1d kernel
  273. AZStd::vector<float> weights;
  274. AZStd::vector<float> offsets;
  275. // Center pixel
  276. weight = Gaussian1D(0, sigma);
  277. weights.push_back(weight);
  278. offsets.push_back(0);
  279. weightSum += weight;
  280. for (uint32_t i = 1; i <= kernelRadius; i += 2)
  281. {
  282. float weight0 = Gaussian1D(static_cast<float>(i), sigma);
  283. float weight1 = 0.0;
  284. if (i != kernelRadius)
  285. {
  286. weight1 = Gaussian1D(static_cast<float>(i + 1), sigma);
  287. }
  288. weight = weight0 + weight1;
  289. weights.push_back(weight);
  290. // (i * weight0 + (i + 1) * weight1) / (weight0 + weight1)
  291. // => (i * (weight0 + weight1) + weight1) / (weight0 + weight1)
  292. // => i + weight1 / (weight0 + weight1)
  293. offsets.push_back(i + weight1 / weight);
  294. // Two symmetric weight on each side
  295. weightSum += weight * 2;
  296. }
  297. // Renormalize so the kenrel weight sum to 1
  298. float weightSumRcp = 1.0f / weightSum;
  299. for (uint32_t i = 0; i < weights.size(); ++i)
  300. {
  301. weights[i] *= weightSumRcp;
  302. }
  303. m_weightData.push_back(weights);
  304. m_offsetData.push_back(offsets);
  305. // Record reduced kernel's radius
  306. m_kernelRadiusData.push_back(static_cast<uint32_t>(weights.size()));
  307. }
  308. void BloomBlurPass::PrepareBuffer(uint32_t blurStageIndex)
  309. {
  310. uint32_t byteCount = sizeof(float) * static_cast<uint32_t>(m_weightData[blurStageIndex].size());
  311. // Prepare buffer, these two buffers shared the same size and layout so can be allocated together
  312. if (!(m_weightBuffer[blurStageIndex] || m_offsetBuffer[blurStageIndex]))
  313. {
  314. std::stringstream ss;
  315. ss << GetPathName().GetCStr() << ".WeightBuffer.Stage" << blurStageIndex;
  316. RPI::CommonBufferDescriptor desc;
  317. desc.m_poolType = RPI::CommonBufferPoolType::ReadOnly;
  318. desc.m_bufferName = ss.str().c_str();
  319. desc.m_elementSize = sizeof(float);
  320. desc.m_elementFormat = RHI::Format::R32_FLOAT;
  321. desc.m_byteCount = byteCount;
  322. desc.m_bufferData = static_cast<void*>(m_weightData[blurStageIndex].data());
  323. m_weightBuffer[blurStageIndex] = RPI::BufferSystemInterface::Get()->CreateBufferFromCommonPool(desc);
  324. ss.clear();
  325. ss << GetPathName().GetCStr() << "OffsetBuffer.Stage" << blurStageIndex;
  326. desc.m_bufferName = ss.str().c_str();
  327. desc.m_bufferData = static_cast<void*>(m_offsetData[blurStageIndex].data());
  328. m_offsetBuffer[blurStageIndex] = RPI::BufferSystemInterface::Get()->CreateBufferFromCommonPool(desc);
  329. }
  330. else
  331. {
  332. // Update buffer data and resize if necessary, weight and offset buffer always has same size
  333. // therefore no need to check twice
  334. if (byteCount != m_weightBuffer[blurStageIndex]->GetBufferSize())
  335. {
  336. m_weightBuffer[blurStageIndex]->Resize(byteCount);
  337. m_offsetBuffer[blurStageIndex]->Resize(byteCount);
  338. }
  339. m_weightBuffer[blurStageIndex]->UpdateData(m_weightData[blurStageIndex].data(), byteCount);
  340. m_offsetBuffer[blurStageIndex]->UpdateData(m_offsetData[blurStageIndex].data(), byteCount);
  341. }
  342. }
  343. // ============ Child pass's member function ============
  344. RPI::Ptr<BloomBlurChildPass> BloomBlurChildPass::Create(const RPI::PassDescriptor& descriptor)
  345. {
  346. RPI::Ptr<BloomBlurChildPass> pass = aznew BloomBlurChildPass(descriptor);
  347. return AZStd::move(pass);
  348. }
  349. BloomBlurChildPass::BloomBlurChildPass(const RPI::PassDescriptor& descriptor)
  350. : ComputePass(descriptor)
  351. { }
  352. void BloomBlurChildPass::UpdateParameters(
  353. Data::Instance<RPI::Buffer> offsetBuffer,
  354. Data::Instance<RPI::Buffer> weightBuffer,
  355. uint32_t radius,
  356. bool direction,
  357. uint32_t mipLevel,
  358. uint32_t imageWidth,
  359. uint32_t imageHeight)
  360. {
  361. // These quantities are stored locally because they need to be passed every frame
  362. // but the function is only invoked when parameters are updated
  363. m_offsetBuffer = offsetBuffer;
  364. m_weightBuffer = weightBuffer;
  365. m_sourceImageWidth = imageWidth;
  366. m_sourceImageHeight = imageHeight;
  367. m_shaderResourceGroup->SetConstant(m_kernelRadiusInputIndex, radius);
  368. m_shaderResourceGroup->SetConstant(m_directionInputIndex, direction);
  369. m_shaderResourceGroup->SetConstant(m_mipLevelInputIndex, mipLevel);
  370. float width = static_cast<float>(imageWidth);
  371. float height = static_cast<float>(imageHeight);
  372. m_shaderResourceGroup->SetConstant(m_sourceImageSizeInputIndex, AZ::Vector2(width, height));
  373. m_shaderResourceGroup->SetConstant(m_sourceImageTexelSizeInputIndex, AZ::Vector2(1.0f / width, 1.0f / height));
  374. }
  375. void BloomBlurChildPass::FrameBeginInternal(FramePrepareParams params)
  376. {
  377. if (m_offsetBuffer)
  378. {
  379. m_shaderResourceGroup->SetBufferView(
  380. m_offsetsInputIndex, m_offsetBuffer->GetBufferView());
  381. }
  382. if (m_weightBuffer)
  383. {
  384. m_shaderResourceGroup->SetBufferView(
  385. m_weightsInputIndex, m_weightBuffer->GetBufferView());
  386. }
  387. SetTargetThreadCounts(m_sourceImageWidth, m_sourceImageHeight, 1);
  388. ComputePass::FrameBeginInternal(params);
  389. }
  390. } // namespace RPI
  391. } // namespace AZ