From d45f9e4d26aa3205a8496893cbaf9b60efb330fe Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Mon, 13 Feb 2023 18:02:13 +0000 Subject: [PATCH] Loosen some texture WaR sync when possible. By keeping track of the pipeline stages reading the image, we can do more fine-grained write-after-read (WaR) prevention, as opposed to waiting for all commands to complete. --- .../cpp/skyline/gpu/interconnect/fermi_2d.cpp | 2 +- .../main/cpp/skyline/gpu/texture/texture.cpp | 23 +++++++++++++------ .../main/cpp/skyline/gpu/texture/texture.h | 6 +++++ 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/app/src/main/cpp/skyline/gpu/interconnect/fermi_2d.cpp b/app/src/main/cpp/skyline/gpu/interconnect/fermi_2d.cpp index b543758e..4a4e7bee 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/fermi_2d.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/fermi_2d.cpp @@ -151,7 +151,7 @@ namespace skyline::gpu::interconnect { std::array sampledImages{srcTextureView.get()}; executor.AddSubpass(std::move(executionCallback), {{static_cast(dstRectX), static_cast(dstRectY)}, {dstRectWidth, dstRectHeight} }, sampledImages, {}, {dst}, {}, false, - vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eAllCommands); + vk::PipelineStageFlagBits::eAllGraphics, vk::PipelineStageFlagBits::eAllGraphics); } ); diff --git a/app/src/main/cpp/skyline/gpu/texture/texture.cpp b/app/src/main/cpp/skyline/gpu/texture/texture.cpp index 6595d1bf..ba23cace 100644 --- a/app/src/main/cpp/skyline/gpu/texture/texture.cpp +++ b/app/src/main/cpp/skyline/gpu/texture/texture.cpp @@ -1007,22 +1007,31 @@ namespace skyline::gpu { lastRenderPassUsage = renderPassUsage; lastRenderPassIndex = renderPassIndex; - if (renderPassUsage == texture::RenderPassUsage::RenderTarget) + if (renderPassUsage == texture::RenderPassUsage::RenderTarget) { pendingStageMask = vk::PipelineStageFlagBits::eVertexShader | - vk::PipelineStageFlagBits::eTessellationControlShader | - vk::PipelineStageFlagBits::eTessellationEvaluationShader | - 
vk::PipelineStageFlagBits::eGeometryShader | - vk::PipelineStageFlagBits::eFragmentShader | - vk::PipelineStageFlagBits::eComputeShader; - else if (renderPassUsage == texture::RenderPassUsage::None) + vk::PipelineStageFlagBits::eTessellationControlShader | + vk::PipelineStageFlagBits::eTessellationEvaluationShader | + vk::PipelineStageFlagBits::eGeometryShader | + vk::PipelineStageFlagBits::eFragmentShader | + vk::PipelineStageFlagBits::eComputeShader; + readStageMask = {}; + } else if (renderPassUsage == texture::RenderPassUsage::None) { pendingStageMask = {}; + readStageMask = {}; + } } texture::RenderPassUsage Texture::GetLastRenderPassUsage() { return lastRenderPassUsage; } + vk::PipelineStageFlags Texture::GetReadStageMask() { + return readStageMask; + } + void Texture::PopulateReadBarrier(vk::PipelineStageFlagBits dstStage, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask) { + readStageMask |= dstStage; + if (!(pendingStageMask & dstStage)) return; diff --git a/app/src/main/cpp/skyline/gpu/texture/texture.h b/app/src/main/cpp/skyline/gpu/texture/texture.h index 63c36092..f04b70d2 100644 --- a/app/src/main/cpp/skyline/gpu/texture/texture.h +++ b/app/src/main/cpp/skyline/gpu/texture/texture.h @@ -411,6 +411,7 @@ namespace skyline::gpu { u32 lastRenderPassIndex{}; //!< The index of the last render pass that used this texture texture::RenderPassUsage lastRenderPassUsage{texture::RenderPassUsage::None}; //!< The type of usage in the last render pass vk::PipelineStageFlags pendingStageMask{}; //!< List of pipeline stages that are yet to be flushed for reads since the last time this texture was used an an RT + vk::PipelineStageFlags readStageMask{}; //!< Set of pipeline stages that this texture has been read in since it was last used as an RT friend TextureManager; friend TextureView; @@ -613,6 +614,11 @@ namespace skyline::gpu { */ texture::RenderPassUsage GetLastRenderPassUsage(); + /** + * @return The set of stages this texture has been 
read in since it was last used as an RT + */ + vk::PipelineStageFlags GetReadStageMask(); + /** * @brief Populates the input src and dst stage masks with appropriate read barrier parameters for the current texture state */