From bf89f96bf5156a8927ba54dae61e298f67e0af25 Mon Sep 17 00:00:00 2001
From: PixelyIon
Date: Wed, 8 Dec 2021 02:01:36 +0530
Subject: [PATCH] Implement Optimized LoadOp Clears for Depth/Stencil Attachments

Implements `AddClearDepthStencilSubpass` in `CommandExecutor` which is similar to `ClearColorAttachment` in that it uses `VK_ATTACHMENT_LOAD_OP_CLEAR` for the clear which is far more efficient than using `VK_ATTACHMENT_LOAD_OP_LOAD` then doing the clear.
---
 .../gpu/interconnect/command_executor.cpp | 30 +++++++++++++++++++
 .../gpu/interconnect/command_executor.h | 8 ++++-
 .../gpu/interconnect/command_nodes.cpp | 23 ++++++++++++++
 .../skyline/gpu/interconnect/command_nodes.h | 9 +++++-
 4 files changed, 68 insertions(+), 2 deletions(-)

diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp
index f43ba19d..522b6eed 100644
--- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp
+++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp
@@ -89,6 +89,36 @@ namespace skyline::gpu::interconnect {
         }
     }
 
+    void CommandExecutor::AddClearDepthStencilSubpass(TextureView *attachment, const vk::ClearDepthStencilValue &value) {
+        AttachTexture(attachment->texture);
+
+        bool newRenderPass{CreateRenderPass(vk::Rect2D{
+            .extent = attachment->texture->dimensions,
+        })};
+        renderPass->AddSubpass({}, {}, attachment); // No input/color attachments, the texture is bound solely as the depth/stencil attachment
+
+        if (renderPass->ClearDepthStencilAttachment(value)) { // Fast path: the clear was folded into the attachment's load op
+            if (!newRenderPass)
+                nodes.emplace_back(std::in_place_type_t());
+        } else { // Fallback: the load op couldn't be used, record an explicit clear command for the subpass instead
+            auto function{[aspect = attachment->format->vkAspect, extent = attachment->texture->dimensions, value](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &, GPU &, vk::RenderPass, u32) {
+                commandBuffer.clearAttachments(vk::ClearAttachment{
+                    .aspectMask = aspect,
+                    .clearValue = value,
+                }, vk::ClearRect{
+                    .rect = vk::Rect2D{.extent = extent}, // Nested designators (`.rect.extent`) are a C99 extension and not standard C++
+                    .baseArrayLayer = 0,
+                    .layerCount = 1,
+                });
+            }};
+
+            if (newRenderPass)
+                nodes.emplace_back(std::in_place_type_t(), function);
+            else
+                nodes.emplace_back(std::in_place_type_t(), function);
+        }
+    }
+
     void CommandExecutor::Execute() {
         if (!nodes.empty()) {
             TRACE_EVENT("gpu", "CommandExecutor::Execute");
diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h
index ede340d8..b430983e 100644
--- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h
+++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h
@@ -52,11 +52,17 @@ namespace skyline::gpu::interconnect {
         void AddSubpass(std::function &, GPU &, vk::RenderPass, u32)> &&function, vk::Rect2D renderArea, span inputAttachments = {}, span colorAttachments = {}, TextureView *depthStencilAttachment = {});
 
         /**
-         * @brief Adds a subpass that clears the entirety of the specified attachment with a value, it may utilize VK_ATTACHMENT_LOAD_OP_CLEAR for a more efficient clear when possible
+         * @brief Adds a subpass that clears the entirety of the specified attachment with a color value, it may utilize VK_ATTACHMENT_LOAD_OP_CLEAR for a more efficient clear when possible
          * @note Any texture supplied to this **must** be locked by the calling thread, it should also undergo no persistent layout transitions till execution
          */
         void AddClearColorSubpass(TextureView *attachment, const vk::ClearColorValue &value);
 
+        /**
+         * @brief Adds a subpass that clears the entirety of the specified attachment with a depth/stencil value, it may utilize VK_ATTACHMENT_LOAD_OP_CLEAR for a more efficient clear when possible
+         * @note Any texture supplied to this **must** be locked by the calling thread, it should also undergo no persistent layout transitions till execution
+         */
+        void AddClearDepthStencilSubpass(TextureView *attachment, const vk::ClearDepthStencilValue &value);
+
         /**
          * @brief Execute all the nodes and submit the resulting command buffer to the GPU
          */
diff --git
a/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.cpp b/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.cpp
index b9cef79d..96c9bc60 100644
--- a/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.cpp
+++ b/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.cpp
@@ -166,6 +166,29 @@ namespace skyline::gpu::interconnect::node {
         return false;
     }
 
+    bool RenderPassNode::ClearDepthStencilAttachment(const vk::ClearDepthStencilValue &value) {
+        auto attachmentReference{RebasePointer(attachmentReferences, subpassDescriptions.back().pDepthStencilAttachment)}; // Depth/stencil reference of the most recently added subpass
+        auto attachmentIndex{attachmentReference->attachment};
+
+        for (const auto &reference : attachmentReferences) // Any other reference to the same attachment rules out changing its load op
+            if (reference.attachment == attachmentIndex && &reference != attachmentReference)
+                return false;
+
+        auto &attachmentDescription{attachmentDescriptions.at(attachmentIndex)};
+        if (attachmentDescription.loadOp == vk::AttachmentLoadOp::eLoad) { // Still a plain load, so it can be replaced by a clear
+            attachmentDescription.loadOp = vk::AttachmentLoadOp::eClear;
+
+            if (clearValues.size() <= attachmentIndex) // Only ever grow: an unconditional resize would truncate the clear values of higher-indexed attachments
+                clearValues.resize(attachmentIndex + 1);
+            clearValues[attachmentIndex].depthStencil = value;
+            return true;
+        } else if (attachmentDescription.loadOp == vk::AttachmentLoadOp::eClear && clearValues[attachmentIndex].depthStencil == value) { // An identical clear is already set, nothing more to do
+            return true;
+        }
+
+        return false;
+    }
+
     vk::RenderPass RenderPassNode::operator()(vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &cycle, GPU &gpu) {
         storage->device = &gpu.vkDevice;
 
diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.h b/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.h
index 3368d58b..edd7511f 100644
--- a/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.h
+++ b/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.h
@@ -79,13 +79,20 @@ namespace skyline::gpu::interconnect::node {
         void AddSubpass(span inputAttachments, span colorAttachments, TextureView *depthStencilAttachment);
 
         /**
-         * @brief Clears a color attachment in the current subpass with VK_ATTACHMENT_LOAD_OP_LOAD
+         * @brief Clears a color attachment in the current subpass with VK_ATTACHMENT_LOAD_OP_CLEAR
          * @param colorAttachment The index of the attachment in the attachments bound to the current subpass
          * @return If the attachment could be cleared or not due to conflicts with other operations
          * @note We require a subpass to be attached during this as the clear will not take place unless it's referenced by a subpass
          */
         bool ClearColorAttachment(u32 colorAttachment, const vk::ClearColorValue &value);
 
+        /**
+         * @brief Clears the depth/stencil attachment of the current subpass to the supplied value by switching its load op to VK_ATTACHMENT_LOAD_OP_CLEAR
+         * @return True if the load op clear was set (or an identical clear was already set), false if it could not be used due to conflicts with other operations
+         * @note We require a subpass to be attached during this as the clear will not take place unless it's referenced by a subpass
+         */
+        bool ClearDepthStencilAttachment(const vk::ClearDepthStencilValue &value);
+
         vk::RenderPass operator()(vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &cycle, GPU &gpu);
     };
 