Coalesce subpasses with compatible attachments together

We frequently encounter successive subpasses with the exact same framebuffer configuration. We now exploit this by reusing the previous subpass instead of creating a new one, which avoids the overhead involved in subpass creation. This provides a significant performance boost in cases where it greatly reduces the number of subpasses being created, while providing next to no benefit in other cases.
This commit is contained in:
PixelyIon 2022-04-27 13:22:34 +05:30
parent a947933bf0
commit 90c635bf78
2 changed files with 90 additions and 54 deletions

View File

@ -11,21 +11,64 @@ namespace skyline::gpu::interconnect {
cycle->Cancel(); cycle->Cancel();
} }
bool CommandExecutor::CreateRenderPass(vk::Rect2D renderArea) { bool CommandExecutor::CreateRenderPassWithSubpass(vk::Rect2D renderArea, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment) {
if (renderPass && (renderPass->renderArea != renderArea || subpassCount > gpu.traits.quirks.maxSubpassCount)) { auto addSubpass{[&] {
renderPass->AddSubpass(inputAttachments, colorAttachments, depthStencilAttachment);
lastSubpassAttachments.clear();
auto insertAttachmentRange{[this](auto &attachments) -> std::pair<size_t, size_t> {
size_t beginIndex{lastSubpassAttachments.size()};
lastSubpassAttachments.insert(lastSubpassAttachments.end(), attachments.begin(), attachments.end());
return {beginIndex, attachments.size()};
}};
auto rangeToSpan{[this](auto &range) -> span<TextureView *> {
return {lastSubpassAttachments.data() + range.first, range.second};
}};
auto inputAttachmentRange{insertAttachmentRange(inputAttachments)};
auto colorAttachmentRange{insertAttachmentRange(colorAttachments)};
lastSubpassInputAttachments = rangeToSpan(inputAttachmentRange);
lastSubpassColorAttachments = rangeToSpan(colorAttachmentRange);
lastSubpassDepthStencilAttachment = depthStencilAttachment;
}};
if (renderPass == nullptr || (renderPass && (renderPass->renderArea != renderArea || subpassCount > gpu.traits.quirks.maxSubpassCount))) {
// We need to create a render pass if one doesn't already exist or the current one isn't compatible
if (renderPass != nullptr)
nodes.emplace_back(std::in_place_type_t<node::RenderPassEndNode>()); nodes.emplace_back(std::in_place_type_t<node::RenderPassEndNode>());
renderPass = nullptr; renderPass = &std::get<node::RenderPassNode>(nodes.emplace_back(std::in_place_type_t<node::RenderPassNode>(), renderArea));
addSubpass();
subpassCount = 0; subpassCount = 0;
return false;
} else {
if (ranges::equal(lastSubpassInputAttachments, inputAttachments) &&
ranges::equal(lastSubpassColorAttachments, colorAttachments) &&
lastSubpassDepthStencilAttachment == depthStencilAttachment) {
// The last subpass had the same attachments, so we can reuse them
return false;
} else {
// The last subpass had different attachments, so we need to create a new one
addSubpass();
subpassCount++;
return true;
}
}
} }
bool newRenderPass{renderPass == nullptr}; void CommandExecutor::FinishRenderPass() {
if (newRenderPass) if (renderPass) {
// We need to create a render pass if one doesn't already exist or the current one isn't compatible nodes.emplace_back(std::in_place_type_t<node::RenderPassEndNode>());
renderPass = &std::get<node::RenderPassNode>(nodes.emplace_back(std::in_place_type_t<node::RenderPassNode>(), renderArea));
else
subpassCount++;
return newRenderPass; renderPass = nullptr;
subpassCount = 0;
lastSubpassAttachments.clear();
lastSubpassInputAttachments = nullptr;
lastSubpassColorAttachments = nullptr;
lastSubpassDepthStencilAttachment = nullptr;
}
} }
void CommandExecutor::AttachTexture(TextureView *view) { void CommandExecutor::AttachTexture(TextureView *view) {
@ -52,39 +95,27 @@ namespace skyline::gpu::interconnect {
} }
void CommandExecutor::AddSubpass(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32)> &&function, vk::Rect2D renderArea, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, bool exclusiveSubpass) { void CommandExecutor::AddSubpass(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32)> &&function, vk::Rect2D renderArea, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, bool exclusiveSubpass) {
if (exclusiveSubpass && renderPass) { if (exclusiveSubpass)
nodes.emplace_back(std::in_place_type_t<node::RenderPassEndNode>()); FinishRenderPass();
renderPass = nullptr;
subpassCount = 0;
}
bool newRenderPass{CreateRenderPass(renderArea)}; bool gotoNext{CreateRenderPassWithSubpass(renderArea, inputAttachments, colorAttachments, depthStencilAttachment ? &*depthStencilAttachment : nullptr)};
renderPass->AddSubpass(inputAttachments, colorAttachments, depthStencilAttachment ? &*depthStencilAttachment : nullptr); if (gotoNext)
if (newRenderPass)
nodes.emplace_back(std::in_place_type_t<node::SubpassFunctionNode>(), std::forward<decltype(function)>(function));
else
nodes.emplace_back(std::in_place_type_t<node::NextSubpassFunctionNode>(), std::forward<decltype(function)>(function)); nodes.emplace_back(std::in_place_type_t<node::NextSubpassFunctionNode>(), std::forward<decltype(function)>(function));
else
nodes.emplace_back(std::in_place_type_t<node::SubpassFunctionNode>(), std::forward<decltype(function)>(function));
} }
void CommandExecutor::AddOutsideRpCommand(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &)> &&function) { void CommandExecutor::AddOutsideRpCommand(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &)> &&function) {
if (renderPass) { if (renderPass)
// End render pass, if we're in one FinishRenderPass();
nodes.emplace_back(std::in_place_type_t<node::RenderPassEndNode>());
renderPass = nullptr;
subpassCount = 0;
}
nodes.emplace_back(std::in_place_type_t<node::FunctionNode>(), std::forward<decltype(function)>(function)); nodes.emplace_back(std::in_place_type_t<node::FunctionNode>(), std::forward<decltype(function)>(function));
} }
void CommandExecutor::AddClearColorSubpass(TextureView *attachment, const vk::ClearColorValue &value) { void CommandExecutor::AddClearColorSubpass(TextureView *attachment, const vk::ClearColorValue &value) {
bool newRenderPass{CreateRenderPass(vk::Rect2D{ bool gotoNext{CreateRenderPassWithSubpass(vk::Rect2D{.extent = attachment->texture->dimensions}, {}, attachment, nullptr)};
.extent = attachment->texture->dimensions,
})};
renderPass->AddSubpass({}, attachment, nullptr);
if (renderPass->ClearColorAttachment(0, value)) { if (renderPass->ClearColorAttachment(0, value)) {
if (!newRenderPass) if (gotoNext)
nodes.emplace_back(std::in_place_type_t<node::NextSubpassNode>()); nodes.emplace_back(std::in_place_type_t<node::NextSubpassNode>());
} else { } else {
auto function{[scissor = attachment->texture->dimensions, value](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32) { auto function{[scissor = attachment->texture->dimensions, value](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32) {
@ -99,21 +130,17 @@ namespace skyline::gpu::interconnect {
}); });
}}; }};
if (newRenderPass) if (gotoNext)
nodes.emplace_back(std::in_place_type_t<node::SubpassFunctionNode>(), function);
else
nodes.emplace_back(std::in_place_type_t<node::NextSubpassFunctionNode>(), function); nodes.emplace_back(std::in_place_type_t<node::NextSubpassFunctionNode>(), function);
else
nodes.emplace_back(std::in_place_type_t<node::SubpassFunctionNode>(), function);
} }
} }
void CommandExecutor::AddClearDepthStencilSubpass(TextureView *attachment, const vk::ClearDepthStencilValue &value) { void CommandExecutor::AddClearDepthStencilSubpass(TextureView *attachment, const vk::ClearDepthStencilValue &value) {
bool newRenderPass{CreateRenderPass(vk::Rect2D{ bool gotoNext{CreateRenderPassWithSubpass(vk::Rect2D{.extent = attachment->texture->dimensions}, {}, {}, attachment)};
.extent = attachment->texture->dimensions,
})};
renderPass->AddSubpass({}, {}, attachment);
if (renderPass->ClearDepthStencilAttachment(value)) { if (renderPass->ClearDepthStencilAttachment(value)) {
if (!newRenderPass) if (gotoNext)
nodes.emplace_back(std::in_place_type_t<node::NextSubpassNode>()); nodes.emplace_back(std::in_place_type_t<node::NextSubpassNode>());
} else { } else {
auto function{[aspect = attachment->format->vkAspect, extent = attachment->texture->dimensions, value](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32) { auto function{[aspect = attachment->format->vkAspect, extent = attachment->texture->dimensions, value](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32) {
@ -127,10 +154,10 @@ namespace skyline::gpu::interconnect {
}); });
}}; }};
if (newRenderPass) if (gotoNext)
nodes.emplace_back(std::in_place_type_t<node::SubpassFunctionNode>(), function);
else
nodes.emplace_back(std::in_place_type_t<node::NextSubpassFunctionNode>(), function); nodes.emplace_back(std::in_place_type_t<node::NextSubpassFunctionNode>(), function);
else
nodes.emplace_back(std::in_place_type_t<node::SubpassFunctionNode>(), function);
} }
} }
@ -138,11 +165,8 @@ namespace skyline::gpu::interconnect {
if (!nodes.empty()) { if (!nodes.empty()) {
TRACE_EVENT("gpu", "CommandExecutor::Execute"); TRACE_EVENT("gpu", "CommandExecutor::Execute");
if (renderPass) { if (renderPass)
nodes.emplace_back(std::in_place_type_t<node::RenderPassEndNode>()); FinishRenderPass();
renderPass = nullptr;
subpassCount = 0;
}
{ {
auto &commandBuffer{*activeCommandBuffer}; auto &commandBuffer{*activeCommandBuffer};
@ -155,7 +179,7 @@ namespace skyline::gpu::interconnect {
texture->MarkGpuDirty(); texture->MarkGpuDirty();
} }
for (const auto& delegate : attachedBuffers) for (const auto &delegate : attachedBuffers)
delegate->usageCallback = nullptr; delegate->usageCallback = nullptr;
vk::RenderPass lRenderPass; vk::RenderPass lRenderPass;
@ -187,7 +211,7 @@ namespace skyline::gpu::interconnect {
commandBuffer.end(); commandBuffer.end();
gpu.scheduler.SubmitCommandBuffer(commandBuffer, activeCommandBuffer.GetFence()); gpu.scheduler.SubmitCommandBuffer(commandBuffer, activeCommandBuffer.GetFence());
for (const auto& delegate : attachedBuffers) for (const auto &delegate : attachedBuffers)
delegate->buffer->InvalidateMegaBuffer(); delegate->buffer->InvalidateMegaBuffer();
nodes.clear(); nodes.clear();

View File

@ -24,10 +24,22 @@ namespace skyline::gpu::interconnect {
using SharedBufferDelegate = std::shared_ptr<Buffer::BufferDelegate>; using SharedBufferDelegate = std::shared_ptr<Buffer::BufferDelegate>;
std::unordered_set<SharedBufferDelegate> attachedBuffers; //!< All buffers that are attached to the current execution std::unordered_set<SharedBufferDelegate> attachedBuffers; //!< All buffers that are attached to the current execution
std::vector<TextureView*> lastSubpassAttachments; //!< The storage backing for attachments used in the last subpass
span<TextureView*> lastSubpassInputAttachments; //!< The set of input attachments used in the last subpass
span<TextureView*> lastSubpassColorAttachments; //!< The set of color attachments used in the last subpass
TextureView* lastSubpassDepthStencilAttachment{}; //!< The depth stencil attachment used in the last subpass
/** /**
* @return If a new render pass was created by the function or the current one was reused as it was compatible * @brief Create a new render pass and subpass with the specified attachments, if one doesn't already exist or the current one isn't compatible
* @note This also checks for subpass coalescing and will merge the new subpass with the previous one when possible
* @return If the next subpass must be started prior to issuing any commands
*/ */
bool CreateRenderPass(vk::Rect2D renderArea); bool CreateRenderPassWithSubpass(vk::Rect2D renderArea, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment);
/**
* @brief Ends a render pass if one is currently active and resets all corresponding state
*/
void FinishRenderPass();
public: public:
std::shared_ptr<FenceCycle> cycle; //!< The fence cycle that this command executor uses to wait for the GPU to finish executing commands std::shared_ptr<FenceCycle> cycle; //!< The fence cycle that this command executor uses to wait for the GPU to finish executing commands