Implement Render Pass Cache

Implements a cache for storing `VkRenderPass` objects, which are often reused. They are generally not extremely expensive to create, but this is a required step towards a framebuffer cache; a framebuffer is generally an extremely expensive object to create on TBDRs, since it involves calculating tiling memory allocations and, in the case of Adreno's proprietary driver, several kernel calls for mapping and allocating the corresponding memory.
This commit is contained in:
PixelyIon 2022-05-01 18:15:50 +05:30
parent ae77bde171
commit da931cf07b
9 changed files with 278 additions and 21 deletions

View File

@ -170,6 +170,7 @@ add_library(skyline SHARED
${source_DIR}/skyline/gpu/presentation_engine.cpp ${source_DIR}/skyline/gpu/presentation_engine.cpp
${source_DIR}/skyline/gpu/shader_manager.cpp ${source_DIR}/skyline/gpu/shader_manager.cpp
${source_DIR}/skyline/gpu/cache/graphics_pipeline_cache.cpp ${source_DIR}/skyline/gpu/cache/graphics_pipeline_cache.cpp
${source_DIR}/skyline/gpu/cache/renderpass_cache.cpp
${source_DIR}/skyline/gpu/interconnect/command_executor.cpp ${source_DIR}/skyline/gpu/interconnect/command_executor.cpp
${source_DIR}/skyline/gpu/interconnect/command_nodes.cpp ${source_DIR}/skyline/gpu/interconnect/command_nodes.cpp
${source_DIR}/skyline/gpu/interconnect/conversion/quads.cpp ${source_DIR}/skyline/gpu/interconnect/conversion/quads.cpp

View File

@ -278,5 +278,6 @@ namespace skyline::gpu {
buffer(*this), buffer(*this),
descriptor(*this), descriptor(*this),
shader(state, *this), shader(state, *this),
graphicsPipelineCache(*this) {} graphicsPipelineCache(*this),
renderPassCache(*this) {}
} }

View File

@ -12,6 +12,7 @@
#include "gpu/descriptor_allocator.h" #include "gpu/descriptor_allocator.h"
#include "gpu/shader_manager.h" #include "gpu/shader_manager.h"
#include "gpu/cache/graphics_pipeline_cache.h" #include "gpu/cache/graphics_pipeline_cache.h"
#include "gpu/cache/renderpass_cache.h"
namespace skyline::gpu { namespace skyline::gpu {
static constexpr u32 VkApiVersion{VK_API_VERSION_1_1}; //!< The version of core Vulkan that we require static constexpr u32 VkApiVersion{VK_API_VERSION_1_1}; //!< The version of core Vulkan that we require
@ -47,6 +48,7 @@ namespace skyline::gpu {
ShaderManager shader; ShaderManager shader;
cache::GraphicsPipelineCache graphicsPipelineCache; cache::GraphicsPipelineCache graphicsPipelineCache;
cache::RenderPassCache renderPassCache;
GPU(const DeviceState &state); GPU(const DeviceState &state);
}; };

View File

@ -0,0 +1,23 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <vulkan/vulkan_raii.hpp>
#include <common.h>
namespace skyline::gpu::cache {
/**
 * @brief All unique metadata in a single attachment for a compatible render pass according to Render Pass Compatibility clause in the Vulkan specification
 * @note Only the format and sample count are kept; two instances compare equal when both fields match (defaulted member-wise comparison)
 * @url https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#renderpass-compatibility
 * @url https://www.khronos.org/registry/vulkan/specs/1.3-extensions/man/html/VkAttachmentDescription.html
 */
struct AttachmentMetadata {
    vk::Format format; //!< The format of the attachment
    vk::SampleCountFlagBits sampleCount; //!< The sample count of the attachment

    // An explicit constructor is declared so the type can be built in place from (format, sampleCount) arguments
    constexpr AttachmentMetadata(vk::Format format, vk::SampleCountFlagBits sampleCount) : format(format), sampleCount(sampleCount) {}

    bool operator==(const AttachmentMetadata &rhs) const = default;
};
}

View File

@ -4,6 +4,7 @@
#pragma once #pragma once
#include <gpu/texture/texture.h> #include <gpu/texture/texture.h>
#include "common.h"
namespace skyline::gpu::cache { namespace skyline::gpu::cache {
/** /**
@ -47,18 +48,6 @@ namespace skyline::gpu::cache {
}; };
private: private:
/**
* @brief All unique metadata a single attachment for a compatible pipeline according to Render Pass Compatibility clause in the Vulkan specification
* @url https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#renderpass-compatibility
* @url https://www.khronos.org/registry/vulkan/specs/1.3-extensions/man/html/VkAttachmentDescription.html
*/
struct AttachmentMetadata {
vk::Format format;
vk::SampleCountFlagBits sampleCount;
bool operator==(const AttachmentMetadata &rhs) const = default;
};
/** /**
* @brief All data in PipelineState in value form to allow cheap heterogenous lookups with reference types while still storing a value-based key in the map * @brief All data in PipelineState in value form to allow cheap heterogenous lookups with reference types while still storing a value-based key in the map
*/ */

View File

@ -0,0 +1,175 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <boost/functional/hash.hpp>
#include <gpu.h>
#include "renderpass_cache.h"
namespace skyline::gpu::cache {
// Stores a reference to the owning GPU; the cache starts out empty and is populated by GetRenderPass
RenderPassCache::RenderPassCache(gpu::GPU &gpu) : gpu(gpu) {}
/**
 * @brief Builds the value-form cache key for a render pass from its Vulkan creation info
 * @param createInfo The creation info to copy the key state from, its attachment and subpass arrays are only read during construction
 * @note Only the attachment format/sample count and the attachment indices referenced by every subpass are stored, these are the same values the hash and equality functors of the cache read from a vk::RenderPassCreateInfo
 */
RenderPassCache::RenderPassMetadata::RenderPassMetadata(const vk::RenderPassCreateInfo &createInfo) {
    attachments.reserve(createInfo.attachmentCount);
    for (const auto &attachment : span<const vk::AttachmentDescription>{createInfo.pAttachments, createInfo.attachmentCount})
        attachments.emplace_back(attachment.format, attachment.samples);

    subpasses.reserve(createInfo.subpassCount);
    for (const auto &subpass : span<const vk::SubpassDescription>{createInfo.pSubpasses, createInfo.subpassCount}) {
        auto &subpassMetadata{subpasses.emplace_back()};

        subpassMetadata.inputAttachments.reserve(subpass.inputAttachmentCount);
        for (const auto &reference : span<const vk::AttachmentReference>{subpass.pInputAttachments, subpass.inputAttachmentCount})
            subpassMetadata.inputAttachments.emplace_back(reference.attachment);

        subpassMetadata.colorAttachments.reserve(subpass.colorAttachmentCount);
        for (const auto &reference : span<const vk::AttachmentReference>{subpass.pColorAttachments, subpass.colorAttachmentCount})
            subpassMetadata.colorAttachments.emplace_back(reference.attachment);

        // The resolve array has no count of its own: when present it is read with the color attachment count, otherwise it is treated as empty
        auto resolveAttachmentCount{subpass.pResolveAttachments ? subpass.colorAttachmentCount : 0};
        subpassMetadata.resolveAttachments.reserve(resolveAttachmentCount);
        for (const auto &reference : span<const vk::AttachmentReference>{subpass.pResolveAttachments, resolveAttachmentCount})
            subpassMetadata.resolveAttachments.emplace_back(reference.attachment);

        if (subpass.pDepthStencilAttachment)
            subpassMetadata.depthStencilAttachment.emplace(subpass.pDepthStencilAttachment->attachment);

        // Preserve indices must land in their own list: the hash and equality functors compare them separately from the resolve attachments
        subpassMetadata.preserveAttachments.reserve(subpass.preserveAttachmentCount);
        for (const auto &index : span<const u32>{subpass.pPreserveAttachments, subpass.preserveAttachmentCount})
            subpassMetadata.preserveAttachments.emplace_back(index);
    }
}
#define HASH(x) boost::hash_combine(hash, x)
/**
 * @brief Hashes the value-form key of a cached render pass
 * @param key The stored metadata to hash
 * @return A hash combining every attachment's format/sample count and, per subpass, each attachment index list prefixed by its length
 * @note The values and their order mirror the vk::RenderPassCreateInfo overload so both forms of the same render pass hash alike
 */
size_t RenderPassCache::RenderPassHash::operator()(const RenderPassMetadata &key) const {
    size_t seed{};
    auto combine{[&seed](const auto &value) { boost::hash_combine(seed, value); }};

    // Folds in the length of a list followed by each of its elements in order
    auto combineList{[&combine](const auto &list) {
        combine(list.size());
        for (const auto &item : list)
            combine(item);
    }};

    combine(key.attachments.size());
    for (const auto &attachmentMetadata : key.attachments) {
        combine(attachmentMetadata.format);
        combine(attachmentMetadata.sampleCount);
    }

    combine(key.subpasses.size());
    for (const auto &subpassMetadata : key.subpasses) {
        combineList(subpassMetadata.inputAttachments);
        combineList(subpassMetadata.colorAttachments);
        combineList(subpassMetadata.resolveAttachments);

        // Presence is hashed before the value so an absent depth-stencil attachment still contributes
        combine(subpassMetadata.depthStencilAttachment.has_value());
        if (subpassMetadata.depthStencilAttachment)
            combine(*subpassMetadata.depthStencilAttachment);

        combineList(subpassMetadata.preserveAttachments);
    }

    return seed;
}
/**
 * @brief Hashes a Vulkan render pass creation info directly, allowing lookups without building a RenderPassMetadata first
 * @param key The creation info to hash, its attachment and subpass arrays are read through the supplied pointers and counts
 * @return A hash combining every attachment's format/sample count and, per subpass, each attachment index list prefixed by its length
 * @note The values and their order mirror the RenderPassMetadata overload so both forms of the same render pass hash alike
 */
size_t RenderPassCache::RenderPassHash::operator()(const vk::RenderPassCreateInfo &key) const {
    size_t seed{};
    auto combine{[&seed](const auto &value) { boost::hash_combine(seed, value); }};

    // Folds in the length of a reference array followed by the attachment index of each reference
    auto combineReferences{[&combine](const vk::AttachmentReference *references, u32 count) {
        combine(count);
        for (const auto &reference : span<const vk::AttachmentReference>{references, count})
            combine(reference.attachment);
    }};

    combine(key.attachmentCount);
    for (const auto &description : span<const vk::AttachmentDescription>{key.pAttachments, key.attachmentCount}) {
        combine(description.format);
        combine(description.samples);
    }

    combine(key.subpassCount);
    for (const auto &description : span<const vk::SubpassDescription>{key.pSubpasses, key.subpassCount}) {
        combineReferences(description.pInputAttachments, description.inputAttachmentCount);
        combineReferences(description.pColorAttachments, description.colorAttachmentCount);

        // The resolve array shares the color attachment count when present and counts as empty otherwise
        u32 resolveCount{description.pResolveAttachments ? description.colorAttachmentCount : 0};
        combineReferences(description.pResolveAttachments, resolveCount);

        // Presence is hashed before the value so an absent depth-stencil attachment still contributes
        combine(description.pDepthStencilAttachment != nullptr);
        if (description.pDepthStencilAttachment)
            combine(description.pDepthStencilAttachment->attachment);

        combine(description.preserveAttachmentCount);
        for (const auto &preserved : span<const u32>{description.pPreserveAttachments, description.preserveAttachmentCount})
            combine(preserved);
    }

    return seed;
}
#undef HASH
// Key-to-key comparison, delegates to the defaulted member-wise equality of RenderPassMetadata
bool RenderPassCache::RenderPassEqual::operator()(const RenderPassMetadata &lhs, const RenderPassMetadata &rhs) const {
return lhs == rhs;
}
/**
 * @brief Compares a stored key against a Vulkan render pass creation info without converting the latter into a key
 * @param lhs The stored metadata
 * @param rhs The creation info to compare against, its arrays are read through the supplied pointers and counts
 * @return If every attachment's format/sample count and every subpass' attachment indices match between both sides
 */
bool RenderPassCache::RenderPassEqual::operator()(const RenderPassMetadata &lhs, const vk::RenderPassCreateInfo &rhs) const {
    // Checks stored attachment indices against a Vulkan reference array, the caller must have verified the lengths match beforehand
    auto referencesMatch{[](const auto &stored, const vk::AttachmentReference *candidates) {
        for (size_t position{}; position < stored.size(); position++)
            if (stored[position] != candidates[position].attachment)
                return false;
        return true;
    }};

    if (lhs.attachments.size() != rhs.attachmentCount)
        return false;

    for (size_t attachmentIndex{}; attachmentIndex < lhs.attachments.size(); attachmentIndex++) {
        const auto &stored{lhs.attachments[attachmentIndex]};
        const auto &candidate{rhs.pAttachments[attachmentIndex]};
        if (stored.format != candidate.format)
            return false;
        if (stored.sampleCount != candidate.samples)
            return false;
    }

    if (lhs.subpasses.size() != rhs.subpassCount)
        return false;

    for (size_t subpassIndex{}; subpassIndex < lhs.subpasses.size(); subpassIndex++) {
        const auto &stored{lhs.subpasses[subpassIndex]};
        const auto &candidate{rhs.pSubpasses[subpassIndex]};

        if (stored.inputAttachments.size() != candidate.inputAttachmentCount)
            return false;
        if (!referencesMatch(stored.inputAttachments, candidate.pInputAttachments))
            return false;

        if (stored.colorAttachments.size() != candidate.colorAttachmentCount)
            return false;
        if (!referencesMatch(stored.colorAttachments, candidate.pColorAttachments))
            return false;

        // The resolve array shares the color attachment count when present and counts as empty otherwise
        if (stored.resolveAttachments.size() != (candidate.pResolveAttachments ? candidate.colorAttachmentCount : 0))
            return false;
        if (!referencesMatch(stored.resolveAttachments, candidate.pResolveAttachments))
            return false;

        // Presence has to agree before the index is compared, so the pointer is only dereferenced when it is non-null
        if (stored.depthStencilAttachment.has_value() != (candidate.pDepthStencilAttachment != nullptr))
            return false;
        if (stored.depthStencilAttachment && *stored.depthStencilAttachment != candidate.pDepthStencilAttachment->attachment)
            return false;

        if (stored.preserveAttachments.size() != candidate.preserveAttachmentCount)
            return false;
        for (size_t position{}; position < stored.preserveAttachments.size(); position++)
            if (stored.preserveAttachments[position] != candidate.pPreserveAttachments[position])
                return false;
    }

    return true;
}
/**
 * @brief Retrieves the cached render pass matching the supplied creation info, creating and caching one on a miss
 * @param createInfo The creation info to look up, it is also used to construct the render pass when no entry exists
 * @return The handle of the render pass held by the cache
 * @note The entire lookup and insertion is performed while holding the cache mutex
 */
vk::RenderPass RenderPassCache::GetRenderPass(const vk::RenderPassCreateInfo &createInfo) {
    std::scoped_lock lock{mutex};

    // The creation info is passed straight to find() so a key is only built when an entry has to be inserted
    if (auto cached{renderPassCache.find(createInfo)}; cached != renderPassCache.end())
        return *cached->second;

    auto insertion{renderPassCache.try_emplace(RenderPassMetadata{createInfo}, gpu.vkDevice, createInfo)};
    return *insertion.first->second;
}
}

View File

@ -0,0 +1,71 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include "common.h"
namespace skyline::gpu::cache {
/**
 * @brief A cache for Vulkan render passes to avoid unnecessary recreation and attain stability in handles for subsequent caches
 * @note Entries are keyed on the render pass compatibility state captured by RenderPassMetadata, the render pass objects themselves are held by the cache as RAII handles
 */
class RenderPassCache {
  private:
    GPU &gpu; //!< The GPU whose Vulkan device is used to create render passes
    std::mutex mutex; //!< Synchronizes access to the cache

    using AttachmentReference = u32; //!< The attachment index taken from a Vulkan attachment reference

    /**
     * @brief All unique metadata in a single subpass for a compatible render pass according to Render Pass Compatibility clause in the Vulkan specification
     * @url https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#renderpass-compatibility
     * @url https://www.khronos.org/registry/vulkan/specs/1.3-extensions/man/html/VkSubpassDescription.html
     */
    struct SubpassMetadata {
        std::vector<AttachmentReference> inputAttachments;
        std::vector<AttachmentReference> colorAttachments;
        std::vector<AttachmentReference> resolveAttachments; //!< Empty when the subpass supplies no resolve attachment array
        std::optional<AttachmentReference> depthStencilAttachment; //!< Empty when the subpass supplies no depth-stencil attachment
        std::vector<AttachmentReference> preserveAttachments;

        bool operator==(const SubpassMetadata &rhs) const = default;
    };

    /**
     * @brief All unique metadata in a render pass for a corresponding compatible render pass according to Render Pass Compatibility clause in the Vulkan specification
     * @url https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#renderpass-compatibility
     * @url https://www.khronos.org/registry/vulkan/specs/1.3-extensions/man/html/VkRenderPassCreateInfo.html
     */
    struct RenderPassMetadata {
        std::vector<AttachmentMetadata> attachments;
        std::vector<SubpassMetadata> subpasses;

        /**
         * @brief Copies the key state out of the supplied creation info
         */
        RenderPassMetadata(const vk::RenderPassCreateInfo &createInfo);

        bool operator==(const RenderPassMetadata &other) const = default;
    };

    /**
     * @brief A transparent hasher accepting both the stored key and a raw creation info, to allow lookups without constructing a key
     */
    struct RenderPassHash {
        using is_transparent = std::true_type;

        size_t operator()(const RenderPassMetadata &key) const;

        size_t operator()(const vk::RenderPassCreateInfo &key) const;
    };

    /**
     * @brief A transparent equality functor comparing a stored key against either another key or a raw creation info
     */
    struct RenderPassEqual {
        using is_transparent = std::true_type;

        bool operator()(const RenderPassMetadata &lhs, const RenderPassMetadata &rhs) const;

        bool operator()(const RenderPassMetadata &lhs, const vk::RenderPassCreateInfo &rhs) const;
    };

    std::unordered_map<RenderPassMetadata, vk::raii::RenderPass, RenderPassHash, RenderPassEqual> renderPassCache; //!< Maps the metadata of a render pass to the RAII object holding it

  public:
    RenderPassCache(GPU &gpu);

    /**
     * @brief Looks up a render pass matching the supplied creation info, creating and caching one if none exists
     * @return A non-owning handle to the cached render pass
     * @note NOTE(review): the handle presumably stays valid for the lifetime of the cache as no eviction is declared here, confirm before relying on it
     */
    vk::RenderPass GetRenderPass(const vk::RenderPassCreateInfo &createInfo);
};
}

View File

@ -19,12 +19,9 @@ namespace skyline::gpu::interconnect::node {
), storage(std::make_shared<Storage>()), renderArea(renderArea) {} ), storage(std::make_shared<Storage>()), renderArea(renderArea) {}
RenderPassNode::Storage::~Storage() { RenderPassNode::Storage::~Storage() {
if (device) { if (device)
if (framebuffer) if (framebuffer)
(**device).destroy(framebuffer, nullptr, *device->getDispatcher()); (**device).destroy(framebuffer, nullptr, *device->getDispatcher());
if (renderPass)
(**device).destroy(renderPass, nullptr, *device->getDispatcher());
}
} }
u32 RenderPassNode::AddAttachment(TextureView *view) { u32 RenderPassNode::AddAttachment(TextureView *view) {
@ -217,15 +214,14 @@ namespace skyline::gpu::interconnect::node {
preserveAttachmentIt++; preserveAttachmentIt++;
} }
auto renderPass{(*gpu.vkDevice).createRenderPass(vk::RenderPassCreateInfo{ auto renderPass{gpu.renderPassCache.GetRenderPass(vk::RenderPassCreateInfo{
.attachmentCount = static_cast<u32>(attachmentDescriptions.size()), .attachmentCount = static_cast<u32>(attachmentDescriptions.size()),
.pAttachments = attachmentDescriptions.data(), .pAttachments = attachmentDescriptions.data(),
.subpassCount = static_cast<u32>(subpassDescriptions.size()), .subpassCount = static_cast<u32>(subpassDescriptions.size()),
.pSubpasses = subpassDescriptions.data(), .pSubpasses = subpassDescriptions.data(),
.dependencyCount = static_cast<u32>(subpassDependencies.size()), .dependencyCount = static_cast<u32>(subpassDependencies.size()),
.pDependencies = subpassDependencies.data(), .pDependencies = subpassDependencies.data(),
}, nullptr, *gpu.vkDevice.getDispatcher())}; })};
storage->renderPass = renderPass;
auto framebuffer{(*gpu.vkDevice).createFramebuffer(vk::FramebufferCreateInfo{ auto framebuffer{(*gpu.vkDevice).createFramebuffer(vk::FramebufferCreateInfo{
.renderPass = renderPass, .renderPass = renderPass,

View File

@ -34,7 +34,6 @@ namespace skyline::gpu::interconnect::node {
struct Storage : public FenceCycleDependency { struct Storage : public FenceCycleDependency {
vk::raii::Device *device{}; vk::raii::Device *device{};
vk::Framebuffer framebuffer{}; vk::Framebuffer framebuffer{};
vk::RenderPass renderPass{};
~Storage(); ~Storage();
}; };