Implement support for (de)serialising VkPipelineCaches to/from storage

Significantly improves launch times in games with many shader combinations, giving a 5x speedup in some cases.
This commit is contained in:
Billy Laws 2023-01-18 19:49:55 +00:00
parent db173083d7
commit dd92cb1536
12 changed files with 327 additions and 480 deletions

View File

@ -401,12 +401,12 @@ namespace skyline::gpu {
megaBufferAllocator(*this), megaBufferAllocator(*this),
descriptor(*this), descriptor(*this),
helperShaders(*this, state.os->assetFileSystem), helperShaders(*this, state.os->assetFileSystem),
graphicsPipelineCache(*this),
renderPassCache(*this), renderPassCache(*this),
framebufferCache(*this) {} framebufferCache(*this) {}
void GPU::Initialise() { void GPU::Initialise() {
std::string titleId{state.loader->nacp->GetSaveDataOwnerId()}; std::string titleId{state.loader->nacp->GetSaveDataOwnerId()};
graphicsPipelineAssembler.emplace(*this, state.os->publicAppFilesPath + "vk_graphics_pipeline_cache/" + titleId);
shader.emplace(state, *this, shader.emplace(state, *this,
state.os->publicAppFilesPath + "shader_replacements/" + titleId, state.os->publicAppFilesPath + "shader_replacements/" + titleId,
state.os->publicAppFilesPath + "shader_dumps/" + titleId); state.os->publicAppFilesPath + "shader_dumps/" + titleId);

View File

@ -14,8 +14,8 @@
#include "gpu/descriptor_allocator.h" #include "gpu/descriptor_allocator.h"
#include "gpu/shader_manager.h" #include "gpu/shader_manager.h"
#include "gpu/pipeline_cache_manager.h" #include "gpu/pipeline_cache_manager.h"
#include "gpu/graphics_pipeline_assembler.h"
#include "gpu/shaders/helper_shaders.h" #include "gpu/shaders/helper_shaders.h"
#include "gpu/cache/graphics_pipeline_cache.h"
#include "gpu/cache/renderpass_cache.h" #include "gpu/cache/renderpass_cache.h"
#include "gpu/cache/framebuffer_cache.h" #include "gpu/cache/framebuffer_cache.h"
#include "gpu/interconnect/maxwell_3d/pipeline_manager.h" #include "gpu/interconnect/maxwell_3d/pipeline_manager.h"
@ -59,7 +59,7 @@ namespace skyline::gpu {
HelperShaders helperShaders; HelperShaders helperShaders;
cache::GraphicsPipelineCache graphicsPipelineCache; std::optional<GraphicsPipelineAssembler> graphicsPipelineAssembler;
cache::RenderPassCache renderPassCache; cache::RenderPassCache renderPassCache;
cache::FramebufferCache framebufferCache; cache::FramebufferCache framebufferCache;

View File

@ -1,406 +0,0 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <boost/functional/hash.hpp>
#include <gpu.h>
#include "graphics_pipeline_cache.h"
namespace skyline::gpu::cache {
/**
 * @brief Constructs the cache with an empty Vulkan pipeline cache object (default create info, no initial data)
 * @note `pool` is constructed with 1 when the `brokenMultithreadedPipelineCompilation` quirk is set and with 0 otherwise; 0 presumably selects the pool's default thread count — TODO confirm against the pool type's documentation
 */
GraphicsPipelineCache::GraphicsPipelineCache(GPU &gpu)
: gpu{gpu},
vkPipelineCache{gpu.vkDevice, vk::PipelineCacheCreateInfo{}},
pool{gpu.traits.quirks.brokenMultithreadedPipelineCompilation ? 1U : 0U} {}
// Expands to the iterator pair (state.pointer, state.pointer + state.size); both expressions are prefixed with `state.` so the copy always reads from the source state rather than from members of the key under construction
#define VEC_CPY(pointer, size) state.pointer, state.pointer + state.size
/**
 * @brief Builds a value-form key from a reference-form pipeline state
 * @param state The pipeline state whose structures and pointed-to arrays are copied
 * @note Every array referenced through a pointer in `state` is copied into a vector member, after which the copied create-info structures are re-pointed at those vectors
 * @note The shader stage structures are copied as-is; any pointers they contain (e.g. `pName`) are not deep-copied here
 */
GraphicsPipelineCache::PipelineCacheKey::PipelineCacheKey(const GraphicsPipelineCache::PipelineState &state)
: shaderStages(state.shaderStages.begin(), state.shaderStages.end()),
vertexState(state.vertexState),
vertexBindings(VEC_CPY(VertexInputState().pVertexBindingDescriptions, VertexInputState().vertexBindingDescriptionCount)),
vertexAttributes(VEC_CPY(VertexInputState().pVertexAttributeDescriptions, VertexInputState().vertexAttributeDescriptionCount)),
vertexDivisors(VEC_CPY(VertexDivisorState().pVertexBindingDivisors, VertexDivisorState().vertexBindingDivisorCount)),
inputAssemblyState(state.inputAssemblyState),
tessellationState(state.tessellationState),
viewportState(state.viewportState),
viewports(VEC_CPY(viewportState.pViewports, viewportState.viewportCount)),
scissors(VEC_CPY(viewportState.pScissors, viewportState.scissorCount)),
rasterizationState(state.rasterizationState),
multisampleState(state.multisampleState),
depthStencilState(state.depthStencilState),
colorBlendState(state.colorBlendState),
dynamicStates(VEC_CPY(dynamicState.pDynamicStates, dynamicState.dynamicStateCount)),
dynamicState(state.dynamicState),
colorBlendAttachments(VEC_CPY(colorBlendState.pAttachments, colorBlendState.attachmentCount)) {
// Re-target the pointers inside the copied structures at the vectors owned by this key
auto &vertexInputState{vertexState.get<vk::PipelineVertexInputStateCreateInfo>()};
vertexInputState.pVertexBindingDescriptions = vertexBindings.data();
vertexInputState.pVertexAttributeDescriptions = vertexAttributes.data();
// The divisor pointer is updated unconditionally, regardless of whether the divisor structure is linked into the chain
vertexState.get<vk::PipelineVertexInputDivisorStateCreateInfoEXT>().pVertexBindingDivisors = vertexDivisors.data();
viewportState.pViewports = viewports.data();
viewportState.pScissors = scissors.data();
colorBlendState.pAttachments = colorBlendAttachments.data();
dynamicState.pDynamicStates = dynamicStates.data();
// Attachment formats and the sample count are plain values and are copied directly
for (auto &colorFormat : state.colorFormats)
colorFormats.emplace_back(colorFormat);
depthStencilFormat = state.depthStencilFormat;
sampleCount = state.sampleCount;
}
#undef VEC_CPY
#define HASH(x) boost::hash_combine(hash, x)
/**
 * @brief Hashes the pipeline state fields shared by the reference-form state and the value-form key
 * @tparam T A type exposing the members and accessors read below (instantiated in this file with PipelineState and PipelineCacheKey)
 * @param key The state to hash
 * @param hash The seed that every field is combined into via the HASH macro (boost::hash_combine)
 * @return The combined hash
 * @note Fields are combined in a fixed order, so both forms of the same state produce the same value as long as they expose equal field values
 * @note Not every field is hashed (e.g. only the counts of vertex divisors and dynamic states are combined, not their contents); fields that are skipped here can only be distinguished by the equality comparison
 */
template<typename T>
size_t HashCommonPipelineState(const T &key, size_t hash = 0) {
// Shader stages: stage bit and module handle of every stage
HASH(key.shaderStages.size());
for (const auto &stage : key.shaderStages) {
HASH(stage.stage);
HASH(static_cast<VkShaderModule>(stage.module));
}
// Vertex input: counts, flags, then every binding and attribute description
auto &vertexInputState{key.VertexInputState()};
HASH(vertexInputState.vertexBindingDescriptionCount);
HASH(vertexInputState.vertexAttributeDescriptionCount);
HASH(static_cast<VkFlags>(vertexInputState.flags));
for (size_t i{}; i < vertexInputState.vertexBindingDescriptionCount; i++) {
const auto &descr{vertexInputState.pVertexBindingDescriptions[i]};
HASH(descr.binding);
HASH(descr.stride);
HASH(static_cast<VkVertexInputRate>(descr.inputRate));
}
for (size_t i{}; i < vertexInputState.vertexAttributeDescriptionCount; i++) {
const auto &descr{vertexInputState.pVertexAttributeDescriptions[i]};
HASH(descr.binding);
HASH(descr.offset);
HASH(descr.location);
HASH(static_cast<VkFormat>(descr.format));
}
// The divisor state only contributes its count, and only when it is linked into the structure chain
if (key.vertexState.template isLinked<vk::PipelineVertexInputDivisorStateCreateInfoEXT>())
HASH(key.VertexDivisorState().vertexBindingDivisorCount);
// Input assembly and tessellation
HASH(key.inputAssemblyState.topology);
HASH(key.inputAssemblyState.primitiveRestartEnable);
HASH(key.tessellationState.patchControlPoints);
// Viewports and scissors: counts followed by every rectangle
HASH(key.viewportState.viewportCount);
HASH(key.viewportState.scissorCount);
for (size_t i{}; i < key.viewportState.viewportCount; i++) {
const auto &viewport{key.viewportState.pViewports[i]};
HASH(viewport.x);
HASH(viewport.y);
HASH(viewport.width);
HASH(viewport.height);
HASH(viewport.minDepth);
HASH(viewport.maxDepth);
}
for (size_t i{}; i < key.viewportState.scissorCount; i++) {
const auto &scissor{key.viewportState.pScissors[i]};
HASH(scissor.offset.x);
HASH(scissor.offset.y);
HASH(scissor.extent.width);
HASH(scissor.extent.height);
}
// Rasterization state
auto &rasterizationState{key.RasterizationState()};
HASH(rasterizationState.depthClampEnable);
HASH(rasterizationState.rasterizerDiscardEnable);
HASH(rasterizationState.polygonMode);
HASH(std::hash<vk::CullModeFlags>{}(rasterizationState.cullMode));
HASH(rasterizationState.frontFace);
HASH(rasterizationState.depthBiasEnable);
HASH(rasterizationState.depthBiasConstantFactor);
HASH(rasterizationState.depthBiasClamp);
HASH(rasterizationState.depthBiasSlopeFactor);
HASH(rasterizationState.lineWidth);
// The provoking vertex mode only contributes when its structure is linked into the chain
if (key.rasterizationState.template isLinked<vk::PipelineRasterizationProvokingVertexStateCreateInfoEXT>())
HASH(key.ProvokingVertexState().provokingVertexMode);
// Multisample state
HASH(key.multisampleState.rasterizationSamples);
HASH(key.multisampleState.sampleShadingEnable);
HASH(key.multisampleState.minSampleShading);
HASH(key.multisampleState.alphaToCoverageEnable);
HASH(key.multisampleState.alphaToOneEnable);
// Depth/stencil state, including both stencil faces
HASH(key.depthStencilState.depthTestEnable);
HASH(key.depthStencilState.depthWriteEnable);
HASH(key.depthStencilState.depthCompareOp);
HASH(key.depthStencilState.depthBoundsTestEnable);
HASH(key.depthStencilState.stencilTestEnable);
HASH(key.depthStencilState.front.compareOp);
HASH(key.depthStencilState.front.failOp);
HASH(key.depthStencilState.front.passOp);
HASH(key.depthStencilState.front.depthFailOp);
HASH(key.depthStencilState.front.compareMask);
HASH(key.depthStencilState.front.writeMask);
HASH(key.depthStencilState.front.reference);
HASH(key.depthStencilState.back.compareOp);
HASH(key.depthStencilState.back.failOp);
HASH(key.depthStencilState.back.passOp);
HASH(key.depthStencilState.back.depthFailOp);
HASH(key.depthStencilState.back.compareMask);
HASH(key.depthStencilState.back.writeMask);
HASH(key.depthStencilState.back.reference);
HASH(key.depthStencilState.minDepthBounds);
HASH(key.depthStencilState.maxDepthBounds);
// Color blend state: logic op, attachment count, then the blend equation of every attachment
HASH(key.colorBlendState.logicOpEnable);
HASH(key.colorBlendState.logicOp);
HASH(key.colorBlendState.attachmentCount);
for (size_t i{}; i < key.colorBlendState.attachmentCount; i++) {
const auto &attachment{key.colorBlendState.pAttachments[i]};
HASH(static_cast<VkBool32>(attachment.blendEnable));
HASH(static_cast<VkBlendOp>(attachment.alphaBlendOp));
HASH(static_cast<VkBlendOp>(attachment.colorBlendOp));
HASH(static_cast<VkBlendFactor>(attachment.dstAlphaBlendFactor));
HASH(static_cast<VkBlendFactor>(attachment.dstColorBlendFactor));
HASH(static_cast<VkBlendFactor>(attachment.srcAlphaBlendFactor));
HASH(static_cast<VkBlendFactor>(attachment.srcColorBlendFactor));
}
// Only the number of dynamic states is combined, not which states they are
HASH(key.dynamicState.dynamicStateCount);
// Attachment formats and sample count of the target subpass
HASH(key.colorFormats.size());
for (auto format : key.colorFormats)
HASH(format);
HASH(key.depthStencilFormat);
HASH(key.sampleCount);
return hash;
}
/**
 * @brief Hashes a reference-form pipeline state through the shared field hasher, starting from a zero seed
 */
size_t GraphicsPipelineCache::PipelineStateHash::operator()(const GraphicsPipelineCache::PipelineState &key) const {
    const size_t digest{HashCommonPipelineState(key, 0)};
    return digest;
}

/**
 * @brief Hashes a value-form cache key through the shared field hasher, starting from a zero seed
 */
size_t GraphicsPipelineCache::PipelineStateHash::operator()(const GraphicsPipelineCache::PipelineCacheKey &key) const {
    const size_t digest{HashCommonPipelineState(key, 0)};
    return digest;
}
#undef HASH
/**
 * @brief Heterogeneous equality between a stored value-form key and a reference-form pipeline state
 * @param lhs The key stored in the cache
 * @param rhs The state being looked up
 * @return true only if every compared field of both sides matches
 * @note Despite their names, the CARREQ/ARREQ helper macros evaluate to true when the two arrays DIFFER, so they can be fed directly into RETF
 * @note Shader stage specialization constants are intentionally not compared
 */
bool GraphicsPipelineCache::PipelineCacheEqual::operator()(const GraphicsPipelineCache::PipelineCacheKey &lhs, const GraphicsPipelineCache::PipelineState &rhs) const {
    // RETF: bail out of the comparison with `false` as soon as a mismatch condition holds
    #define RETF(condition) if (condition) { return false; }
    #define KEYEQ(member) (lhs.member == rhs.member)
    #define KEYNEQ(member) (lhs.member != rhs.member)

    // True when the two ranges differ in length or in any element according to `equalFunction`
    static constexpr auto NotEqual{[](auto pointer, auto size, auto pointer2, auto size2, auto equalFunction) -> bool {
        return
            size != size2 ||
            !std::equal(pointer, pointer + static_cast<ssize_t>(size), pointer2, equalFunction);
    }};
    #define CARREQ(pointer, size, func) NotEqual(lhs.pointer, lhs.size, rhs.pointer, rhs.size, [](decltype(*lhs.pointer) &lhs, decltype(*rhs.pointer) &rhs) { func }) // Note: typeof(*lhs/rhs.pointer) is required for clangd to resolve the parameter type correctly for autocomplete
    #define ARREQ(pointer, size) CARREQ(pointer, size, { return lhs == rhs; })

    RETF(CARREQ(shaderStages.begin(), shaderStages.size(), {
        return KEYEQ(flags) && KEYEQ(stage) && KEYEQ(module) && std::strcmp(lhs.pName, rhs.pName) == 0;
        // Note: We intentionally ignore specialization constants here
    }))

    RETF(KEYNEQ(VertexInputState().flags) ||
         ARREQ(VertexInputState().pVertexBindingDescriptions, VertexInputState().vertexBindingDescriptionCount) ||
         ARREQ(VertexInputState().pVertexAttributeDescriptions, VertexInputState().vertexAttributeDescriptionCount)
    )

    // Optional chained structures must be linked on both sides or on neither, and are only compared when present
    RETF(KEYNEQ(vertexState.isLinked<vk::PipelineVertexInputDivisorStateCreateInfoEXT>()) ||
         (lhs.vertexState.isLinked<vk::PipelineVertexInputDivisorStateCreateInfoEXT>() &&
          ARREQ(VertexDivisorState().pVertexBindingDivisors, VertexDivisorState().vertexBindingDivisorCount)
         )
    )

    RETF(KEYNEQ(tessellationState.flags) || KEYNEQ(tessellationState.patchControlPoints))

    RETF(KEYNEQ(inputAssemblyState.flags) || KEYNEQ(inputAssemblyState.topology) || KEYNEQ(inputAssemblyState.primitiveRestartEnable))

    RETF(KEYNEQ(viewportState.flags) ||
         ARREQ(viewportState.pViewports, viewportState.viewportCount) ||
         ARREQ(viewportState.pScissors, viewportState.scissorCount)
    )

    RETF(KEYNEQ(RasterizationState().flags) ||
         KEYNEQ(RasterizationState().depthClampEnable) ||
         KEYNEQ(RasterizationState().rasterizerDiscardEnable) ||
         KEYNEQ(RasterizationState().polygonMode) ||
         KEYNEQ(RasterizationState().cullMode) ||
         KEYNEQ(RasterizationState().frontFace) ||
         KEYNEQ(RasterizationState().depthBiasEnable) ||
         KEYNEQ(RasterizationState().depthBiasConstantFactor) ||
         KEYNEQ(RasterizationState().depthBiasClamp) ||
         KEYNEQ(RasterizationState().depthBiasSlopeFactor) ||
         KEYNEQ(RasterizationState().lineWidth)
    )

    RETF(KEYNEQ(rasterizationState.isLinked<vk::PipelineRasterizationProvokingVertexStateCreateInfoEXT>()) ||
         (lhs.rasterizationState.isLinked<vk::PipelineRasterizationProvokingVertexStateCreateInfoEXT>() &&
          KEYNEQ(ProvokingVertexState().provokingVertexMode)
         )
    )

    RETF(KEYNEQ(multisampleState.flags) ||
         KEYNEQ(multisampleState.rasterizationSamples) ||
         KEYNEQ(multisampleState.sampleShadingEnable) ||
         KEYNEQ(multisampleState.minSampleShading) ||
         KEYNEQ(multisampleState.alphaToCoverageEnable) ||
         KEYNEQ(multisampleState.alphaToOneEnable)
    )

    RETF(KEYNEQ(depthStencilState.flags) ||
         KEYNEQ(depthStencilState.depthTestEnable) ||
         KEYNEQ(depthStencilState.depthWriteEnable) ||
         KEYNEQ(depthStencilState.depthCompareOp) ||
         KEYNEQ(depthStencilState.depthBoundsTestEnable) ||
         KEYNEQ(depthStencilState.stencilTestEnable) ||
         KEYNEQ(depthStencilState.front) ||
         KEYNEQ(depthStencilState.back) ||
         KEYNEQ(depthStencilState.minDepthBounds) ||
         KEYNEQ(depthStencilState.maxDepthBounds)
    )

    RETF(ARREQ(dynamicState.pDynamicStates, dynamicState.dynamicStateCount))

    RETF(KEYNEQ(colorBlendState.flags) ||
         KEYNEQ(colorBlendState.logicOpEnable) ||
         KEYNEQ(colorBlendState.logicOp) ||
         ARREQ(colorBlendState.pAttachments, colorBlendState.attachmentCount) ||
         KEYNEQ(colorBlendState.blendConstants)
    )

    RETF(CARREQ(colorFormats.begin(), colorFormats.size(), {
        return lhs == rhs;
    }))

    // RETF returns false when its condition holds, so these must test for a MISMATCH; testing for equality here
    // made identical states compare unequal (and states differing in both fields compare equal)
    RETF(KEYNEQ(depthStencilFormat))
    RETF(KEYNEQ(sampleCount))

    #undef ARREQ
    #undef CARREQ
    #undef KEYNEQ
    #undef KEYEQ
    #undef RETF

    return true;
}
/**
 * @brief Equality between two value-form keys, delegated to PipelineCacheKey's own operator==
 */
bool GraphicsPipelineCache::PipelineCacheEqual::operator()(const PipelineCacheKey &lhs, const PipelineCacheKey &rhs) const {
    const bool keysMatch{lhs == rhs};
    return keysMatch;
}
/**
 * @brief Creates a cache entry that takes ownership of the supplied descriptor set layout and pipeline layout
 * @note The pipeline member is not initialised by this constructor
 */
GraphicsPipelineCache::PipelineCacheEntry::PipelineCacheEntry(
    vk::raii::DescriptorSetLayout &&descriptorSetLayout,
    vk::raii::PipelineLayout &&pipelineLayout
)
    : descriptorSetLayout{std::move(descriptorSetLayout)},
      pipelineLayout{std::move(pipelineLayout)} {}
/**
 * @brief Creates a graphics pipeline for the given key against a temporary single-subpass render pass
 * @param key The value-form pipeline state to compile
 * @param pipelineLayout The layout the pipeline is created with
 * @return The newly created pipeline
 * @note The render pass is built from the key's attachment formats and sample count and only lives for the duration of this call
 */
vk::raii::Pipeline GraphicsPipelineCache::BuildPipeline(const PipelineCacheKey &key, vk::PipelineLayout pipelineLayout) {
    boost::container::small_vector<vk::AttachmentDescription, 8> descriptions;
    boost::container::small_vector<vk::AttachmentReference, 8> references;

    // Appends one reference per format; undefined formats get an unused reference and no description
    auto appendAttachment{[&](vk::Format format) {
        if (format == vk::Format::eUndefined) {
            references.push_back(vk::AttachmentReference{
                .attachment = VK_ATTACHMENT_UNUSED,
                .layout = vk::ImageLayout::eUndefined,
            });
            return;
        }

        descriptions.push_back(vk::AttachmentDescription{
            .format = format,
            .samples = key.sampleCount,
            .loadOp = vk::AttachmentLoadOp::eLoad,
            .storeOp = vk::AttachmentStoreOp::eStore,
            .stencilLoadOp = vk::AttachmentLoadOp::eLoad,
            .stencilStoreOp = vk::AttachmentStoreOp::eStore,
            .initialLayout = vk::ImageLayout::eGeneral,
            .finalLayout = vk::ImageLayout::eGeneral,
            .flags = vk::AttachmentDescriptionFlagBits::eMayAlias
        });
        references.push_back(vk::AttachmentReference{
            .attachment = static_cast<u32>(descriptions.size() - 1),
            .layout = vk::ImageLayout::eGeneral,
        });
    }};

    vk::SubpassDescription subpass{
        .pipelineBindPoint = vk::PipelineBindPoint::eGraphics,
    };

    for (vk::Format colorFormat : key.colorFormats)
        appendAttachment(colorFormat);

    // The depth-stencil reference (if any) is appended last, so it is excluded from the color count and found at back()
    const bool hasDepthStencil{key.depthStencilFormat != vk::Format::eUndefined};
    if (hasDepthStencil)
        appendAttachment(key.depthStencilFormat);

    // Pointers into `references` are only taken once no further elements will be pushed
    const auto colorReferenceCount{references.size() - (hasDepthStencil ? 1U : 0U)};
    subpass.pColorAttachments = references.data();
    subpass.colorAttachmentCount = static_cast<u32>(colorReferenceCount);
    if (hasDepthStencil)
        subpass.pDepthStencilAttachment = &references.back();

    vk::raii::RenderPass compatibleRenderPass{gpu.vkDevice, vk::RenderPassCreateInfo{
        .attachmentCount = static_cast<u32>(descriptions.size()),
        .pAttachments = descriptions.data(),
        .subpassCount = 1,
        .pSubpasses = &subpass,
    }};

    vk::GraphicsPipelineCreateInfo pipelineInfo{
        .pStages = key.shaderStages.data(),
        .stageCount = static_cast<u32>(key.shaderStages.size()),
        .pVertexInputState = &key.vertexState.get<vk::PipelineVertexInputStateCreateInfo>(),
        .pInputAssemblyState = &key.inputAssemblyState,
        .pViewportState = &key.viewportState,
        .pRasterizationState = &key.rasterizationState.get<vk::PipelineRasterizationStateCreateInfo>(),
        .pMultisampleState = &key.multisampleState,
        .pDepthStencilState = &key.depthStencilState,
        .pColorBlendState = &key.colorBlendState,
        .pDynamicState = &key.dynamicState,
        .layout = pipelineLayout,
        .renderPass = *compatibleRenderPass,
        .subpass = 0,
    };
    return gpu.vkDevice.createGraphicsPipeline(vkPipelineCache, pipelineInfo);
}
/**
 * @brief Builds a non-owning view of a cache entry by dereferencing its layouts and pipeline members
 */
GraphicsPipelineCache::CompiledPipeline::CompiledPipeline(const PipelineCacheEntry &entry)
    : descriptorSetLayout{*entry.descriptorSetLayout},
      pipelineLayout{*entry.pipelineLayout},
      pipeline{*entry.pipeline} {}
/**
 * @brief Looks up the pipeline for a state, creating its layouts and queueing its compilation on a miss
 * @param state The reference-form pipeline state to look up
 * @param layoutBindings Bindings for the descriptor set layout created on a miss
 * @param pushConstantRanges Push constant ranges for the pipeline layout created on a miss
 * @param noPushDescriptors If set, the push descriptor layout flag is never used even when the device supports it
 * @return A view of the cache entry; on a miss its pipeline is the shared future of the task submitted to the pool
 * @note The whole call runs with `mutex` held
 */
GraphicsPipelineCache::CompiledPipeline GraphicsPipelineCache::GetCompiledPipeline(const PipelineState &state, span<const vk::DescriptorSetLayoutBinding> layoutBindings, span<const vk::PushConstantRange> pushConstantRanges, bool noPushDescriptors) {
    std::unique_lock guard{mutex};

    if (auto cached{pipelineCache.find(state)}; cached != pipelineCache.end())
        return CompiledPipeline{cached->second};

    const bool usePushDescriptors{!noPushDescriptors && gpu.traits.supportsPushDescriptors};
    vk::raii::DescriptorSetLayout setLayout{gpu.vkDevice, vk::DescriptorSetLayoutCreateInfo{
        .flags = vk::DescriptorSetLayoutCreateFlags{usePushDescriptors ? vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR : vk::DescriptorSetLayoutCreateFlags{}},
        .pBindings = layoutBindings.data(),
        .bindingCount = static_cast<u32>(layoutBindings.size()),
    }};

    vk::raii::PipelineLayout layout{gpu.vkDevice, vk::PipelineLayoutCreateInfo{
        .pSetLayouts = &*setLayout,
        .setLayoutCount = 1,
        .pPushConstantRanges = pushConstantRanges.data(),
        .pushConstantRangeCount = static_cast<u32>(pushConstantRanges.size()),
    }};

    // The entry is inserted before compilation is queued so the task can reference the key and layout stored in the map
    auto entryIt{pipelineCache.try_emplace(PipelineCacheKey{state}, std::move(setLayout), std::move(layout)).first};
    const auto &storedKey{entryIt->first};
    auto &entry{entryIt->second};

    auto pendingPipeline{pool.submit(&GraphicsPipelineCache::BuildPipeline, this, std::ref(storedKey), std::ref(*entry.pipelineLayout))};
    entry.pipeline = pendingPipeline.share();
    return CompiledPipeline{entry};
}
void GraphicsPipelineCache::WaitIdle() {
pool.wait_for_tasks();
}
}

View File

@ -0,0 +1,254 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <boost/functional/hash.hpp>
#include <filesystem>
#include <gpu.h>
#include "graphics_pipeline_assembler.h"
#include "trait_manager.h"
namespace skyline::gpu {
/**
* @brief Unique header serialized into the pipeline cache filename as a hexdump to identify a particular driver
*/
struct PipelineCacheFileNameHeader {
u32 vendorId; //!< The driver reported vendor ID
u32 deviceId; //!< The driver reported device ID
u32 driverVersion; //!< The driver reported version
std::array<u8, VK_UUID_SIZE> uuid; //!< The driver reported pipeline cache UUID
PipelineCacheFileNameHeader(const TraitManager &traits)
: vendorId{traits.vendorId},
deviceId{traits.deviceId},
driverVersion{traits.driverVersion},
uuid{traits.pipelineCacheUuid} {}
std::string HexDump() {
return util::HexDump(span<u8>{reinterpret_cast<u8 *>(this), sizeof(PipelineCacheFileNameHeader)});
}
};
/**
 * @brief Header that precedes serialized pipeline cache data in the pipeline cache file
 * @note This is written to and read from the file as raw bytes, so its layout is part of the on-disk format; the static_assert below pins the header to 0x10 bytes
 */
struct PipelineCacheFileDataHeader {
u64 size; //!< Size in bytes of the pipeline cache data that follows the header
u64 hash; //!< XXH64 (seed 0) of the pipeline cache data, checked on load to reject invalid files
u8 data[]; //!< Marks where the data begins; a flexible array member that does not contribute to sizeof
};
static_assert(sizeof(PipelineCacheFileDataHeader) == 0x10);
/**
 * @brief Creates a Vulkan pipeline cache, seeded with previously serialised data for the current driver when a valid file exists
 * @param gpu The GPU whose device creates the cache and whose traits select the cache filename
 * @param pipelineCacheDir Directory holding the cache files, created if it does not exist
 * @return A pipeline cache initialised from the file, or an empty one if the file is missing, unreadable or fails validation
 * @note The file contents are untrusted: the size recorded in the header is checked against the real file size before any buffer is allocated, and the payload must match the stored XXH64 hash
 */
static vk::raii::PipelineCache DeserialisePipelineCache(GPU &gpu, std::string_view pipelineCacheDir) {
    std::filesystem::create_directories(pipelineCacheDir);

    PipelineCacheFileNameHeader expectedFilenameHeader{gpu.traits};
    std::filesystem::path path{std::filesystem::path{pipelineCacheDir} / expectedFilenameHeader.HexDump()};

    if (!std::filesystem::exists(path))
        return {gpu.vkDevice, vk::PipelineCacheCreateInfo{}};

    std::ifstream stream{path, std::ios::binary};
    if (stream.fail()) {
        Logger::Warn("Failed to open Vulkan pipeline cache!");
        return {gpu.vkDevice, vk::PipelineCacheCreateInfo{}};
    }

    PipelineCacheFileDataHeader header{};
    stream.read(reinterpret_cast<char *>(&header), sizeof(PipelineCacheFileDataHeader));

    // Reject truncated headers and payload sizes larger than what the file can actually hold, a corrupt size
    // field must never be allowed to drive an arbitrarily large allocation
    std::error_code sizeError;
    const auto fileSize{std::filesystem::file_size(path, sizeError)};
    const bool headerValid{!stream.fail() && !sizeError &&
                           fileSize >= sizeof(PipelineCacheFileDataHeader) &&
                           header.size <= fileSize - sizeof(PipelineCacheFileDataHeader)};
    if (!headerValid) {
        Logger::Warn("Ignoring invalid pipeline cache file!");
        return {gpu.vkDevice, vk::PipelineCacheCreateInfo{}};
    }

    std::vector<u8> readData(header.size);
    stream.read(reinterpret_cast<char *>(readData.data()), static_cast<std::streamsize>(header.size));

    if (stream.fail() || header.hash != XXH64(readData.data(), readData.size(), 0)) {
        Logger::Warn("Ignoring invalid pipeline cache file!");
        return {gpu.vkDevice, vk::PipelineCacheCreateInfo{}};
    }

    // `readData` outlives the cache construction performed by this return statement
    return {gpu.vkDevice, vk::PipelineCacheCreateInfo{
        .initialDataSize = readData.size(),
        .pInitialData = readData.data(),
    }};
}
/**
 * @brief Writes pipeline cache data to the cache file for the current driver, preceded by a size + XXH64 header
 * @param gpu The GPU whose traits select the cache filename
 * @param pipelineCacheDir Directory the cache file is written into
 * @param data The raw pipeline cache data to store
 * @note Any existing file is truncated; failures to open or to write are logged as warnings and otherwise ignored
 */
static void SerialisePipelineCache(GPU &gpu, std::string_view pipelineCacheDir, span<u8> data) {
    PipelineCacheFileNameHeader expectedFilenameHeader{gpu.traits};
    std::filesystem::path path{std::filesystem::path{pipelineCacheDir} / expectedFilenameHeader.HexDump()};

    PipelineCacheFileDataHeader header{
        .size = data.size(),
        .hash = XXH64(data.data(), data.size(), 0)
    };

    std::ofstream stream{path, std::ios::binary | std::ios::trunc};
    if (stream.fail()) {
        Logger::Warn("Failed to write Vulkan pipeline cache!");
        return;
    }

    stream.write(reinterpret_cast<char *>(&header), sizeof(PipelineCacheFileDataHeader));
    stream.write(reinterpret_cast<char *>(data.data()), static_cast<std::streamsize>(data.size()));

    // Flush explicitly so that buffered write errors (e.g. a full disk) surface before success is reported
    stream.flush();
    if (stream.fail()) {
        Logger::Warn("Failed to write Vulkan pipeline cache!");
        return;
    }

    Logger::Info("Wrote Vulkan pipeline cache to {} (size: 0x{:X} bytes)", path.string(), data.size());
}
/**
 * @brief Constructs the assembler, obtaining its Vulkan pipeline cache from DeserialisePipelineCache
 * @param gpu The GPU the assembler belongs to
 * @param pipelineCacheDir Directory used for the on-disk pipeline cache; a copy is kept in the member of the same name
 * @note `pool` is constructed with 1 when the `brokenMultithreadedPipelineCompilation` quirk is set and with 0 otherwise; 0 presumably selects the pool's default thread count — TODO confirm against the pool type's documentation
 */
GraphicsPipelineAssembler::GraphicsPipelineAssembler(GPU &gpu, std::string_view pipelineCacheDir)
: gpu{gpu},
vkPipelineCache{DeserialisePipelineCache(gpu, pipelineCacheDir)},
pool{gpu.traits.quirks.brokenMultithreadedPipelineCompilation ? 1U : 0U},
pipelineCacheDir{pipelineCacheDir} {}
// Expands to the iterator pair (state.pointer, state.pointer + state.size); both expressions are prefixed with `state.` so the copy always reads from the source state rather than from members of the description under construction
#define VEC_CPY(pointer, size) state.pointer, state.pointer + state.size
/**
 * @brief Builds a self-contained, value-form description from a reference-form pipeline state
 * @param state The pipeline state whose structures and pointed-to arrays are copied
 * @note Every array referenced through a pointer in `state` is copied into a vector member, after which the copied create-info structures are re-pointed at those vectors
 * @note The shader stage structures are copied as-is; any pointers they contain (e.g. `pName`) are not deep-copied here
 */
GraphicsPipelineAssembler::PipelineDescription::PipelineDescription(const GraphicsPipelineAssembler::PipelineState &state)
: shaderStages(state.shaderStages.begin(), state.shaderStages.end()),
vertexState(state.vertexState),
vertexBindings(VEC_CPY(VertexInputState().pVertexBindingDescriptions, VertexInputState().vertexBindingDescriptionCount)),
vertexAttributes(VEC_CPY(VertexInputState().pVertexAttributeDescriptions, VertexInputState().vertexAttributeDescriptionCount)),
vertexDivisors(VEC_CPY(VertexDivisorState().pVertexBindingDivisors, VertexDivisorState().vertexBindingDivisorCount)),
inputAssemblyState(state.inputAssemblyState),
tessellationState(state.tessellationState),
viewportState(state.viewportState),
viewports(VEC_CPY(viewportState.pViewports, viewportState.viewportCount)),
scissors(VEC_CPY(viewportState.pScissors, viewportState.scissorCount)),
rasterizationState(state.rasterizationState),
multisampleState(state.multisampleState),
depthStencilState(state.depthStencilState),
colorBlendState(state.colorBlendState),
dynamicStates(VEC_CPY(dynamicState.pDynamicStates, dynamicState.dynamicStateCount)),
dynamicState(state.dynamicState),
colorBlendAttachments(VEC_CPY(colorBlendState.pAttachments, colorBlendState.attachmentCount)) {
// Re-target the pointers inside the copied structures at the vectors owned by this description
auto &vertexInputState{vertexState.get<vk::PipelineVertexInputStateCreateInfo>()};
vertexInputState.pVertexBindingDescriptions = vertexBindings.data();
vertexInputState.pVertexAttributeDescriptions = vertexAttributes.data();
// The divisor pointer is updated unconditionally, regardless of whether the divisor structure is linked into the chain
vertexState.get<vk::PipelineVertexInputDivisorStateCreateInfoEXT>().pVertexBindingDivisors = vertexDivisors.data();
viewportState.pViewports = viewports.data();
viewportState.pScissors = scissors.data();
colorBlendState.pAttachments = colorBlendAttachments.data();
dynamicState.pDynamicStates = dynamicStates.data();
// Attachment formats, sample count and the shader module destruction flag are plain values and are copied directly
for (auto &colorFormat : state.colorFormats)
colorFormats.emplace_back(colorFormat);
depthStencilFormat = state.depthStencilFormat;
sampleCount = state.sampleCount;
destroyShaderModules = state.destroyShaderModules;
}
#undef VEC_CPY
/**
 * @brief Creates a graphics pipeline from a pending description, then removes that description from the pending list
 * @param pipelineDescIt Iterator to the description inside `compilePendingDescs`; it is erased before returning and must not be used afterwards
 * @param pipelineLayout The layout the pipeline is created with
 * @return The newly created pipeline
 * @note The pipeline is created against a temporary single-subpass render pass built from the description's attachment formats and sample count
 */
vk::raii::Pipeline GraphicsPipelineAssembler::AssemblePipeline(std::list<PipelineDescription>::iterator pipelineDescIt, vk::PipelineLayout pipelineLayout) {
boost::container::small_vector<vk::AttachmentDescription, 8> attachmentDescriptions;
boost::container::small_vector<vk::AttachmentReference, 8> attachmentReferences;
// Appends one reference per format; undefined formats get a VK_ATTACHMENT_UNUSED reference and no description
auto pushAttachment{[&](vk::Format format) {
if (format != vk::Format::eUndefined) {
attachmentDescriptions.push_back(vk::AttachmentDescription{
.format = format,
.samples = pipelineDescIt->sampleCount,
.loadOp = vk::AttachmentLoadOp::eLoad,
.storeOp = vk::AttachmentStoreOp::eStore,
.stencilLoadOp = vk::AttachmentLoadOp::eLoad,
.stencilStoreOp = vk::AttachmentStoreOp::eStore,
.initialLayout = vk::ImageLayout::eGeneral,
.finalLayout = vk::ImageLayout::eGeneral,
.flags = vk::AttachmentDescriptionFlagBits::eMayAlias
});
attachmentReferences.push_back(vk::AttachmentReference{
.attachment = static_cast<u32>(attachmentDescriptions.size() - 1),
.layout = vk::ImageLayout::eGeneral,
});
} else {
attachmentReferences.push_back(vk::AttachmentReference{
.attachment = VK_ATTACHMENT_UNUSED,
.layout = vk::ImageLayout::eUndefined,
});
}
}};
vk::SubpassDescription subpassDescription{
.pipelineBindPoint = vk::PipelineBindPoint::eGraphics,
};
for (auto &colorAttachment : pipelineDescIt->colorFormats)
pushAttachment(colorAttachment);
// The depth-stencil reference is pushed last, so it is excluded from the color count and found at back();
// pointers into `attachmentReferences` are only taken after the final push
if (pipelineDescIt->depthStencilFormat != vk::Format::eUndefined) {
pushAttachment(pipelineDescIt->depthStencilFormat);
subpassDescription.pColorAttachments = attachmentReferences.data();
subpassDescription.colorAttachmentCount = static_cast<u32>(attachmentReferences.size() - 1);
subpassDescription.pDepthStencilAttachment = &attachmentReferences.back();
} else {
subpassDescription.pColorAttachments = attachmentReferences.data();
subpassDescription.colorAttachmentCount = static_cast<u32>(attachmentReferences.size());
}
vk::raii::RenderPass renderPass{gpu.vkDevice, vk::RenderPassCreateInfo{
.attachmentCount = static_cast<u32>(attachmentDescriptions.size()),
.pAttachments = attachmentDescriptions.data(),
.subpassCount = 1,
.pSubpasses = &subpassDescription,
}};
// NOTE(review): the description stores `tessellationState` but no `pTessellationState` is passed below — confirm this is intentional
auto pipeline{gpu.vkDevice.createGraphicsPipeline(vkPipelineCache, vk::GraphicsPipelineCreateInfo{
.pStages = pipelineDescIt->shaderStages.data(),
.stageCount = static_cast<u32>(pipelineDescIt->shaderStages.size()),
.pVertexInputState = &pipelineDescIt->vertexState.get<vk::PipelineVertexInputStateCreateInfo>(),
.pInputAssemblyState = &pipelineDescIt->inputAssemblyState,
.pViewportState = &pipelineDescIt->viewportState,
.pRasterizationState = &pipelineDescIt->rasterizationState.get<vk::PipelineRasterizationStateCreateInfo>(),
.pMultisampleState = &pipelineDescIt->multisampleState,
.pDepthStencilState = &pipelineDescIt->depthStencilState,
.pColorBlendState = &pipelineDescIt->colorBlendState,
.pDynamicState = &pipelineDescIt->dynamicState,
.layout = pipelineLayout,
.renderPass = *renderPass,
.subpass = 0,
})};
// NOTE(review): if pipeline creation throws, the cleanup below is skipped, leaving the description (and any modules flagged for destruction) alive — confirm whether that can happen in this build configuration
// The shader modules are only destroyed once the pipeline has been created from them
if (pipelineDescIt->destroyShaderModules)
for (auto &shaderStage : pipelineDescIt->shaderStages)
(*gpu.vkDevice).destroyShaderModule(shaderStage.module, nullptr, *gpu.vkDevice.getDispatcher());
// The pending list is shared with the thread that queues descriptions, so it is only modified with the mutex held
std::scoped_lock lock{mutex};
compilePendingDescs.erase(pipelineDescIt);
return pipeline;
}
/**
 * @brief Creates the layouts for a pipeline and queues the pipeline itself for compilation on the pool
 * @param state The reference-form pipeline state; it is copied into a pending description before this function returns
 * @param layoutBindings Bindings for the descriptor set layout
 * @param pushConstantRanges Push constant ranges for the pipeline layout
 * @param noPushDescriptors If set, the push descriptor layout flag is never used even when the device supports it
 * @return The descriptor set layout, the pipeline layout and the future returned by the pool for the queued compilation
 */
GraphicsPipelineAssembler::CompiledPipeline GraphicsPipelineAssembler::AssemblePipelineAsync(const PipelineState &state, span<const vk::DescriptorSetLayoutBinding> layoutBindings, span<const vk::PushConstantRange> pushConstantRanges, bool noPushDescriptors) {
    const bool usePushDescriptors{!noPushDescriptors && gpu.traits.supportsPushDescriptors};
    vk::raii::DescriptorSetLayout setLayout{gpu.vkDevice, vk::DescriptorSetLayoutCreateInfo{
        .flags = vk::DescriptorSetLayoutCreateFlags{usePushDescriptors ? vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR : vk::DescriptorSetLayoutCreateFlags{}},
        .pBindings = layoutBindings.data(),
        .bindingCount = static_cast<u32>(layoutBindings.size()),
    }};

    vk::raii::PipelineLayout layout{gpu.vkDevice, vk::PipelineLayoutCreateInfo{
        .pSetLayouts = &*setLayout,
        .setLayoutCount = 1,
        .pPushConstantRanges = pushConstantRanges.data(),
        .pushConstantRangeCount = static_cast<u32>(pushConstantRanges.size()),
    }};

    // Append a value copy of the state to the pending list; the mutex is only held for the insertion itself
    decltype(compilePendingDescs)::iterator pendingIt;
    {
        std::scoped_lock guard{mutex};
        pendingIt = compilePendingDescs.emplace(compilePendingDescs.end(), state);
    }

    // The raw layout handle is read here, before the owning object is moved into the result
    auto pendingPipeline{pool.submit(&GraphicsPipelineAssembler::AssemblePipeline, this, pendingIt, *layout)};
    return CompiledPipeline{std::move(setLayout), std::move(layout), std::move(pendingPipeline)};
}
void GraphicsPipelineAssembler::WaitIdle() {
pool.wait_for_tasks();
}
/**
 * @brief Queues a pool task that fetches the Vulkan pipeline cache data and serialises it into the cache directory
 * @note The value returned by the pool's submit() is discarded, so callers are not notified when the write completes
 */
void GraphicsPipelineAssembler::SavePipelineCache() {
    auto saveTask{[this]() {
        std::vector<u8> cacheBlob{vkPipelineCache.getData()};
        SerialisePipelineCache(gpu, pipelineCacheDir, cacheBlob);
    }};
    pool.submit(saveTask);
}
}

View File

@ -11,12 +11,11 @@ namespace skyline::gpu {
class TextureView; class TextureView;
} }
namespace skyline::gpu::cache { namespace skyline::gpu {
/** /**
* @brief A cache for all Vulkan graphics pipelines objects used by the GPU to avoid costly re-creation * @brief Wrapper for Vulkan pipelines to allow for asynchronous compilation
* @note The cache is **not** compliant with Vulkan specification's Render Pass Compatibility clause when used with multi-subpass Render Passes but certain drivers may support a more relaxed version of this clause in practice which may allow it to be used with multi-subpass Render Passes
*/ */
class GraphicsPipelineCache { class GraphicsPipelineAssembler {
public: public:
/** /**
* @brief All unique state required to compile a graphics pipeline as references * @brief All unique state required to compile a graphics pipeline as references
@ -36,6 +35,7 @@ namespace skyline::gpu::cache {
span<vk::Format> colorFormats; //!< All color attachment formats in the subpass of this pipeline span<vk::Format> colorFormats; //!< All color attachment formats in the subpass of this pipeline
vk::Format depthStencilFormat; //!< The depth attachment format in the subpass of this pipeline, 'Undefined' if there is no depth attachment vk::Format depthStencilFormat; //!< The depth attachment format in the subpass of this pipeline, 'Undefined' if there is no depth attachment
vk::SampleCountFlagBits sampleCount; //!< The sample count of the subpass of this pipeline vk::SampleCountFlagBits sampleCount; //!< The sample count of the subpass of this pipeline
bool destroyShaderModules; //!< Whether the shader modules should be destroyed after the pipeline is compiled
constexpr const vk::PipelineVertexInputStateCreateInfo &VertexInputState() const { constexpr const vk::PipelineVertexInputStateCreateInfo &VertexInputState() const {
return vertexState.get<vk::PipelineVertexInputStateCreateInfo>(); return vertexState.get<vk::PipelineVertexInputStateCreateInfo>();
@ -56,8 +56,9 @@ namespace skyline::gpu::cache {
private: private:
GPU &gpu; GPU &gpu;
std::mutex mutex; //!< Synchronizes accesses to the pipeline cache
vk::raii::PipelineCache vkPipelineCache; //!< A Vulkan Pipeline Cache which stores all unique graphics pipelines vk::raii::PipelineCache vkPipelineCache; //!< A Vulkan Pipeline Cache which stores all unique graphics pipelines
BS::thread_pool pool;
std::string pipelineCacheDir;
/** /**
* @brief All unique metadata in a single attachment for a compatible render pass according to Render Pass Compatibility clause in the Vulkan specification * @brief All unique metadata in a single attachment for a compatible render pass according to Render Pass Compatibility clause in the Vulkan specification
@ -72,10 +73,7 @@ namespace skyline::gpu::cache {
bool operator==(const AttachmentMetadata &rhs) const = default; bool operator==(const AttachmentMetadata &rhs) const = default;
}; };
/** struct PipelineDescription {
* @brief All data in PipelineState in value form to allow cheap heterogeneous lookups with reference types while still storing a value-based key in the map
*/
struct PipelineCacheKey {
std::vector<vk::PipelineShaderStageCreateInfo> shaderStages; std::vector<vk::PipelineShaderStageCreateInfo> shaderStages;
vk::StructureChain<vk::PipelineVertexInputStateCreateInfo, vk::PipelineVertexInputDivisorStateCreateInfoEXT> vertexState; vk::StructureChain<vk::PipelineVertexInputStateCreateInfo, vk::PipelineVertexInputDivisorStateCreateInfoEXT> vertexState;
std::vector<vk::VertexInputBindingDescription> vertexBindings; std::vector<vk::VertexInputBindingDescription> vertexBindings;
@ -97,10 +95,9 @@ namespace skyline::gpu::cache {
std::vector<vk::Format> colorFormats; std::vector<vk::Format> colorFormats;
vk::Format depthStencilFormat; vk::Format depthStencilFormat;
vk::SampleCountFlagBits sampleCount; vk::SampleCountFlagBits sampleCount;
bool destroyShaderModules;
PipelineCacheKey(const PipelineState& state); PipelineDescription(const PipelineState& state);
bool operator==(const PipelineCacheKey& other) const = default;
constexpr const vk::PipelineVertexInputStateCreateInfo &VertexInputState() const { constexpr const vk::PipelineVertexInputStateCreateInfo &VertexInputState() const {
return vertexState.get<vk::PipelineVertexInputStateCreateInfo>(); return vertexState.get<vk::PipelineVertexInputStateCreateInfo>();
@ -119,44 +116,30 @@ namespace skyline::gpu::cache {
} }
}; };
struct PipelineStateHash { std::mutex mutex; //!< Protects access to `compilePendingDescs`
using is_transparent = std::true_type; std::list<PipelineDescription> compilePendingDescs; //!< List of pipeline descriptions that are pending compilation
size_t operator()(const PipelineState &key) const; /**
* @brief Synchronously compiles a pipeline with the state from the given description
size_t operator()(const PipelineCacheKey &key) const; */
}; vk::raii::Pipeline AssemblePipeline(std::list<PipelineDescription>::iterator pipelineDescIt, vk::PipelineLayout pipelineLayout);
struct PipelineCacheEqual {
using is_transparent = std::true_type;
bool operator()(const PipelineCacheKey &lhs, const PipelineState &rhs) const;
bool operator()(const PipelineCacheKey &lhs, const PipelineCacheKey &rhs) const;
};
struct PipelineCacheEntry {
vk::raii::DescriptorSetLayout descriptorSetLayout;
vk::raii::PipelineLayout pipelineLayout;
std::optional<std::shared_future<vk::raii::Pipeline>> pipeline;
PipelineCacheEntry(vk::raii::DescriptorSetLayout&& descriptorSetLayout, vk::raii::PipelineLayout &&layout);
};
BS::thread_pool pool;
std::unordered_map<PipelineCacheKey, PipelineCacheEntry, PipelineStateHash, PipelineCacheEqual> pipelineCache;
vk::raii::Pipeline BuildPipeline(const PipelineCacheKey &key, vk::PipelineLayout pipelineLayout);
public: public:
GraphicsPipelineCache(GPU &gpu); GraphicsPipelineAssembler(GPU &gpu, std::string_view pipelineCacheDir);
struct CompiledPipeline { struct CompiledPipeline {
vk::DescriptorSetLayout descriptorSetLayout; vk::raii::DescriptorSetLayout descriptorSetLayout;
vk::PipelineLayout pipelineLayout; vk::raii::PipelineLayout pipelineLayout;
std::shared_future<vk::raii::Pipeline> pipeline; std::shared_future<vk::raii::Pipeline> pipeline;
CompiledPipeline(const PipelineCacheEntry &entry); CompiledPipeline() : descriptorSetLayout{nullptr}, pipelineLayout{nullptr} {};
CompiledPipeline(vk::raii::DescriptorSetLayout descriptorSetLayout,
vk::raii::PipelineLayout pipelineLayout,
std::shared_future<vk::raii::Pipeline> pipeline)
: descriptorSetLayout{std::move(descriptorSetLayout)},
pipelineLayout{std::move(pipelineLayout)},
pipeline{std::move(pipeline)} {};
}; };
/** /**
@ -164,11 +147,16 @@ namespace skyline::gpu::cache {
* @note Shader specialization constants are **not** supported and will result in UB * @note Shader specialization constants are **not** supported and will result in UB
* @note Input/Resolve attachments are **not** supported and using them with the supplied pipeline will result in UB * @note Input/Resolve attachments are **not** supported and using them with the supplied pipeline will result in UB
*/ */
CompiledPipeline GetCompiledPipeline(const PipelineState& state, span<const vk::DescriptorSetLayoutBinding> layoutBindings, span<const vk::PushConstantRange> pushConstantRanges = {}, bool noPushDescriptors = false); CompiledPipeline AssemblePipelineAsync(const PipelineState &state, span<const vk::DescriptorSetLayoutBinding> layoutBindings, span<const vk::PushConstantRange> pushConstantRanges = {}, bool noPushDescriptors = false);
/** /**
* @brief Waits until the pipeline compilation thread pool is idle and all pipelines have been compiled * @brief Waits until the pipeline compilation thread pool is idle and all pipelines have been compiled
*/ */
void WaitIdle(); void WaitIdle();
/**
* @brief Saves the current Vulkan pipeline cache to the filesystem
*/
void SavePipelineCache();
}; };
} }

View File

@ -92,7 +92,7 @@ namespace skyline::gpu::interconnect::kepler_compute {
if (ctx.gpu.traits.quirks.brokenMultithreadedPipelineCompilation) if (ctx.gpu.traits.quirks.brokenMultithreadedPipelineCompilation)
ctx.gpu.graphicsPipelineCache.WaitIdle(); ctx.gpu.graphicsPipelineAssembler->WaitIdle();
vk::raii::Pipeline pipeline{ctx.gpu.vkDevice, nullptr, pipelineInfo}; vk::raii::Pipeline pipeline{ctx.gpu.vkDevice, nullptr, pipelineInfo};

View File

@ -6,7 +6,7 @@
#include <gpu/interconnect/command_executor.h> #include <gpu/interconnect/command_executor.h>
#include <gpu/interconnect/common/pipeline.inc> #include <gpu/interconnect/common/pipeline.inc>
#include <gpu/interconnect/common/file_pipeline_state_accessor.h> #include <gpu/interconnect/common/file_pipeline_state_accessor.h>
#include <gpu/cache/graphics_pipeline_cache.h> #include <gpu/graphics_pipeline_assembler.h>
#include <gpu/shader_manager.h> #include <gpu/shader_manager.h>
#include <gpu.h> #include <gpu.h>
#include "graphics_pipeline_state_accessor.h" #include "graphics_pipeline_state_accessor.h"
@ -445,10 +445,10 @@ namespace skyline::gpu::interconnect::maxwell3d {
} }
} }
static cache::GraphicsPipelineCache::CompiledPipeline MakeCompiledPipeline(GPU &gpu, static GraphicsPipelineAssembler::CompiledPipeline MakeCompiledPipeline(GPU &gpu,
const PackedPipelineState &packedState, const PackedPipelineState &packedState,
const std::array<Pipeline::ShaderStage, engine::ShaderStageCount> &shaderStages, const std::array<ShaderStage, engine::ShaderStageCount> &shaderStages,
span<vk::DescriptorSetLayoutBinding> layoutBindings) { span<vk::DescriptorSetLayoutBinding> layoutBindings) {
boost::container::static_vector<vk::PipelineShaderStageCreateInfo, engine::ShaderStageCount> shaderStageInfos; boost::container::static_vector<vk::PipelineShaderStageCreateInfo, engine::ShaderStageCount> shaderStageInfos;
for (const auto &stage : shaderStages) for (const auto &stage : shaderStages)
if (stage.module) if (stage.module)
@ -604,7 +604,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
texture::Format depthStencilFormat{packedState.GetDepthRenderTargetFormat()}; texture::Format depthStencilFormat{packedState.GetDepthRenderTargetFormat()};
return gpu.graphicsPipelineCache.GetCompiledPipeline(cache::GraphicsPipelineCache::PipelineState{ return gpu.graphicsPipelineAssembler->AssemblePipelineAsync(GraphicsPipelineAssembler::PipelineState{
.shaderStages = shaderStageInfos, .shaderStages = shaderStageInfos,
.vertexState = vertexInputState, .vertexState = vertexInputState,
.inputAssemblyState = inputAssemblyState, .inputAssemblyState = inputAssemblyState,
@ -618,6 +618,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
.colorFormats = colorAttachmentFormats, .colorFormats = colorAttachmentFormats,
.depthStencilFormat = depthStencilFormat ? depthStencilFormat->vkFormat : vk::Format::eUndefined, .depthStencilFormat = depthStencilFormat ? depthStencilFormat->vkFormat : vk::Format::eUndefined,
.sampleCount = vk::SampleCountFlagBits::e1, //TODO: fix after MSAA support .sampleCount = vk::SampleCountFlagBits::e1, //TODO: fix after MSAA support
.destroyShaderModules = true
}, layoutBindings); }, layoutBindings);
} }

View File

@ -5,7 +5,7 @@
#include <tsl/robin_map.h> #include <tsl/robin_map.h>
#include <shader_compiler/frontend/ir/program.h> #include <shader_compiler/frontend/ir/program.h>
#include <gpu/cache/graphics_pipeline_cache.h> #include <gpu/graphics_pipeline_assembler.h>
#include <gpu/interconnect/common/samplers.h> #include <gpu/interconnect/common/samplers.h>
#include <gpu/interconnect/common/textures.h> #include <gpu/interconnect/common/textures.h>
#include <gpu/interconnect/common/pipeline_state_accessor.h> #include <gpu/interconnect/common/pipeline_state_accessor.h>

View File

@ -4,7 +4,7 @@
#include <gpu.h> #include <gpu.h>
#include <gpu/descriptor_allocator.h> #include <gpu/descriptor_allocator.h>
#include <gpu/texture/texture.h> #include <gpu/texture/texture.h>
#include <gpu/cache/graphics_pipeline_cache.h> #include <gpu/graphics_pipeline_assembler.h>
#include <vfs/filesystem.h> #include <vfs/filesystem.h>
#include "helper_shaders.h" #include "helper_shaders.h"
@ -39,9 +39,9 @@ namespace skyline::gpu {
}} }}
} {} } {}
cache::GraphicsPipelineCache::CompiledPipeline SimpleSingleRtShader::GetPipeline(GPU &gpu, const GraphicsPipelineAssembler::CompiledPipeline &SimpleSingleRtShader::GetPipeline(GPU &gpu,
const PipelineState &state, const PipelineState &state,
span<const vk::DescriptorSetLayoutBinding> layoutBindings, span<const vk::PushConstantRange> pushConstantRanges) { span<const vk::DescriptorSetLayoutBinding> layoutBindings, span<const vk::PushConstantRange> pushConstantRanges) {
if (auto it{pipelineCache.find(state)}; it != pipelineCache.end()) if (auto it{pipelineCache.find(state)}; it != pipelineCache.end())
return it->second; return it->second;
@ -137,7 +137,7 @@ namespace skyline::gpu {
vertexState.unlink<vk::PipelineVertexInputDivisorStateCreateInfoEXT>(); vertexState.unlink<vk::PipelineVertexInputDivisorStateCreateInfoEXT>();
std::array<vk::Format, 1> colorFormats{state.colorFormat}; std::array<vk::Format, 1> colorFormats{state.colorFormat};
return pipelineCache.emplace(state, gpu.graphicsPipelineCache.GetCompiledPipeline(cache::GraphicsPipelineCache::PipelineState{ return pipelineCache.emplace(state, gpu.graphicsPipelineAssembler->AssemblePipelineAsync(GraphicsPipelineAssembler::PipelineState{
.shaderStages = shaderStages, .shaderStages = shaderStages,
.vertexState = vertexState, .vertexState = vertexState,
.inputAssemblyState = inputAssemblyState, .inputAssemblyState = inputAssemblyState,
@ -233,15 +233,15 @@ namespace skyline::gpu {
blit::VertexPushConstantLayout vertexPushConstants; blit::VertexPushConstantLayout vertexPushConstants;
blit::FragmentPushConstantLayout fragmentPushConstants; blit::FragmentPushConstantLayout fragmentPushConstants;
DescriptorAllocator::ActiveDescriptorSet descriptorSet; DescriptorAllocator::ActiveDescriptorSet descriptorSet;
cache::GraphicsPipelineCache::CompiledPipeline pipeline; const GraphicsPipelineAssembler::CompiledPipeline &pipeline;
vk::Extent2D imageDimensions; vk::Extent2D imageDimensions;
DrawState(GPU &gpu, DrawState(GPU &gpu,
blit::VertexPushConstantLayout vertexPushConstants, blit::FragmentPushConstantLayout fragmentPushConstants, blit::VertexPushConstantLayout vertexPushConstants, blit::FragmentPushConstantLayout fragmentPushConstants,
cache::GraphicsPipelineCache::CompiledPipeline pipeline, const GraphicsPipelineAssembler::CompiledPipeline &pipeline,
vk::Extent2D imageDimensions) vk::Extent2D imageDimensions)
: vertexPushConstants{vertexPushConstants}, fragmentPushConstants{fragmentPushConstants}, : vertexPushConstants{vertexPushConstants}, fragmentPushConstants{fragmentPushConstants},
descriptorSet{gpu.descriptor.AllocateSet(pipeline.descriptorSetLayout)}, descriptorSet{gpu.descriptor.AllocateSet(*pipeline.descriptorSetLayout)},
pipeline{pipeline}, pipeline{pipeline},
imageDimensions{imageDimensions} {} imageDimensions{imageDimensions} {}
}; };
@ -300,10 +300,10 @@ namespace skyline::gpu {
commandBuffer.setScissor(0, {scissor}); commandBuffer.setScissor(0, {scissor});
commandBuffer.setViewport(0, {viewport}); commandBuffer.setViewport(0, {viewport});
commandBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, *drawState->pipeline.pipeline.get()); commandBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, *drawState->pipeline.pipeline.get());
commandBuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, drawState->pipeline.pipelineLayout, 0, *drawState->descriptorSet, nullptr); commandBuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *drawState->pipeline.pipelineLayout, 0, *drawState->descriptorSet, nullptr);
commandBuffer.pushConstants(drawState->pipeline.pipelineLayout, vk::ShaderStageFlagBits::eVertex, 0, commandBuffer.pushConstants(*drawState->pipeline.pipelineLayout, vk::ShaderStageFlagBits::eVertex, 0,
vk::ArrayProxy<const blit::VertexPushConstantLayout>{drawState->vertexPushConstants}); vk::ArrayProxy<const blit::VertexPushConstantLayout>{drawState->vertexPushConstants});
commandBuffer.pushConstants(drawState->pipeline.pipelineLayout, vk::ShaderStageFlagBits::eFragment, sizeof(blit::VertexPushConstantLayout), commandBuffer.pushConstants(*drawState->pipeline.pipelineLayout, vk::ShaderStageFlagBits::eFragment, sizeof(blit::VertexPushConstantLayout),
vk::ArrayProxy<const blit::FragmentPushConstantLayout>{drawState->fragmentPushConstants}); vk::ArrayProxy<const blit::FragmentPushConstantLayout>{drawState->fragmentPushConstants});
commandBuffer.draw(6, 1, 0, 0); commandBuffer.draw(6, 1, 0, 0);
}); });
@ -333,12 +333,12 @@ namespace skyline::gpu {
std::function<void(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32)> &&)> &&recordCb) { std::function<void(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32)> &&)> &&recordCb) {
struct DrawState { struct DrawState {
clear::FragmentPushConstantLayout fragmentPushConstants; clear::FragmentPushConstantLayout fragmentPushConstants;
cache::GraphicsPipelineCache::CompiledPipeline pipeline; const GraphicsPipelineAssembler::CompiledPipeline &pipeline;
vk::Extent2D imageDimensions; vk::Extent2D imageDimensions;
DrawState(GPU &gpu, DrawState(GPU &gpu,
clear::FragmentPushConstantLayout fragmentPushConstants, clear::FragmentPushConstantLayout fragmentPushConstants,
cache::GraphicsPipelineCache::CompiledPipeline pipeline, const GraphicsPipelineAssembler::CompiledPipeline &pipeline,
vk::Extent2D imageDimensions) vk::Extent2D imageDimensions)
: fragmentPushConstants{fragmentPushConstants}, : fragmentPushConstants{fragmentPushConstants},
pipeline{pipeline}, pipeline{pipeline},
@ -385,7 +385,7 @@ namespace skyline::gpu {
commandBuffer.setScissor(0, {scissor}); commandBuffer.setScissor(0, {scissor});
commandBuffer.setViewport(0, {viewport}); commandBuffer.setViewport(0, {viewport});
commandBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, *drawState->pipeline.pipeline.get()); commandBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, *drawState->pipeline.pipeline.get());
commandBuffer.pushConstants(drawState->pipeline.pipelineLayout, vk::ShaderStageFlagBits::eFragment, 0, commandBuffer.pushConstants(*drawState->pipeline.pipelineLayout, vk::ShaderStageFlagBits::eFragment, 0,
vk::ArrayProxy<const clear::FragmentPushConstantLayout>{drawState->fragmentPushConstants}); vk::ArrayProxy<const clear::FragmentPushConstantLayout>{drawState->fragmentPushConstants});
commandBuffer.draw(6, 1, 0, 0); commandBuffer.draw(6, 1, 0, 0);
}); });

View File

@ -5,7 +5,7 @@
#include <vulkan/vulkan_raii.hpp> #include <vulkan/vulkan_raii.hpp>
#include <gpu/descriptor_allocator.h> #include <gpu/descriptor_allocator.h>
#include <gpu/cache/graphics_pipeline_cache.h> #include <gpu/graphics_pipeline_assembler.h>
namespace skyline::vfs { namespace skyline::vfs {
class FileSystem; class FileSystem;
@ -36,7 +36,7 @@ namespace skyline::gpu {
} }
}; };
std::unordered_map<PipelineState, cache::GraphicsPipelineCache::CompiledPipeline, util::ObjectHash<PipelineState>> pipelineCache; std::unordered_map<PipelineState, GraphicsPipelineAssembler::CompiledPipeline, util::ObjectHash<PipelineState>> pipelineCache;
vk::raii::ShaderModule vertexShaderModule; vk::raii::ShaderModule vertexShaderModule;
vk::raii::ShaderModule fragmentShaderModule; vk::raii::ShaderModule fragmentShaderModule;
@ -47,9 +47,9 @@ namespace skyline::gpu {
/** /**
* @brief Returns a potentially cached pipeline built according to the supplied input state * @brief Returns a potentially cached pipeline built according to the supplied input state
*/ */
cache::GraphicsPipelineCache::CompiledPipeline GetPipeline(GPU &gpu, const GraphicsPipelineAssembler::CompiledPipeline &GetPipeline(GPU &gpu,
const PipelineState &state, const PipelineState &state,
span<const vk::DescriptorSetLayoutBinding> layoutBindings, span<const vk::PushConstantRange> pushConstantRanges); span<const vk::DescriptorSetLayoutBinding> layoutBindings, span<const vk::PushConstantRange> pushConstantRanges);
}; };
/** /**

View File

@ -214,6 +214,11 @@ namespace skyline::gpu {
minimumStorageBufferAlignment = static_cast<u32>(deviceProperties2.get().properties.limits.minStorageBufferOffsetAlignment); minimumStorageBufferAlignment = static_cast<u32>(deviceProperties2.get().properties.limits.minStorageBufferOffsetAlignment);
vendorId = deviceProperties2.get().properties.vendorID;
deviceId = deviceProperties2.get().properties.deviceID;
driverVersion = deviceProperties2.get().properties.driverVersion;
pipelineCacheUuid = deviceProperties2.get().properties.pipelineCacheUUID;
} }
std::string TraitManager::Summary() { std::string TraitManager::Summary() {

View File

@ -54,6 +54,11 @@ namespace skyline::gpu {
u32 hostVisibleCoherentCachedMemoryType{std::numeric_limits<u32>::max()}; u32 hostVisibleCoherentCachedMemoryType{std::numeric_limits<u32>::max()};
u32 minimumStorageBufferAlignment{}; //!< Minimum alignment for storage buffers passed to shaders u32 minimumStorageBufferAlignment{}; //!< Minimum alignment for storage buffers passed to shaders
u32 vendorId{}; //!< The `vendorID` Vulkan property
u32 deviceId{}; //!< The `deviceID` Vulkan property
u32 driverVersion{}; //!< The `driverVersion` Vulkan property
std::array<u8, VK_UUID_SIZE> pipelineCacheUuid{}; //!< The `pipelineCacheUUID` Vulkan property
std::bitset<7> bcnSupport{}; //!< Bitmask of BCn texture formats supported, it is ordered as BC1, BC2, BC3, BC4, BC5, BC6H and BC7 std::bitset<7> bcnSupport{}; //!< Bitmask of BCn texture formats supported, it is ordered as BC1, BC2, BC3, BC4, BC5, BC6H and BC7
bool supportsAdrenoDirectMemoryImport{}; bool supportsAdrenoDirectMemoryImport{};