From 072b8193a1e856e97c01e11435608078407b83a2 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Sat, 3 Dec 2022 19:57:00 +0000 Subject: [PATCH] Implement thread pool based async pipeline compilation with futures By distributing the load of shader compiling onto multiple threads and then only waiting for completion until absolutely necessary we can reduce compilation stutters significantly. --- .../gpu/cache/graphics_pipeline_cache.cpp | 88 +++++++++---------- .../gpu/cache/graphics_pipeline_cache.h | 11 ++- .../gpu/interconnect/common/state_updater.h | 19 ++++ .../maxwell_3d/packed_pipeline_state.cpp | 2 +- .../maxwell_3d/pipeline_manager.cpp | 8 +- .../maxwell_3d/pipeline_manager.h | 4 +- 6 files changed, 78 insertions(+), 54 deletions(-) diff --git a/app/src/main/cpp/skyline/gpu/cache/graphics_pipeline_cache.cpp b/app/src/main/cpp/skyline/gpu/cache/graphics_pipeline_cache.cpp index 8eed801c..e55fa68b 100644 --- a/app/src/main/cpp/skyline/gpu/cache/graphics_pipeline_cache.cpp +++ b/app/src/main/cpp/skyline/gpu/cache/graphics_pipeline_cache.cpp @@ -296,32 +296,9 @@ namespace skyline::gpu::cache { return lhs == rhs; } - GraphicsPipelineCache::PipelineCacheEntry::PipelineCacheEntry(vk::raii::DescriptorSetLayout &&descriptorSetLayout, vk::raii::PipelineLayout &&pipelineLayout, vk::raii::Pipeline &&pipeline) : descriptorSetLayout(std::move(descriptorSetLayout)), pipelineLayout(std::move(pipelineLayout)), pipeline(std::move(pipeline)) {} - - GraphicsPipelineCache::CompiledPipeline::CompiledPipeline(const PipelineCacheEntry &entry) : descriptorSetLayout(*entry.descriptorSetLayout), pipelineLayout(*entry.pipelineLayout), pipeline(*entry.pipeline) {} - - GraphicsPipelineCache::CompiledPipeline GraphicsPipelineCache::GetCompiledPipeline(const PipelineState &state, span layoutBindings, span pushConstantRanges, bool noPushDescriptors) { - std::unique_lock lock(mutex); - - auto it{pipelineCache.find(state)}; - if (it != pipelineCache.end()) - return
CompiledPipeline{it->second}; - - lock.unlock(); - - vk::raii::DescriptorSetLayout descriptorSetLayout{gpu.vkDevice, vk::DescriptorSetLayoutCreateInfo{ - .flags = vk::DescriptorSetLayoutCreateFlags{(!noPushDescriptors && gpu.traits.supportsPushDescriptors) ? vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR : vk::DescriptorSetLayoutCreateFlags{}}, - .pBindings = layoutBindings.data(), - .bindingCount = static_cast(layoutBindings.size()), - }}; - - vk::raii::PipelineLayout pipelineLayout{gpu.vkDevice, vk::PipelineLayoutCreateInfo{ - .pSetLayouts = &*descriptorSetLayout, - .setLayoutCount = 1, - .pPushConstantRanges = pushConstantRanges.data(), - .pushConstantRangeCount = static_cast(pushConstantRanges.size()), - }}; + GraphicsPipelineCache::PipelineCacheEntry::PipelineCacheEntry(vk::raii::DescriptorSetLayout &&descriptorSetLayout, vk::raii::PipelineLayout &&pipelineLayout) : descriptorSetLayout{std::move(descriptorSetLayout)}, pipelineLayout{std::move(pipelineLayout)} {} + vk::raii::Pipeline GraphicsPipelineCache::BuildPipeline(const PipelineCacheKey &key, vk::PipelineLayout pipelineLayout) { boost::container::small_vector attachmentDescriptions; boost::container::small_vector attachmentReferences; @@ -329,7 +306,7 @@ namespace skyline::gpu::cache { if (format != vk::Format::eUndefined) { attachmentDescriptions.push_back(vk::AttachmentDescription{ .format = format, - .samples = state.sampleCount, + .samples = key.sampleCount, .loadOp = vk::AttachmentLoadOp::eLoad, .storeOp = vk::AttachmentStoreOp::eStore, .stencilLoadOp = vk::AttachmentLoadOp::eLoad, @@ -354,11 +331,11 @@ namespace skyline::gpu::cache { .pipelineBindPoint = vk::PipelineBindPoint::eGraphics, }; - for (auto &colorAttachment : state.colorFormats) + for (auto &colorAttachment : key.colorFormats) pushAttachment(colorAttachment); - if (state.depthStencilFormat != vk::Format::eUndefined) { - pushAttachment(state.depthStencilFormat); + if (key.depthStencilFormat != vk::Format::eUndefined) { + 
pushAttachment(key.depthStencilFormat); subpassDescription.pColorAttachments = attachmentReferences.data(); subpassDescription.colorAttachmentCount = static_cast(attachmentReferences.size() - 1); @@ -375,25 +352,48 @@ namespace skyline::gpu::cache { .pSubpasses = &subpassDescription, }}; - auto pipeline{gpu.vkDevice.createGraphicsPipeline(vkPipelineCache, vk::GraphicsPipelineCreateInfo{ - .pStages = state.shaderStages.data(), - .stageCount = static_cast(state.shaderStages.size()), - .pVertexInputState = &state.vertexState.get(), - .pInputAssemblyState = &state.inputAssemblyState, - .pViewportState = &state.viewportState, - .pRasterizationState = &state.rasterizationState.get(), - .pMultisampleState = &state.multisampleState, - .pDepthStencilState = &state.depthStencilState, - .pColorBlendState = &state.colorBlendState, - .pDynamicState = &state.dynamicState, - .layout = *pipelineLayout, + return gpu.vkDevice.createGraphicsPipeline(vkPipelineCache, vk::GraphicsPipelineCreateInfo{ + .pStages = key.shaderStages.data(), + .stageCount = static_cast(key.shaderStages.size()), + .pVertexInputState = &key.vertexState.get(), + .pInputAssemblyState = &key.inputAssemblyState, + .pViewportState = &key.viewportState, + .pRasterizationState = &key.rasterizationState.get(), + .pMultisampleState = &key.multisampleState, + .pDepthStencilState = &key.depthStencilState, + .pColorBlendState = &key.colorBlendState, + .pDynamicState = &key.dynamicState, + .layout = pipelineLayout, .renderPass = *renderPass, .subpass = 0, - })}; + }); + } - lock.lock(); + GraphicsPipelineCache::CompiledPipeline::CompiledPipeline(const PipelineCacheEntry &entry) : descriptorSetLayout{*entry.descriptorSetLayout}, pipelineLayout{*entry.pipelineLayout}, pipeline{*entry.pipeline} {} - auto pipelineEntryIt{pipelineCache.try_emplace(PipelineCacheKey{state}, std::move(descriptorSetLayout), std::move(pipelineLayout), std::move(pipeline))}; + GraphicsPipelineCache::CompiledPipeline 
GraphicsPipelineCache::GetCompiledPipeline(const PipelineState &state, span layoutBindings, span pushConstantRanges, bool noPushDescriptors) { + std::unique_lock lock(mutex); + + auto it{pipelineCache.find(state)}; + if (it != pipelineCache.end()) + return CompiledPipeline{it->second}; + + vk::raii::DescriptorSetLayout descriptorSetLayout{gpu.vkDevice, vk::DescriptorSetLayoutCreateInfo{ + .flags = vk::DescriptorSetLayoutCreateFlags{(!noPushDescriptors && gpu.traits.supportsPushDescriptors) ? vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR : vk::DescriptorSetLayoutCreateFlags{}}, + .pBindings = layoutBindings.data(), + .bindingCount = static_cast(layoutBindings.size()), + }}; + + vk::raii::PipelineLayout pipelineLayout{gpu.vkDevice, vk::PipelineLayoutCreateInfo{ + .pSetLayouts = &*descriptorSetLayout, + .setLayoutCount = 1, + .pPushConstantRanges = pushConstantRanges.data(), + .pushConstantRangeCount = static_cast(pushConstantRanges.size()), + }}; + + auto pipelineEntryIt{pipelineCache.try_emplace(PipelineCacheKey{state}, std::move(descriptorSetLayout), std::move(pipelineLayout))}; + auto pipelineFuture{pool.submit(&GraphicsPipelineCache::BuildPipeline, this, std::ref(pipelineEntryIt.first->first), std::ref(*pipelineEntryIt.first->second.pipelineLayout))}; + pipelineEntryIt.first->second.pipeline = pipelineFuture.share(); return CompiledPipeline{pipelineEntryIt.first->second}; } } diff --git a/app/src/main/cpp/skyline/gpu/cache/graphics_pipeline_cache.h b/app/src/main/cpp/skyline/gpu/cache/graphics_pipeline_cache.h index 9f5590a7..828fecbb 100644 --- a/app/src/main/cpp/skyline/gpu/cache/graphics_pipeline_cache.h +++ b/app/src/main/cpp/skyline/gpu/cache/graphics_pipeline_cache.h @@ -3,6 +3,8 @@ #pragma once +#include +#include #include namespace skyline::gpu { @@ -136,20 +138,23 @@ namespace skyline::gpu::cache { struct PipelineCacheEntry { vk::raii::DescriptorSetLayout descriptorSetLayout; vk::raii::PipelineLayout pipelineLayout; - vk::raii::Pipeline 
pipeline; + std::optional> pipeline; - PipelineCacheEntry(vk::raii::DescriptorSetLayout&& descriptorSetLayout, vk::raii::PipelineLayout &&layout, vk::raii::Pipeline &&pipeline); + PipelineCacheEntry(vk::raii::DescriptorSetLayout&& descriptorSetLayout, vk::raii::PipelineLayout &&layout); }; + BS::thread_pool pool; std::unordered_map pipelineCache; + vk::raii::Pipeline BuildPipeline(const PipelineCacheKey &key, vk::PipelineLayout pipelineLayout); + public: GraphicsPipelineCache(GPU &gpu); struct CompiledPipeline { vk::DescriptorSetLayout descriptorSetLayout; vk::PipelineLayout pipelineLayout; - vk::Pipeline pipeline; + std::shared_future pipeline; CompiledPipeline(const PipelineCacheEntry &entry); }; diff --git a/app/src/main/cpp/skyline/gpu/interconnect/common/state_updater.h b/app/src/main/cpp/skyline/gpu/interconnect/common/state_updater.h index 2ceda797..3e17ef11 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/common/state_updater.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/common/state_updater.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include "common.h" @@ -261,6 +262,16 @@ namespace skyline::gpu::interconnect { }; using SetPipelineCmd = CmdHolder; + struct SetPipelineFutureCmdImpl { + void Record(GPU &gpu, vk::raii::CommandBuffer &commandBuffer) { + commandBuffer.bindPipeline(bindPoint, *pipeline.get()); + } + + std::shared_future pipeline; + vk::PipelineBindPoint bindPoint; + }; + using SetPipelineFutureCmd = CmdHolder; + /** * @brief Single-use helper for recording a batch of state updates into a command buffer */ @@ -471,6 +482,14 @@ namespace skyline::gpu::interconnect { }); } + void SetPipeline(const std::shared_future &pipeline, vk::PipelineBindPoint bindPoint) { + AppendCmd( + { + .pipeline = pipeline, + .bindPoint = bindPoint, + }); + } + void SetDescriptorSetWithPush(DescriptorUpdateInfo *updateInfo) { AppendCmd( { diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/packed_pipeline_state.cpp 
b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/packed_pipeline_state.cpp index 5df81930..7f1e89bb 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/packed_pipeline_state.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/packed_pipeline_state.cpp @@ -205,7 +205,7 @@ namespace skyline::gpu::interconnect::maxwell3d { size_t PackedPipelineState::GetColorRenderTargetCount() const { for (size_t i{engine::ColorTargetCount}; i > 0 ; i--) - if (IsColorRenderTargetEnabled(i - 1)) + if (IsColorRenderTargetEnabled(ctSelect[i - 1])) return i; return 0; diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.cpp b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.cpp index d5e234cb..1b193555 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.cpp @@ -530,7 +530,7 @@ namespace skyline::gpu::interconnect::maxwell3d { for (u32 i{}; i < packedState.GetColorRenderTargetCount(); i++) { attachmentBlendStates.push_back(packedState.GetAttachmentBlendState(i)); - texture::Format format{packedState.GetColorRenderTargetFormat(i)}; + texture::Format format{packedState.GetColorRenderTargetFormat(packedState.ctSelect[i])}; colorAttachmentFormats.push_back(format ? 
format->vkFormat : vk::Format::eUndefined); } @@ -595,10 +595,10 @@ namespace skyline::gpu::interconnect::maxwell3d { } Pipeline::Pipeline(InterconnectContext &ctx, const PipelineStateAccessor &accessor, const PackedPipelineState &packedState) - : shaderStages{MakePipelineShaders(ctx, accessor, packedState)}, + : sourcePackedState{packedState}, + shaderStages{MakePipelineShaders(ctx, accessor, sourcePackedState)}, descriptorInfo{MakePipelineDescriptorInfo(shaderStages, ctx.gpu.traits.quirks.needsIndividualTextureBindingWrites)}, - compiledPipeline{MakeCompiledPipeline(ctx, packedState, shaderStages, descriptorInfo.descriptorSetLayoutBindings)}, - sourcePackedState{packedState} { + compiledPipeline{MakeCompiledPipeline(ctx, sourcePackedState, shaderStages, descriptorInfo.descriptorSetLayoutBindings)} { storageBufferViews.resize(descriptorInfo.totalStorageBufferCount); } diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.h b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.h index eadfea2b..b6d9dab9 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.h @@ -82,6 +82,8 @@ namespace skyline::gpu::interconnect::maxwell3d { u32 totalImageDescCount; }; + PackedPipelineState sourcePackedState; + private: std::vector storageBufferViews; u32 lastExecutionNumber{}; //!< The last execution number this pipeline was used at @@ -99,8 +101,6 @@ namespace skyline::gpu::interconnect::maxwell3d { cache::GraphicsPipelineCache::CompiledPipeline compiledPipeline; size_t sampledImageCount{}; - PackedPipelineState sourcePackedState; - Pipeline(InterconnectContext &ctx, const PipelineStateAccessor &accessor, const PackedPipelineState &packedState); Pipeline *LookupNext(const PackedPipelineState &packedState);