From 02f99273ac6e23d611a13f1eb514c971de62b9f5 Mon Sep 17 00:00:00 2001 From: PixelyIon Date: Sat, 16 Apr 2022 15:40:35 +0530 Subject: [PATCH] Implement Shader Module Cache Implements caching of the compiled shader module (`VkShaderModule`) in an associative map based on the supplied IR, bindings and runtime state to avoid constant recompilation of shaders. This doesn't entirely address shader compilation as an issue since host shader compilation is tied to Vulkan pipeline objects rather than Vulkan shader modules; the pipeline objects themselves also need to be cached to prevent costly host shader recompilation. --- .../gpu/interconnect/graphics_context.h | 12 ++-- .../main/cpp/skyline/gpu/shader_manager.cpp | 69 ++++++++++++++++++- app/src/main/cpp/skyline/gpu/shader_manager.h | 19 ++++- 3 files changed, 90 insertions(+), 10 deletions(-) diff --git a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h index ac20e34b..483f24e7 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h @@ -657,7 +657,7 @@ namespace skyline::gpu::interconnect { struct KeyHash { size_t operator()(const Key &entry) const noexcept { - size_t seed = 0; + size_t seed{}; boost::hash_combine(seed, entry.size); boost::hash_combine(seed, entry.iova); @@ -784,7 +784,7 @@ namespace skyline::gpu::interconnect { bool needsRecompile{}; //!< If the shader needs to be recompiled as runtime information has changed ShaderCompiler::VaryingState previousStageStores{}; u32 bindingBase{}, bindingLast{}; //!< The base and last binding for descriptors bound to this stage - std::shared_ptr vkModule; + vk::ShaderModule vkModule; std::array constantBuffers{}; @@ -828,7 +828,7 @@ namespace skyline::gpu::interconnect { * @note The `descriptorSetWrite` will have a null `dstSet` which needs to be assigned prior to usage */ struct ShaderProgramState { - boost::container::static_vector, 
maxwell3d::PipelineStageCount> shaderModules; //!< Shader modules for every pipeline stage + boost::container::static_vector shaderModules; //!< Shader modules for every pipeline stage boost::container::static_vector shaderStages; //!< Shader modules for every pipeline stage vk::raii::DescriptorSetLayout descriptorSetLayout; //!< The descriptor set layout for the pipeline (Only valid when `activeShaderStagesInfoCount` is non-zero) @@ -986,7 +986,7 @@ namespace skyline::gpu::interconnect { ShaderCompiler::Backend::Bindings bindings{}; size_t bufferIndex{}, imageIndex{}; - boost::container::static_vector, maxwell3d::PipelineStageCount> shaderModules; + boost::container::static_vector shaderModules; boost::container::static_vector shaderStages; for (auto &pipelineStage : pipelineStages) { if (!pipelineStage.enabled) @@ -995,7 +995,7 @@ namespace skyline::gpu::interconnect { if (pipelineStage.needsRecompile || bindings.unified != pipelineStage.bindingBase || pipelineStage.previousStageStores.mask != runtimeInfo.previous_stage_stores.mask) { pipelineStage.previousStageStores = runtimeInfo.previous_stage_stores; pipelineStage.bindingBase = bindings.unified; - pipelineStage.vkModule = std::make_shared(gpu.shader.CompileShader(runtimeInfo, pipelineStage.program, bindings)); + pipelineStage.vkModule = gpu.shader.CompileShader(runtimeInfo, pipelineStage.program, bindings); pipelineStage.bindingLast = bindings.unified; } @@ -1128,7 +1128,7 @@ namespace skyline::gpu::interconnect { shaderModules.emplace_back(pipelineStage.vkModule); shaderStages.emplace_back(vk::PipelineShaderStageCreateInfo{ .stage = pipelineStage.vkStage, - .module = **pipelineStage.vkModule, + .module = pipelineStage.vkModule, .pName = "main", }); } diff --git a/app/src/main/cpp/skyline/gpu/shader_manager.cpp b/app/src/main/cpp/skyline/gpu/shader_manager.cpp index 9762b85d..6967099e 100644 --- a/app/src/main/cpp/skyline/gpu/shader_manager.cpp +++ b/app/src/main/cpp/skyline/gpu/shader_manager.cpp @@ -197,14 
+197,77 @@ namespace skyline::gpu { return program; } - vk::raii::ShaderModule ShaderManager::CompileShader(Shader::RuntimeInfo &runtimeInfo, const std::shared_ptr &program, Shader::Backend::Bindings &bindings) { + bool ShaderManager::ShaderModuleState::operator==(const ShaderModuleState &other) const { + if (program != other.program) + return false; + + if (bindings.unified != other.bindings.unified || bindings.uniform_buffer != other.bindings.uniform_buffer || bindings.storage_buffer != other.bindings.storage_buffer || bindings.texture != other.bindings.texture || bindings.image != other.bindings.image || bindings.texture_scaling_index != other.bindings.texture_scaling_index || bindings.image_scaling_index != other.bindings.image_scaling_index) + return false; + + static_assert(sizeof(Shader::Backend::Bindings) == 0x1C); + + if (!std::equal(runtimeInfo.generic_input_types.begin(), runtimeInfo.generic_input_types.end(), other.runtimeInfo.generic_input_types.begin())) + return false; + + #define NEQ(member) runtimeInfo.member != other.runtimeInfo.member + + if (NEQ(previous_stage_stores.mask) || NEQ(convert_depth_mode) || NEQ(force_early_z) || NEQ(tess_primitive) || NEQ(tess_spacing) || NEQ(tess_clockwise) || NEQ(input_topology) || NEQ(fixed_state_point_size) || NEQ(alpha_test_func) || NEQ(alpha_test_reference) || NEQ(y_negate) || NEQ(glasm_use_storage_buffers)) + return false; + + #undef NEQ + + if (!std::equal(runtimeInfo.xfb_varyings.begin(), runtimeInfo.xfb_varyings.end(), other.runtimeInfo.xfb_varyings.begin(), [](const Shader::TransformFeedbackVarying &a, const Shader::TransformFeedbackVarying &b) { + return a.buffer == b.buffer && a.stride == b.stride && a.offset == b.offset && a.components == b.components; + })) + return false; + + static_assert(sizeof(Shader::RuntimeInfo) == 0x88); + + return true; + } + + constexpr size_t ShaderManager::ShaderModuleStateHash::operator()(const ShaderManager::ShaderModuleState &state) const { + size_t hash{}; + + 
boost::hash_combine(hash, state.program); + + hash = XXH64(&state.bindings, sizeof(Shader::Backend::Bindings), hash); + + #define RIH(member) boost::hash_combine(hash, state.runtimeInfo.member) + + hash = XXH64(state.runtimeInfo.generic_input_types.data(), state.runtimeInfo.generic_input_types.size() * sizeof(u32), hash); + hash = XXH64(&state.runtimeInfo.previous_stage_stores.mask, sizeof(state.runtimeInfo.previous_stage_stores.mask), hash); + RIH(convert_depth_mode); + RIH(force_early_z); + RIH(tess_primitive); + RIH(tess_spacing); + RIH(tess_clockwise); + RIH(input_topology); + RIH(fixed_state_point_size.value_or(NAN)); + RIH(alpha_test_func.value_or(Shader::CompareFunction::Never)); + RIH(alpha_test_reference); + RIH(glasm_use_storage_buffers); + hash = XXH64(state.runtimeInfo.xfb_varyings.data(), state.runtimeInfo.xfb_varyings.size() * sizeof(Shader::TransformFeedbackVarying), hash); + + static_assert(sizeof(Shader::RuntimeInfo) == 0x88); + #undef RIH + + return hash; + } + + vk::ShaderModule ShaderManager::CompileShader(Shader::RuntimeInfo &runtimeInfo, const std::shared_ptr &program, Shader::Backend::Bindings &bindings) { + auto it{shaderModuleCache.find(ShaderModuleState{program, bindings, runtimeInfo})}; + if (it != shaderModuleCache.end()) + return *it->second; + auto spirv{Shader::Backend::SPIRV::EmitSPIRV(profile, runtimeInfo, program->program, bindings)}; vk::ShaderModuleCreateInfo createInfo{ .pCode = spirv.data(), .codeSize = spirv.size() * sizeof(u32), }; - vk::raii::ShaderModule shaderModule(gpu.vkDevice, createInfo); - return shaderModule; + + auto shaderModule{shaderModuleCache.try_emplace(ShaderModuleState{program, bindings, runtimeInfo}, gpu.vkDevice, createInfo)}; + return *shaderModule.first->second; } } diff --git a/app/src/main/cpp/skyline/gpu/shader_manager.h b/app/src/main/cpp/skyline/gpu/shader_manager.h index abf3de5a..49e90492 100644 --- a/app/src/main/cpp/skyline/gpu/shader_manager.h +++ b/app/src/main/cpp/skyline/gpu/shader_manager.h 
@@ -64,6 +64,23 @@ namespace skyline::gpu { std::unordered_map, DualVertexProgramsHash> dualProgramCache; //!< A map from Vertex A and Vertex B shader programs to the corresponding dual vertex shader program + /** + * @brief All unique state that is required to compile a shader program, this is used as the key for the associative compiled shader program cache + */ + struct ShaderModuleState { + std::shared_ptr program; + Shader::Backend::Bindings bindings; + Shader::RuntimeInfo runtimeInfo; + + bool operator==(const ShaderModuleState &) const; + }; + + struct ShaderModuleStateHash { + constexpr size_t operator()(const ShaderModuleState &state) const; + }; + + std::unordered_map shaderModuleCache; //!< A map from shader module state to the corresponding Vulkan shader module + public: ShaderManager(const DeviceState &state, GPU &gpu); @@ -75,6 +92,6 @@ namespace skyline::gpu { */ std::shared_ptr CombineVertexShaders(const std::shared_ptr &vertexA, const std::shared_ptr &vertexB, span vertexBBinary); - vk::raii::ShaderModule CompileShader(Shader::RuntimeInfo &runtimeInfo, const std::shared_ptr &program, Shader::Backend::Bindings &bindings); + vk::ShaderModule CompileShader(Shader::RuntimeInfo &runtimeInfo, const std::shared_ptr &program, Shader::Backend::Bindings &bindings); }; }