Implement Shader Module Cache

Implements caching of the compiled shader module (`VkShaderModule`) in an associative map keyed on the supplied IR, bindings and runtime state to avoid constant recompilation of shaders. This doesn't entirely address shader compilation as an issue: host shader compilation is tied to Vulkan pipeline objects rather than Vulkan shader modules, so pipeline objects also need to be cached to prevent costly host shader recompilation.
This commit is contained in:
PixelyIon 2022-04-16 15:40:35 +05:30
parent 76d8172a35
commit 02f99273ac
3 changed files with 90 additions and 10 deletions

View File

@ -657,7 +657,7 @@ namespace skyline::gpu::interconnect {
struct KeyHash { struct KeyHash {
size_t operator()(const Key &entry) const noexcept { size_t operator()(const Key &entry) const noexcept {
size_t seed = 0; size_t seed{};
boost::hash_combine(seed, entry.size); boost::hash_combine(seed, entry.size);
boost::hash_combine(seed, entry.iova); boost::hash_combine(seed, entry.iova);
@ -784,7 +784,7 @@ namespace skyline::gpu::interconnect {
bool needsRecompile{}; //!< If the shader needs to be recompiled as runtime information has changed bool needsRecompile{}; //!< If the shader needs to be recompiled as runtime information has changed
ShaderCompiler::VaryingState previousStageStores{}; ShaderCompiler::VaryingState previousStageStores{};
u32 bindingBase{}, bindingLast{}; //!< The base and last binding for descriptors bound to this stage u32 bindingBase{}, bindingLast{}; //!< The base and last binding for descriptors bound to this stage
std::shared_ptr<vk::raii::ShaderModule> vkModule; vk::ShaderModule vkModule;
std::array<ConstantBuffer, maxwell3d::PipelineStageConstantBufferCount> constantBuffers{}; std::array<ConstantBuffer, maxwell3d::PipelineStageConstantBufferCount> constantBuffers{};
@ -828,7 +828,7 @@ namespace skyline::gpu::interconnect {
* @note The `descriptorSetWrite` will have a null `dstSet` which needs to be assigned prior to usage * @note The `descriptorSetWrite` will have a null `dstSet` which needs to be assigned prior to usage
*/ */
struct ShaderProgramState { struct ShaderProgramState {
boost::container::static_vector<std::shared_ptr<vk::raii::ShaderModule>, maxwell3d::PipelineStageCount> shaderModules; //!< Shader modules for every pipeline stage boost::container::static_vector<vk::ShaderModule, maxwell3d::PipelineStageCount> shaderModules; //!< Shader modules for every pipeline stage
boost::container::static_vector<vk::PipelineShaderStageCreateInfo, maxwell3d::PipelineStageCount> shaderStages; //!< Shader modules for every pipeline stage boost::container::static_vector<vk::PipelineShaderStageCreateInfo, maxwell3d::PipelineStageCount> shaderStages; //!< Shader modules for every pipeline stage
vk::raii::DescriptorSetLayout descriptorSetLayout; //!< The descriptor set layout for the pipeline (Only valid when `activeShaderStagesInfoCount` is non-zero) vk::raii::DescriptorSetLayout descriptorSetLayout; //!< The descriptor set layout for the pipeline (Only valid when `activeShaderStagesInfoCount` is non-zero)
@ -986,7 +986,7 @@ namespace skyline::gpu::interconnect {
ShaderCompiler::Backend::Bindings bindings{}; ShaderCompiler::Backend::Bindings bindings{};
size_t bufferIndex{}, imageIndex{}; size_t bufferIndex{}, imageIndex{};
boost::container::static_vector<std::shared_ptr<vk::raii::ShaderModule>, maxwell3d::PipelineStageCount> shaderModules; boost::container::static_vector<vk::ShaderModule, maxwell3d::PipelineStageCount> shaderModules;
boost::container::static_vector<vk::PipelineShaderStageCreateInfo, maxwell3d::PipelineStageCount> shaderStages; boost::container::static_vector<vk::PipelineShaderStageCreateInfo, maxwell3d::PipelineStageCount> shaderStages;
for (auto &pipelineStage : pipelineStages) { for (auto &pipelineStage : pipelineStages) {
if (!pipelineStage.enabled) if (!pipelineStage.enabled)
@ -995,7 +995,7 @@ namespace skyline::gpu::interconnect {
if (pipelineStage.needsRecompile || bindings.unified != pipelineStage.bindingBase || pipelineStage.previousStageStores.mask != runtimeInfo.previous_stage_stores.mask) { if (pipelineStage.needsRecompile || bindings.unified != pipelineStage.bindingBase || pipelineStage.previousStageStores.mask != runtimeInfo.previous_stage_stores.mask) {
pipelineStage.previousStageStores = runtimeInfo.previous_stage_stores; pipelineStage.previousStageStores = runtimeInfo.previous_stage_stores;
pipelineStage.bindingBase = bindings.unified; pipelineStage.bindingBase = bindings.unified;
pipelineStage.vkModule = std::make_shared<vk::raii::ShaderModule>(gpu.shader.CompileShader(runtimeInfo, pipelineStage.program, bindings)); pipelineStage.vkModule = gpu.shader.CompileShader(runtimeInfo, pipelineStage.program, bindings);
pipelineStage.bindingLast = bindings.unified; pipelineStage.bindingLast = bindings.unified;
} }
@ -1128,7 +1128,7 @@ namespace skyline::gpu::interconnect {
shaderModules.emplace_back(pipelineStage.vkModule); shaderModules.emplace_back(pipelineStage.vkModule);
shaderStages.emplace_back(vk::PipelineShaderStageCreateInfo{ shaderStages.emplace_back(vk::PipelineShaderStageCreateInfo{
.stage = pipelineStage.vkStage, .stage = pipelineStage.vkStage,
.module = **pipelineStage.vkModule, .module = pipelineStage.vkModule,
.pName = "main", .pName = "main",
}); });
} }

View File

@ -197,14 +197,77 @@ namespace skyline::gpu {
return program; return program;
} }
/**
 * @brief Equality for shader module cache keys: two states are equal when they reference the same program object and every listed binding/runtime field matches
 * @note The `static_assert`s on the sizes of `Bindings` and `RuntimeInfo` presumably exist so that adding a field to either struct fails the build until this comparison is revisited
 */
vk::raii::ShaderModule ShaderManager::CompileShader(Shader::RuntimeInfo &runtimeInfo, const std::shared_ptr<ShaderProgram> &program, Shader::Backend::Bindings &bindings) { bool ShaderManager::ShaderModuleState::operator==(const ShaderModuleState &other) const {
// `program` is a shared_ptr, so this compares the pointed-to object's identity rather than its contents
if (program != other.program)
return false;
// Bindings are compared field by field
if (bindings.unified != other.bindings.unified || bindings.uniform_buffer != other.bindings.uniform_buffer || bindings.storage_buffer != other.bindings.storage_buffer || bindings.texture != other.bindings.texture || bindings.image != other.bindings.image || bindings.texture_scaling_index != other.bindings.texture_scaling_index || bindings.image_scaling_index != other.bindings.image_scaling_index)
return false;
static_assert(sizeof(Shader::Backend::Bindings) == 0x1C);
// Three-iterator std::equal: relies on both `generic_input_types` ranges having the same length
if (!std::equal(runtimeInfo.generic_input_types.begin(), runtimeInfo.generic_input_types.end(), other.runtimeInfo.generic_input_types.begin()))
return false;
// NEQ(member) expands to an inequality test of the same RuntimeInfo member on both sides
#define NEQ(member) runtimeInfo.member != other.runtimeInfo.member
if (NEQ(previous_stage_stores.mask) || NEQ(convert_depth_mode) || NEQ(force_early_z) || NEQ(tess_primitive) || NEQ(tess_spacing) || NEQ(tess_clockwise) || NEQ(input_topology) || NEQ(fixed_state_point_size) || NEQ(alpha_test_func) || NEQ(alpha_test_reference) || NEQ(y_negate) || NEQ(glasm_use_storage_buffers))
return false;
#undef NEQ
// Transform feedback varyings are compared member-wise (buffer, stride, offset, components)
if (!std::equal(runtimeInfo.xfb_varyings.begin(), runtimeInfo.xfb_varyings.end(), other.runtimeInfo.xfb_varyings.begin(), [](const Shader::TransformFeedbackVarying &a, const Shader::TransformFeedbackVarying &b) {
return a.buffer == b.buffer && a.stride == b.stride && a.offset == b.offset && a.components == b.components;
}))
return false;
static_assert(sizeof(Shader::RuntimeInfo) == 0x88);
return true;
}
/**
 * @brief Computes the hash of a shader module cache key from its program pointer, bindings and runtime information
 * @param state The cache key to hash
 * @return A hash that folds in every field which ShaderModuleState::operator== distinguishes on
 * @note `y_negate` is hashed as well since operator== compares it, otherwise states differing only in that flag would always collide
 * @note NOTE(review): this is `constexpr` to match its in-class declaration, but `XXH64` and `boost::hash_combine` are presumably not usable in constant expressions — confirm whether the specifier should be dropped from both
 */
constexpr size_t ShaderManager::ShaderModuleStateHash::operator()(const ShaderManager::ShaderModuleState &state) const {
    size_t hash{};

    // The program is hashed by its shared_ptr (i.e. object identity), mirroring the pointer comparison in operator==
    boost::hash_combine(hash, state.program);

    // Bindings are hashed as a raw byte blob, each XXH64 call is seeded with the running hash to chain the results
    hash = XXH64(&state.bindings, sizeof(Shader::Backend::Bindings), hash);

    // RIH(member) folds a single RuntimeInfo member (or an expression on one) into the running hash
    #define RIH(member) boost::hash_combine(hash, state.runtimeInfo.member)

    hash = XXH64(state.runtimeInfo.generic_input_types.data(), state.runtimeInfo.generic_input_types.size() * sizeof(u32), hash);
    hash = XXH64(&state.runtimeInfo.previous_stage_stores.mask, sizeof(state.runtimeInfo.previous_stage_stores.mask), hash);

    RIH(convert_depth_mode);
    RIH(force_early_z);
    RIH(tess_primitive);
    RIH(tess_spacing);
    RIH(tess_clockwise);
    RIH(input_topology);
    // Optionals are collapsed to a sentinel when empty, this can only cause an extra collision which operator== resolves
    RIH(fixed_state_point_size.value_or(NAN));
    RIH(alpha_test_func.value_or(Shader::CompareFunction::Never));
    RIH(alpha_test_reference);
    RIH(y_negate);
    RIH(glasm_use_storage_buffers);

    hash = XXH64(state.runtimeInfo.xfb_varyings.data(), state.runtimeInfo.xfb_varyings.size() * sizeof(Shader::TransformFeedbackVarying), hash);

    // Fails the build when RuntimeInfo changes size so that new members are not silently left out of the hash
    static_assert(sizeof(Shader::RuntimeInfo) == 0x88);

    #undef RIH

    return hash;
}
/**
 * @brief Returns the shader module for the supplied program, bindings and runtime information, emitting SPIR-V and creating a new module only when no matching entry exists in `shaderModuleCache`
 * @return A non-owning `vk::ShaderModule` handle, the owning `vk::raii::ShaderModule` stays inside `shaderModuleCache`
 * @note NOTE(review): the lookup key is built from `bindings` before `EmitSPIRV` while the insertion key is built after it; `bindings` is passed to `EmitSPIRV` by non-const reference, so if it is modified there the two keys differ and the inserted entry will not be found by an identical later call — confirm
 * @note NOTE(review): on a cache hit `bindings` is returned untouched; callers that read `bindings` after this call presumably expect it to have been advanced as it would be by a compilation — confirm
 */
vk::ShaderModule ShaderManager::CompileShader(Shader::RuntimeInfo &runtimeInfo, const std::shared_ptr<ShaderProgram> &program, Shader::Backend::Bindings &bindings) {
// Fast path: hand back the cached module's handle if this exact state was seen before
auto it{shaderModuleCache.find(ShaderModuleState{program, bindings, runtimeInfo})};
if (it != shaderModuleCache.end())
return *it->second;
// Slow path: emit SPIR-V for the program
auto spirv{Shader::Backend::SPIRV::EmitSPIRV(profile, runtimeInfo, program->program, bindings)}; auto spirv{Shader::Backend::SPIRV::EmitSPIRV(profile, runtimeInfo, program->program, bindings)};
vk::ShaderModuleCreateInfo createInfo{ vk::ShaderModuleCreateInfo createInfo{
.pCode = spirv.data(), .pCode = spirv.data(),
// `codeSize` is in bytes while `spirv.size()` counts elements, hence the multiplication by sizeof(u32)
.codeSize = spirv.size() * sizeof(u32), .codeSize = spirv.size() * sizeof(u32),
}; };
vk::raii::ShaderModule shaderModule(gpu.vkDevice, createInfo);
// try_emplace constructs the RAII module in place inside the map; if the key already exists the existing module is kept and returned
return shaderModule; auto shaderModule{shaderModuleCache.try_emplace(ShaderModuleState{program, bindings, runtimeInfo}, gpu.vkDevice, createInfo)};
return *shaderModule.first->second;
} }
} }

View File

@ -64,6 +64,23 @@ namespace skyline::gpu {
std::unordered_map<DualVertexPrograms, std::shared_ptr<DualVertexShaderProgram>, DualVertexProgramsHash> dualProgramCache; //!< A map from Vertex A and Vertex B shader programs to the corresponding dual vertex shader program std::unordered_map<DualVertexPrograms, std::shared_ptr<DualVertexShaderProgram>, DualVertexProgramsHash> dualProgramCache; //!< A map from Vertex A and Vertex B shader programs to the corresponding dual vertex shader program
/**
* @brief All unique state that is required to compile a shader program, this is used as the key for the associative compiled shader module cache
* @note The member order is relied upon by brace-initialization (`ShaderModuleState{program, bindings, runtimeInfo}`) and must not be changed
*/
struct ShaderModuleState {
std::shared_ptr<ShaderProgram> program; //!< The shader program to compile, keys compare this by pointer identity
Shader::Backend::Bindings bindings; //!< A copy of the descriptor bindings supplied for compilation
Shader::RuntimeInfo runtimeInfo; //!< A copy of the runtime information supplied for compilation
bool operator==(const ShaderModuleState &) const; //!< Required for usage as an `std::unordered_map` key
};
/**
* @brief Hash functor for ShaderModuleState, supplied as the hasher of the shader module cache map
*/
struct ShaderModuleStateHash {
constexpr size_t operator()(const ShaderModuleState &state) const;
};
std::unordered_map<ShaderModuleState, vk::raii::ShaderModule, ShaderModuleStateHash> shaderModuleCache; //!< A map from shader module state to the corresponding Vulkan shader module
public: public:
ShaderManager(const DeviceState &state, GPU &gpu); ShaderManager(const DeviceState &state, GPU &gpu);
@ -75,6 +92,6 @@ namespace skyline::gpu {
*/ */
std::shared_ptr<ShaderManager::ShaderProgram> CombineVertexShaders(const std::shared_ptr<ShaderProgram> &vertexA, const std::shared_ptr<ShaderProgram> &vertexB, span<u8> vertexBBinary); std::shared_ptr<ShaderManager::ShaderProgram> CombineVertexShaders(const std::shared_ptr<ShaderProgram> &vertexA, const std::shared_ptr<ShaderProgram> &vertexB, span<u8> vertexBBinary);
vk::raii::ShaderModule CompileShader(Shader::RuntimeInfo &runtimeInfo, const std::shared_ptr<ShaderProgram> &program, Shader::Backend::Bindings &bindings); vk::ShaderModule CompileShader(Shader::RuntimeInfo &runtimeInfo, const std::shared_ptr<ShaderProgram> &program, Shader::Backend::Bindings &bindings);
}; };
} }