diff --git a/app/src/main/cpp/skyline/gpu.cpp b/app/src/main/cpp/skyline/gpu.cpp index 092d2d17..cc2267d9 100644 --- a/app/src/main/cpp/skyline/gpu.cpp +++ b/app/src/main/cpp/skyline/gpu.cpp @@ -400,14 +400,18 @@ namespace skyline::gpu { buffer(*this), megaBufferAllocator(*this), descriptor(*this), - shader(state, *this), helperShaders(*this, state.os->assetFileSystem), graphicsPipelineCache(*this), renderPassCache(*this), framebufferCache(*this) {} void GPU::Initialise() { - graphicsPipelineCacheManager.emplace(state, state.os->publicAppFilesPath + "graphics_pipeline_cache/" + state.loader->nacp->GetSaveDataOwnerId()); + std::string titleId{state.loader->nacp->GetSaveDataOwnerId()}; + shader.emplace(state, *this, + state.os->publicAppFilesPath + "shader_replacements/" + titleId, + state.os->publicAppFilesPath + "shader_dumps/" + titleId); + graphicsPipelineCacheManager.emplace(state, + state.os->publicAppFilesPath + "graphics_pipeline_cache/" + titleId); graphicsPipelineManager.emplace(*this); } } diff --git a/app/src/main/cpp/skyline/gpu.h b/app/src/main/cpp/skyline/gpu.h index 240a69b6..71079d40 100644 --- a/app/src/main/cpp/skyline/gpu.h +++ b/app/src/main/cpp/skyline/gpu.h @@ -55,7 +55,7 @@ namespace skyline::gpu { MegaBufferAllocator megaBufferAllocator; DescriptorAllocator descriptor; - ShaderManager shader; + std::optional shader; HelperShaders helperShaders; diff --git a/app/src/main/cpp/skyline/gpu/interconnect/kepler_compute/pipeline_manager.cpp b/app/src/main/cpp/skyline/gpu/interconnect/kepler_compute/pipeline_manager.cpp index b1c22f25..508dd72d 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/kepler_compute/pipeline_manager.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/kepler_compute/pipeline_manager.cpp @@ -10,10 +10,10 @@ namespace skyline::gpu::interconnect::kepler_compute { static Pipeline::ShaderStage MakePipelineShader(InterconnectContext &ctx, Textures &textures, ConstantBufferSet &constantBuffers, const PackedPipelineState &packedState, const ShaderBinary &shaderBinary) { - ctx.gpu.shader.ResetPools(); + ctx.gpu.shader->ResetPools(); - auto program{ctx.gpu.shader.ParseComputeShader( - shaderBinary.binary, shaderBinary.baseOffset, + auto program{ctx.gpu.shader->ParseComputeShader( + packedState.shaderHash, shaderBinary.binary, shaderBinary.baseOffset, packedState.bindlessTextureConstantBufferSlotSelect, packedState.localMemorySize, packedState.sharedMemorySize, packedState.dimensions, @@ -25,7 +25,7 @@ namespace skyline::gpu::interconnect::kepler_compute { Shader::Backend::Bindings bindings{}; - return {ctx.gpu.shader.CompileShader({}, program, bindings), program.info}; + return {ctx.gpu.shader->CompileShader({}, program, bindings), program.info}; } static Pipeline::DescriptorInfo MakePipelineDescriptorInfo(const Pipeline::ShaderStage &stage) { diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.cpp b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.cpp index b5f1a43e..59b740b9 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.cpp @@ -187,7 +187,7 @@ namespace skyline::gpu::interconnect::maxwell3d { } static std::array MakePipelineShaders(GPU &gpu, const PipelineStateAccessor &accessor, const PackedPipelineState &packedState) { - gpu.shader.ResetPools(); + gpu.shader->ResetPools(); using PipelineStage = engine::Pipeline::Shader::Type; auto pipelineStage{[](u32 i) { return static_cast(i); }}; @@ -200,16 +200,16 @@ namespace skyline::gpu::interconnect::maxwell3d { for (u32 i{}; i < engine::PipelineCount; i++) { if (!packedState.shaderHashes[i]) { if (i == stageIdx(PipelineStage::Geometry) && layerConversionSourceProgram) - programs[i] = gpu.shader.GenerateGeometryPassthroughShader(*layerConversionSourceProgram, ConvertShaderOutputTopology(packedState.topology)); + programs[i] = gpu.shader->GenerateGeometryPassthroughShader(*layerConversionSourceProgram, ConvertShaderOutputTopology(packedState.topology)); continue; } auto binary{accessor.GetShaderBinary(i)}; - auto program{gpu.shader.ParseGraphicsShader( + auto program{gpu.shader->ParseGraphicsShader( packedState.postVtgShaderAttributeSkipMask, ConvertCompilerShaderStage(static_cast(i)), - binary.binary, binary.baseOffset, + packedState.shaderHashes[i], binary.binary, binary.baseOffset, packedState.bindlessTextureConstantBufferSlotSelect, packedState.viewportTransformEnable, [&](u32 index, u32 offset) { @@ -220,7 +220,7 @@ namespace skyline::gpu::interconnect::maxwell3d { })}; if (i == stageIdx(PipelineStage::Vertex) && packedState.shaderHashes[stageIdx(PipelineStage::VertexCullBeforeFetch)]) { ignoreVertexCullBeforeFetch = true; - programs[i] = gpu.shader.CombineVertexShaders(programs[stageIdx(PipelineStage::VertexCullBeforeFetch)], program, binary.binary); + programs[i] = gpu.shader->CombineVertexShaders(programs[stageIdx(PipelineStage::VertexCullBeforeFetch)], program, binary.binary); } else { programs[i] = program; } @@ -240,7 +240,9 @@ namespace skyline::gpu::interconnect::maxwell3d { continue; auto runtimeInfo{MakeRuntimeInfo(packedState, programs[i], lastProgram, hasGeometry)}; - shaderStages[i - (i >= 1 ? 1 : 0)] = {ConvertVkShaderStage(pipelineStage(i)), gpu.shader.CompileShader(runtimeInfo, programs[i], bindings), programs[i].info}; + shaderStages[i - (i >= 1 ? 1 : 0)] = {ConvertVkShaderStage(pipelineStage(i)), + gpu.shader->CompileShader(runtimeInfo, programs[i], bindings, packedState.shaderHashes[i]), + programs[i].info}; lastProgram = &programs[i]; } diff --git a/app/src/main/cpp/skyline/gpu/shader_manager.cpp b/app/src/main/cpp/skyline/gpu/shader_manager.cpp index 86765d62..2082c54c 100644 --- a/app/src/main/cpp/skyline/gpu/shader_manager.cpp +++ b/app/src/main/cpp/skyline/gpu/shader_manager.cpp @@ -11,6 +11,8 @@ #include #include "shader_manager.h" +static constexpr bool DumpShaders{false}; + namespace Shader::Log { void Debug(const std::string &message) { skyline::Logger::Write(skyline::Logger::LogLevel::Debug, message); @@ -26,7 +28,53 @@ namespace Shader::Log { } namespace skyline::gpu { - ShaderManager::ShaderManager(const DeviceState &state, GPU &gpu) : gpu{gpu} { + void ShaderManager::LoadShaderReplacements(std::string_view replacementDir) { + std::filesystem::path replacementDirPath{replacementDir}; + if (std::filesystem::exists(replacementDirPath)) { + for (const auto &entry : std::filesystem::directory_iterator{replacementDirPath}) { + if (entry.is_regular_file()) { + // Parse hash from filename + u64 hash{std::stoull(entry.path().filename().string(), nullptr, 16)}; + auto it{shaderReplacements.insert({hash, {}})}; + + // Read file into map entry + std::ifstream file{entry.path(), std::ios::binary | std::ios::ate}; + it.first->second.resize(static_cast(file.tellg())); + file.seekg(0, std::ios::beg); + file.read(reinterpret_cast(it.first->second.data()), static_cast(it.first->second.size())); + } + } + } + } + + span ShaderManager::ProcessShaderBinary(u64 hash, span binary) { + auto it{shaderReplacements.find(hash)}; + if (it != shaderReplacements.end()) { + Logger::Info("Replacing shader with hash: 0x{:X}", hash); + return it->second; + } + + if (DumpShaders) { + std::scoped_lock lock{dumpMutex}; + + auto shaderPath{dumpPath / fmt::format("{:016X}", hash)}; + if (!std::filesystem::exists(shaderPath)) { + std::ofstream file{shaderPath, std::ios::binary}; + file.write(reinterpret_cast(binary.data()), static_cast(binary.size())); + } + } + + return binary; + } + + ShaderManager::ShaderManager(const DeviceState &state, GPU &gpu, std::string_view replacementDir, std::string_view dumpDir) : gpu{gpu}, dumpPath{dumpDir} { + LoadShaderReplacements(replacementDir); + + if constexpr (DumpShaders) { + if (!std::filesystem::exists(dumpPath)) + std::filesystem::create_directories(dumpPath); + } + auto &traits{gpu.traits}; hostTranslateInfo = Shader::HostTranslateInfo{ .support_float16 = traits.supportsFloat16, @@ -282,15 +330,25 @@ namespace skyline::gpu { return {0, 0, 0}; // Only relevant for compute shaders } + [[nodiscard]] bool HasHLEMacroState() const final { + return false; + } + + [[nodiscard]] std::optional GetReplaceConstBuffer(u32 bank, u32 offset) final { + return std::nullopt; + } + void Dump(u64 hash) final {} }; Shader::IR::Program ShaderManager::ParseGraphicsShader(const std::array &postVtgShaderAttributeSkipMask, Shader::Stage stage, - span binary, u32 baseOffset, + u64 hash, span binary, u32 baseOffset, u32 textureConstantBufferIndex, bool viewportTransformEnabled, const ConstantBufferRead &constantBufferRead, const GetTextureType &getTextureType) { + binary = ProcessShaderBinary(hash, binary); + std::scoped_lock lock{poolMutex}; GraphicsEnvironment environment{postVtgShaderAttributeSkipMask, stage, binary, baseOffset, textureConstantBufferIndex, viewportTransformEnabled, constantBufferRead, getTextureType}; @@ -311,11 +369,13 @@ namespace skyline::gpu { return Shader::Maxwell::GenerateGeometryPassthrough(instructionPool, blockPool, hostTranslateInfo, layerSource, topology); } - Shader::IR::Program ShaderManager::ParseComputeShader(span binary, u32 baseOffset, + Shader::IR::Program ShaderManager::ParseComputeShader(u64 hash, span binary, u32 baseOffset, u32 textureConstantBufferIndex, u32 localMemorySize, u32 sharedMemorySize, std::array workgroupDimensions, const ConstantBufferRead &constantBufferRead, const GetTextureType &getTextureType) { + binary = ProcessShaderBinary(hash, binary); + std::scoped_lock lock{poolMutex}; ComputeEnvironment environment{binary, baseOffset, textureConstantBufferIndex, localMemorySize, sharedMemorySize, workgroupDimensions, constantBufferRead, getTextureType}; diff --git a/app/src/main/cpp/skyline/gpu/shader_manager.h b/app/src/main/cpp/skyline/gpu/shader_manager.h index 3ffcab1a..c430a51d 100644 --- a/app/src/main/cpp/skyline/gpu/shader_manager.h +++ b/app/src/main/cpp/skyline/gpu/shader_manager.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include #include @@ -27,18 +28,32 @@ namespace skyline::gpu { Shader::ObjectPool flowBlockPool; Shader::ObjectPool instructionPool; Shader::ObjectPool blockPool; + std::unordered_map> shaderReplacements; //!< Map of shader hash -> replacement shader binary, populated at init time and must not be modified after std::mutex poolMutex; + std::filesystem::path dumpPath; + std::mutex dumpMutex; + + /** + * @brief Called at init time to populate the shader replacements map from the input directory + */ + void LoadShaderReplacements(std::string_view replacementDir); + + /** + * @brief Returns the raw binary of shader replacement for the given hash, if no replacement is found the input binary is returned + * @note This will also dump the binary to disk if dumping is enabled + */ + span ProcessShaderBinary(u64 hash, span binary); public: using ConstantBufferRead = std::function; //!< A function which reads a constant buffer at the specified offset and returns the value using GetTextureType = std::function; //!< A function which determines the type of a texture from its handle by checking the corresponding TIC - ShaderManager(const DeviceState &state, GPU &gpu); + ShaderManager(const DeviceState &state, GPU &gpu, std::string_view replacementDir, std::string_view dumpDir); /** * @return A shader program that corresponds to all the supplied state including the current state of the constant buffers */ - Shader::IR::Program ParseGraphicsShader(const std::array &postVtgShaderAttributeSkipMask, Shader::Stage stage, span binary, u32 baseOffset, u32 textureConstantBufferIndex, bool viewportTransformEnabled, const ConstantBufferRead &constantBufferRead, const GetTextureType &getTextureType); + Shader::IR::Program ParseGraphicsShader(const std::array &postVtgShaderAttributeSkipMask, Shader::Stage stage, u64 hash, span binary, u32 baseOffset, u32 textureConstantBufferIndex, bool viewportTransformEnabled, const ConstantBufferRead &constantBufferRead, const GetTextureType &getTextureType); /** * @brief Combines the VertexA and VertexB shader programs into a single program @@ -51,7 +66,7 @@ namespace skyline::gpu { */ Shader::IR::Program GenerateGeometryPassthroughShader(Shader::IR::Program &layerSource, Shader::OutputTopology topology); - Shader::IR::Program ParseComputeShader(span binary, u32 baseOffset, u32 textureConstantBufferIndex, u32 localMemorySize, u32 sharedMemorySize, std::array workgroupDimensions, const ConstantBufferRead &constantBufferRead, const GetTextureType &getTextureType); + Shader::IR::Program ParseComputeShader(u64 hash, span binary, u32 baseOffset, u32 textureConstantBufferIndex, u32 localMemorySize, u32 sharedMemorySize, std::array workgroupDimensions, const ConstantBufferRead &constantBufferRead, const GetTextureType &getTextureType); vk::ShaderModule CompileShader(const Shader::RuntimeInfo &runtimeInfo, Shader::IR::Program &program, Shader::Backend::Bindings &bindings);