mirror of
https://github.com/skyline-emu/skyline.git
synced 2025-01-04 07:05:29 +03:00
Skip zero-initializing shader bytecode backing
The backing for shader data would implicitly be zero-initialized due to a `resize` on every shader parse; this was entirely unnecessary, as we would overwrite the entire range regardless. We avoid this by using statically allocated storage and a span over it containing the shader bytecode, which avoids any unnecessary clear semantics without resorting to more complex solutions such as a custom allocator.
This commit is contained in:
parent
42573170c6
commit
25a29f9044
@ -135,11 +135,12 @@ namespace skyline {
|
|||||||
/**
|
/**
|
||||||
* @brief Writes contents starting from the virtual address till the end of the span or an unmapped block has been hit or when `function` returns a non-nullopt value
|
* @brief Writes contents starting from the virtual address till the end of the span or an unmapped block has been hit or when `function` returns a non-nullopt value
|
||||||
* @param function A function that is called on every block where it should return an end offset into the block when it wants to end reading or std::nullopt when it wants to continue reading
|
* @param function A function that is called on every block where it should return an end offset into the block when it wants to end reading or std::nullopt when it wants to continue reading
|
||||||
* @return If returning was caused by the supplied function returning a non-nullopt value or other conditions
|
* @return A span into the supplied container with the contents of the memory region
|
||||||
* @note The function will **NOT** be run on any sparse block
|
* @note The function will **NOT** be run on any sparse block
|
||||||
|
* @note The function will provide no feedback on if the end has been reached or if there was an early exit
|
||||||
*/
|
*/
|
||||||
template<typename Function, typename Container>
|
template<typename Function, typename Container>
|
||||||
bool ReadTill(Container& destination, VaType virt, Function function) {
|
span<u8> ReadTill(Container& destination, VaType virt, Function function) {
|
||||||
//TRACE_EVENT("containers", "FlatMemoryManager::ReadTill");
|
//TRACE_EVENT("containers", "FlatMemoryManager::ReadTill");
|
||||||
|
|
||||||
std::scoped_lock lock(this->blockMutex);
|
std::scoped_lock lock(this->blockMutex);
|
||||||
@ -158,18 +159,15 @@ namespace skyline {
|
|||||||
|
|
||||||
while (remainingSize) {
|
while (remainingSize) {
|
||||||
if (predecessor->phys == nullptr) {
|
if (predecessor->phys == nullptr) {
|
||||||
destination.resize(destination.size() - remainingSize);
|
return {destination.data(), destination.size() - remainingSize};
|
||||||
return false;
|
|
||||||
} else {
|
} else {
|
||||||
if (predecessor->extraInfo.sparseMapped) {
|
if (predecessor->extraInfo.sparseMapped) {
|
||||||
std::memset(pointer, 0, blockReadSize);
|
std::memset(pointer, 0, blockReadSize);
|
||||||
} else {
|
} else {
|
||||||
auto end{function(span<u8>(blockPhys, blockReadSize))};
|
auto end{function(span<u8>(blockPhys, blockReadSize))};
|
||||||
std::memcpy(pointer, blockPhys, end ? *end : blockReadSize);
|
std::memcpy(pointer, blockPhys, end ? *end : blockReadSize);
|
||||||
if (end) {
|
if (end)
|
||||||
destination.resize((destination.size() - remainingSize) + *end);
|
return {destination.data(), (destination.size() - remainingSize) + *end};
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -183,7 +181,7 @@ namespace skyline {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return {destination.data(), destination.size()};
|
||||||
}
|
}
|
||||||
|
|
||||||
void Write(VaType virt, u8 *source, VaType size);
|
void Write(VaType virt, u8 *source, VaType size);
|
||||||
|
@ -747,7 +747,8 @@ namespace skyline::gpu::interconnect {
|
|||||||
bool invalidated{true}; //!< If the shader that existed earlier has been invalidated
|
bool invalidated{true}; //!< If the shader that existed earlier has been invalidated
|
||||||
bool shouldCheckSame{false}; //!< If we should do a check for the shader being the same as before
|
bool shouldCheckSame{false}; //!< If we should do a check for the shader being the same as before
|
||||||
u32 offset{}; //!< Offset of the shader from the base IOVA
|
u32 offset{}; //!< Offset of the shader from the base IOVA
|
||||||
boost::container::static_vector<u8, MaxShaderBytecodeSize> data; //!< The shader bytecode in a statically allocated vector
|
std::array<u8, MaxShaderBytecodeSize> backing; //!< The backing storage for shader bytecode in a statically allocated array
|
||||||
|
span<u8> bytecode{}; //!< A span of the shader bytecode inside the backing storage
|
||||||
std::shared_ptr<ShaderManager::ShaderProgram> program{};
|
std::shared_ptr<ShaderManager::ShaderProgram> program{};
|
||||||
|
|
||||||
Shader(ShaderCompiler::Stage stage) : stage(stage) {}
|
Shader(ShaderCompiler::Stage stage) : stage(stage) {}
|
||||||
@ -914,10 +915,10 @@ namespace skyline::gpu::interconnect {
|
|||||||
// If a shader is invalidated, we need to reparse the program (given that it has changed)
|
// If a shader is invalidated, we need to reparse the program (given that it has changed)
|
||||||
|
|
||||||
bool shouldParseShader{[&]() {
|
bool shouldParseShader{[&]() {
|
||||||
if (!shader.data.empty() && shader.shouldCheckSame) {
|
if (shader.bytecode.valid() && shader.shouldCheckSame) {
|
||||||
// A fast path to check if the shader is the same as before to avoid reparsing the shader
|
// A fast path to check if the shader is the same as before to avoid reparsing the shader
|
||||||
auto newIovaRanges{channelCtx.asCtx->gmmu.TranslateRange(shaderBaseIova + shader.offset, shader.data.size())};
|
auto newIovaRanges{channelCtx.asCtx->gmmu.TranslateRange(shaderBaseIova + shader.offset, shader.bytecode.size())};
|
||||||
auto originalShader{shader.data.data()};
|
auto originalShader{shader.bytecode.data()};
|
||||||
|
|
||||||
for (auto &range : newIovaRanges) {
|
for (auto &range : newIovaRanges) {
|
||||||
if (range.data() && std::memcmp(range.data(), originalShader, range.size()) == 0) {
|
if (range.data() && std::memcmp(range.data(), originalShader, range.size()) == 0) {
|
||||||
@ -936,8 +937,7 @@ namespace skyline::gpu::interconnect {
|
|||||||
|
|
||||||
if (shouldParseShader) {
|
if (shouldParseShader) {
|
||||||
// A pass to check if the shader has a BRA infloop opcode ending (On most commercial games)
|
// A pass to check if the shader has a BRA infloop opcode ending (On most commercial games)
|
||||||
shader.data.resize(MaxShaderBytecodeSize);
|
shader.bytecode = channelCtx.asCtx->gmmu.ReadTill(shader.backing, shaderBaseIova + shader.offset, [](span<u8> data) -> std::optional<size_t> {
|
||||||
auto foundEnd{channelCtx.asCtx->gmmu.ReadTill(shader.data, shaderBaseIova + shader.offset, [](span<u8> data) -> std::optional<size_t> {
|
|
||||||
// We attempt to find the shader size by looking for "BRA $" (Infinite Loop) which is used as padding at the end of the shader
|
// We attempt to find the shader size by looking for "BRA $" (Infinite Loop) which is used as padding at the end of the shader
|
||||||
// UAM Shader Compiler Reference: https://github.com/devkitPro/uam/blob/5a5afc2bae8b55409ab36ba45be63fcb73f68993/source/compiler_iface.cpp#L319-L351
|
// UAM Shader Compiler Reference: https://github.com/devkitPro/uam/blob/5a5afc2bae8b55409ab36ba45be63fcb73f68993/source/compiler_iface.cpp#L319-L351
|
||||||
constexpr u64 BraSelf1{0xE2400FFFFF87000F}, BraSelf2{0xE2400FFFFF07000F};
|
constexpr u64 BraSelf1{0xE2400FFFFF87000F}, BraSelf2{0xE2400FFFFF07000F};
|
||||||
@ -950,9 +950,9 @@ namespace skyline::gpu::interconnect {
|
|||||||
return static_cast<size_t>(std::distance(shaderInstructions.begin(), it)) * sizeof(u64);
|
return static_cast<size_t>(std::distance(shaderInstructions.begin(), it)) * sizeof(u64);
|
||||||
}
|
}
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
})};
|
});
|
||||||
|
|
||||||
shader.program = gpu.shader.ParseGraphicsShader(shader.stage, shader.data, shader.offset, bindlessTextureConstantBufferIndex);
|
shader.program = gpu.shader.ParseGraphicsShader(shader.stage, shader.bytecode, shader.offset, bindlessTextureConstantBufferIndex);
|
||||||
|
|
||||||
if (shader.stage != ShaderCompiler::Stage::VertexA && shader.stage != ShaderCompiler::Stage::VertexB) {
|
if (shader.stage != ShaderCompiler::Stage::VertexA && shader.stage != ShaderCompiler::Stage::VertexB) {
|
||||||
pipelineStage.program = shader.program;
|
pipelineStage.program = shader.program;
|
||||||
@ -963,13 +963,13 @@ namespace skyline::gpu::interconnect {
|
|||||||
throw exception("Enabling VertexA without VertexB is not supported");
|
throw exception("Enabling VertexA without VertexB is not supported");
|
||||||
else if (!vertexB.invalidated)
|
else if (!vertexB.invalidated)
|
||||||
// If only VertexA is invalidated, we need to recombine here but we can defer it otherwise
|
// If only VertexA is invalidated, we need to recombine here but we can defer it otherwise
|
||||||
pipelineStage.program = gpu.shader.CombineVertexShaders(shader.program, vertexB.program, vertexB.data);
|
pipelineStage.program = gpu.shader.CombineVertexShaders(shader.program, vertexB.program, vertexB.bytecode);
|
||||||
} else if (shader.stage == ShaderCompiler::Stage::VertexB) {
|
} else if (shader.stage == ShaderCompiler::Stage::VertexB) {
|
||||||
auto &vertexA{shaders[maxwell3d::ShaderStage::VertexA]};
|
auto &vertexA{shaders[maxwell3d::ShaderStage::VertexA]};
|
||||||
|
|
||||||
if (vertexA.enabled)
|
if (vertexA.enabled)
|
||||||
// We need to combine the vertex shader stages if VertexA is enabled
|
// We need to combine the vertex shader stages if VertexA is enabled
|
||||||
pipelineStage.program = gpu.shader.CombineVertexShaders(vertexA.program, shader.program, shader.data);
|
pipelineStage.program = gpu.shader.CombineVertexShaders(vertexA.program, shader.program, shader.bytecode);
|
||||||
else
|
else
|
||||||
pipelineStage.program = shader.program;
|
pipelineStage.program = shader.program;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user