mirror of
https://github.com/skyline-emu/skyline.git
synced 2025-01-01 12:25:29 +03:00
Use NCE memory tracking for guest shaders
Prevents needing to hash them for every single pipeline state update; without this, just hashing shaders takes up a significant amount of time.
This commit is contained in:
parent
19a75c3f65
commit
cf0752f937
@ -8,6 +8,10 @@
|
|||||||
#include <soc/gm20b/engines/maxwell/types.h>
|
#include <soc/gm20b/engines/maxwell/types.h>
|
||||||
#include <gpu/buffer.h>
|
#include <gpu/buffer.h>
|
||||||
|
|
||||||
|
namespace skyline::kernel {
|
||||||
|
class MemoryManager;
|
||||||
|
}
|
||||||
|
|
||||||
namespace skyline::soc::gm20b {
|
namespace skyline::soc::gm20b {
|
||||||
struct ChannelContext;
|
struct ChannelContext;
|
||||||
}
|
}
|
||||||
@ -26,6 +30,8 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
soc::gm20b::ChannelContext &channelCtx;
|
soc::gm20b::ChannelContext &channelCtx;
|
||||||
CommandExecutor &executor;
|
CommandExecutor &executor;
|
||||||
GPU &gpu;
|
GPU &gpu;
|
||||||
|
nce::NCE &nce;
|
||||||
|
kernel::MemoryManager &memory;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -14,9 +14,11 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
Maxwell3D::Maxwell3D(GPU &gpu,
|
Maxwell3D::Maxwell3D(GPU &gpu,
|
||||||
soc::gm20b::ChannelContext &channelCtx,
|
soc::gm20b::ChannelContext &channelCtx,
|
||||||
gpu::interconnect::CommandExecutor &executor,
|
gpu::interconnect::CommandExecutor &executor,
|
||||||
|
nce::NCE &nce,
|
||||||
|
skyline::kernel::MemoryManager &memoryManager,
|
||||||
DirtyManager &manager,
|
DirtyManager &manager,
|
||||||
const EngineRegisterBundle &registerBundle)
|
const EngineRegisterBundle &registerBundle)
|
||||||
: ctx{channelCtx, executor, gpu},
|
: ctx{channelCtx, executor, gpu, nce, memoryManager},
|
||||||
activeState{manager, registerBundle.activeStateRegisters},
|
activeState{manager, registerBundle.activeStateRegisters},
|
||||||
clearEngineRegisters{registerBundle.clearRegisters},
|
clearEngineRegisters{registerBundle.clearRegisters},
|
||||||
constantBuffers{manager, registerBundle.constantBufferSelectorRegisters},
|
constantBuffers{manager, registerBundle.constantBufferSelectorRegisters},
|
||||||
|
@ -47,6 +47,8 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
Maxwell3D(GPU &gpu,
|
Maxwell3D(GPU &gpu,
|
||||||
soc::gm20b::ChannelContext &channelCtx,
|
soc::gm20b::ChannelContext &channelCtx,
|
||||||
gpu::interconnect::CommandExecutor &executor,
|
gpu::interconnect::CommandExecutor &executor,
|
||||||
|
nce::NCE &nce,
|
||||||
|
kernel::MemoryManager &memoryManager,
|
||||||
DirtyManager &manager,
|
DirtyManager &manager,
|
||||||
const EngineRegisterBundle &registerBundle);
|
const EngineRegisterBundle &registerBundle);
|
||||||
|
|
||||||
|
@ -4,6 +4,8 @@
|
|||||||
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||||
|
|
||||||
#include <range/v3/algorithm/for_each.hpp>
|
#include <range/v3/algorithm/for_each.hpp>
|
||||||
|
#include <nce.h>
|
||||||
|
#include <kernel/memory.h>
|
||||||
#include <soc/gm20b/channel.h>
|
#include <soc/gm20b/channel.h>
|
||||||
#include <soc/gm20b/gmmu.h>
|
#include <soc/gm20b/gmmu.h>
|
||||||
#include <gpu/texture/format.h>
|
#include <gpu/texture/format.h>
|
||||||
@ -228,24 +230,74 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
binary.binary = ctx.channelCtx.asCtx->gmmu.ReadTill(shaderBacking, engine->programRegion + engine->pipeline.programOffset, [](span<u8> data) -> std::optional<size_t> {
|
auto [blockMapping, blockOffset]{ctx.channelCtx.asCtx->gmmu.LookupBlock(engine->programRegion + engine->pipeline.programOffset)};
|
||||||
|
|
||||||
|
// Skip looking up the mirror if it is the same as the one used for the previous update
|
||||||
|
if (!mirrorBlock.valid() || !mirrorBlock.contains(blockMapping)) {
|
||||||
|
auto mirrorIt{mirrorMap.find(blockMapping.data())};
|
||||||
|
if (mirrorIt == mirrorMap.end()) {
|
||||||
|
// Allocate a host mirror for the mapping and trap the guest region
|
||||||
|
auto newIt{mirrorMap.emplace(blockMapping.data(), std::make_unique<MirrorEntry>(ctx.memory.CreateMirror(blockMapping)))};
|
||||||
|
|
||||||
|
// We need to create the trap after allocating the entry so that we have a pointer to its `dirty` flag that we can pass in
|
||||||
|
auto trapHandle{ctx.nce.CreateTrap(blockMapping, [](){}, [](){ return true; }, [dirty = &newIt.first->second->dirty, mutex = &trapMutex](){
|
||||||
|
std::scoped_lock lock{*mutex}; // Don't use lock callback here since we need trapMutex to be always locked on accesses to prevent UAFs
|
||||||
|
*dirty = true;
|
||||||
|
return true;
|
||||||
|
})};
|
||||||
|
|
||||||
|
// Write only trap
|
||||||
|
ctx.nce.TrapRegions(trapHandle, true);
|
||||||
|
|
||||||
|
entry = newIt.first->second.get();
|
||||||
|
entry->trap = trapHandle;
|
||||||
|
} else {
|
||||||
|
entry = mirrorIt->second.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
mirrorBlock = blockMapping;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the mirror entry has been written to, clear its shader binary cache and retrap to catch any future writes
|
||||||
|
if (entry->dirty) {
|
||||||
|
entry->cache.clear();
|
||||||
|
entry->dirty = false;
|
||||||
|
ctx.nce.TrapRegions(*entry->trap, true);
|
||||||
|
} else if (auto it{entry->cache.find(blockMapping.data() + blockOffset)}; it != entry->cache.end()) {
|
||||||
|
binary = it->second.binary;
|
||||||
|
hash = it->second.hash;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If nothing was in the cache then do a full shader parse
|
||||||
|
auto guest{[](span<u8> mapping) {
|
||||||
// We attempt to find the shader size by looking for "BRA $" (Infinite Loop) which is used as padding at the end of the shader
|
// We attempt to find the shader size by looking for "BRA $" (Infinite Loop) which is used as padding at the end of the shader
|
||||||
// UAM Shader Compiler Reference: https://github.com/devkitPro/uam/blob/5a5afc2bae8b55409ab36ba45be63fcb73f68993/source/compiler_iface.cpp#L319-L351
|
// UAM Shader Compiler Reference: https://github.com/devkitPro/uam/blob/5a5afc2bae8b55409ab36ba45be63fcb73f68993/source/compiler_iface.cpp#L319-L351
|
||||||
constexpr u64 BraSelf1{0xE2400FFFFF87000F}, BraSelf2{0xE2400FFFFF07000F};
|
constexpr u64 BraSelf1{0xE2400FFFFF87000F}, BraSelf2{0xE2400FFFFF07000F};
|
||||||
|
|
||||||
span<u64> shaderInstructions{data.cast<u64, std::dynamic_extent, true>()};
|
span<u64> shaderInstructions{mapping.cast<u64, std::dynamic_extent, true>()};
|
||||||
for (auto it{shaderInstructions.begin()}; it != shaderInstructions.end(); it++) {
|
for (auto it{shaderInstructions.begin()}; it != shaderInstructions.end(); it++) {
|
||||||
auto instruction{*it};
|
auto instruction{*it};
|
||||||
if (instruction == BraSelf1 || instruction == BraSelf2) [[unlikely]]
|
if (instruction == BraSelf1 || instruction == BraSelf2) [[unlikely]]
|
||||||
// It is far more likely that the instruction doesn't match so this is an unlikely case
|
// It is far more likely that the instruction doesn't match so this is an unlikely case
|
||||||
return static_cast<size_t>(std::distance(shaderInstructions.begin(), it)) * sizeof(u64);
|
return span{shaderInstructions.begin(), it}.cast<u8>();
|
||||||
}
|
}
|
||||||
return std::nullopt;
|
|
||||||
});
|
return span<u8>{};
|
||||||
|
}(blockMapping.subspan(blockOffset))};
|
||||||
|
|
||||||
binary.baseOffset = engine->pipeline.programOffset;
|
binary.baseOffset = engine->pipeline.programOffset;
|
||||||
|
hash = XXH64(guest.data(), guest.size_bytes(), 0);
|
||||||
|
|
||||||
hash = XXH64(binary.binary.data(), binary.binary.size_bytes(), 0);
|
binary.binary = {guest.data() - mirrorBlock.data() + entry->mirror.data(), guest.size()};
|
||||||
|
|
||||||
|
entry->cache.insert({blockMapping.data() + blockOffset, CacheEntry{binary, hash}});
|
||||||
|
}
|
||||||
|
|
||||||
|
PipelineStageState::~PipelineStageState() {
|
||||||
|
std::scoped_lock lock{trapMutex};
|
||||||
|
//for (const auto &mirror : mirrorMap)
|
||||||
|
// ctx.nce.DestroyTrap(*mirror.second->trap);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Vertex Input State */
|
/* Vertex Input State */
|
||||||
|
@ -65,12 +65,31 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
};
|
};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
struct CacheEntry {
|
||||||
|
ShaderBinary binary;
|
||||||
|
u64 hash;
|
||||||
|
|
||||||
|
CacheEntry(ShaderBinary binary, u64 hash) : binary{binary}, hash{hash} {}
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Holds mirror state for a single GPU mapped block
|
||||||
|
*/
|
||||||
|
struct MirrorEntry {
|
||||||
|
span<u8> mirror;
|
||||||
|
tsl::robin_map<u8 *, CacheEntry> cache;
|
||||||
|
std::optional<nce::NCE::TrapHandle> trap;
|
||||||
|
bool dirty{};
|
||||||
|
MirrorEntry(span<u8> alignedMirror) : mirror{alignedMirror} {}
|
||||||
|
};
|
||||||
|
|
||||||
dirty::BoundSubresource<EngineRegisters> engine;
|
dirty::BoundSubresource<EngineRegisters> engine;
|
||||||
engine::Pipeline::Shader::Type shaderType;
|
engine::Pipeline::Shader::Type shaderType;
|
||||||
|
|
||||||
constexpr static size_t MaxShaderBytecodeSize{1 * 1024 * 1024}; //!< The largest shader binary that we support (1 MiB)
|
tsl::robin_map<u8 *, std::unique_ptr<MirrorEntry>> mirrorMap;
|
||||||
|
std::mutex trapMutex; //!< Protects accesses from trap handlers to the mirror map
|
||||||
std::array<u8, MaxShaderBytecodeSize> shaderBacking;
|
MirrorEntry *entry{};
|
||||||
|
span<u8> mirrorBlock{}; //!< Guest mapped memory block corresponding to `entry`
|
||||||
|
|
||||||
public:
|
public:
|
||||||
ShaderBinary binary;
|
ShaderBinary binary;
|
||||||
@ -78,6 +97,8 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
|
|
||||||
PipelineStageState(dirty::Handle dirtyHandle, DirtyManager &manager, const EngineRegisters &engine, u8 shaderType);
|
PipelineStageState(dirty::Handle dirtyHandle, DirtyManager &manager, const EngineRegisters &engine, u8 shaderType);
|
||||||
|
|
||||||
|
~PipelineStageState();
|
||||||
|
|
||||||
void Flush(InterconnectContext &ctx);
|
void Flush(InterconnectContext &ctx);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -62,7 +62,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
|
|||||||
syncpoints{state.soc->host1x.syncpoints},
|
syncpoints{state.soc->host1x.syncpoints},
|
||||||
i2m{channelCtx},
|
i2m{channelCtx},
|
||||||
dirtyManager{registers},
|
dirtyManager{registers},
|
||||||
interconnect{*state.gpu, channelCtx, executor, dirtyManager, MakeEngineRegisters(registers)},
|
interconnect{*state.gpu, channelCtx, executor, *state.nce, state.process->memory, dirtyManager, MakeEngineRegisters(registers)},
|
||||||
channelCtx{channelCtx} {
|
channelCtx{channelCtx} {
|
||||||
executor.AddFlushCallback([this]() { FlushEngineState(); });
|
executor.AddFlushCallback([this]() { FlushEngineState(); });
|
||||||
InitializeRegisters();
|
InitializeRegisters();
|
||||||
|
Loading…
Reference in New Issue
Block a user