Avoid using trapping for frequently trapped shaders

Fall back to hashing for every shader access as that ends up being faster than applying traps for every execution.
This commit is contained in:
Billy Laws 2022-11-10 21:49:54 +00:00
parent 06095918a9
commit 7f1667de82
2 changed files with 23 additions and 8 deletions

View File

@ -261,6 +261,9 @@ namespace skyline::gpu::interconnect::maxwell3d {
auto[blockMapping, blockOffset]{ctx.channelCtx.asCtx->gmmu.LookupBlock(engine->programRegion + engine->pipeline.programOffset)}; auto[blockMapping, blockOffset]{ctx.channelCtx.asCtx->gmmu.LookupBlock(engine->programRegion + engine->pipeline.programOffset)};
if (!trapExecutionLock)
trapExecutionLock.emplace(trapMutex);
// Skip looking up the mirror if it is the same as the one used for the previous update // Skip looking up the mirror if it is the same as the one used for the previous update
if (!mirrorBlock.valid() || !mirrorBlock.contains(blockMapping)) { if (!mirrorBlock.valid() || !mirrorBlock.contains(blockMapping)) {
auto mirrorIt{mirrorMap.find(blockMapping.data())}; auto mirrorIt{mirrorMap.find(blockMapping.data())};
@ -272,11 +275,13 @@ namespace skyline::gpu::interconnect::maxwell3d {
auto trapHandle{ctx.nce.CreateTrap(blockMapping, [mutex = &trapMutex]() { auto trapHandle{ctx.nce.CreateTrap(blockMapping, [mutex = &trapMutex]() {
std::scoped_lock lock{*mutex}; std::scoped_lock lock{*mutex};
return; return;
}, []() { return true; }, [dirty = &newIt.first->second->dirty, mutex = &trapMutex]() { }, []() { return true; }, [entry = newIt.first->second.get(), mutex = &trapMutex]() {
std::unique_lock lock{*mutex, std::try_to_lock}; std::unique_lock lock{*mutex, std::try_to_lock};
if (!lock) if (!lock)
return false; return false;
*dirty = true;
if (++entry->trapCount <= MirrorEntry::SkipTrapThreshold)
entry->dirty = true;
return true; return true;
})}; })};
@ -292,14 +297,18 @@ namespace skyline::gpu::interconnect::maxwell3d {
mirrorBlock = blockMapping; mirrorBlock = blockMapping;
} }
if (!trapExecutionLock) if (entry->trapCount > MirrorEntry::SkipTrapThreshold && entry->channelSequenceNumber != ctx.channelCtx.channelSequenceNumber) {
trapExecutionLock.emplace(trapMutex); entry->channelSequenceNumber = ctx.channelCtx.channelSequenceNumber;
entry->dirty = true;
}
// If the mirror entry has been written to, clear its shader binary cache and retrap to catch any future writes // If the mirror entry has been written to, clear its shader binary cache and retrap to catch any future writes
if (entry->dirty) { if (entry->dirty) {
entry->cache.clear(); entry->cache.clear();
entry->dirty = false; entry->dirty = false;
ctx.nce.TrapRegions(*entry->trap, true);
if (entry->trapCount <= MirrorEntry::SkipTrapThreshold)
ctx.nce.TrapRegions(*entry->trap, true);
} else if (auto it{entry->cache.find(blockMapping.data() + blockOffset)}; it != entry->cache.end()) { } else if (auto it{entry->cache.find(blockMapping.data() + blockOffset)}; it != entry->cache.end()) {
binary = it->second.binary; binary = it->second.binary;
hash = it->second.hash; hash = it->second.hash;
@ -336,7 +345,9 @@ namespace skyline::gpu::interconnect::maxwell3d {
if (!trapExecutionLock) if (!trapExecutionLock)
trapExecutionLock.emplace(trapMutex); trapExecutionLock.emplace(trapMutex);
if (entry && entry->dirty) if (entry && entry->trapCount > MirrorEntry::SkipTrapThreshold && entry->channelSequenceNumber != ctx.channelCtx.channelSequenceNumber)
return true;
else if (entry && entry->dirty)
return true; return true;
return false; return false;
@ -627,4 +638,4 @@ namespace skyline::gpu::interconnect::maxwell3d {
std::shared_ptr<TextureView> PipelineState::GetDepthRenderTargetForClear(InterconnectContext &ctx) { std::shared_ptr<TextureView> PipelineState::GetDepthRenderTargetForClear(InterconnectContext &ctx) {
return depthRenderTarget.UpdateGet(ctx, packedState).view; return depthRenderTarget.UpdateGet(ctx, packedState).view;
} }
} }

View File

@ -81,7 +81,11 @@ namespace skyline::gpu::interconnect::maxwell3d {
span<u8> mirror; span<u8> mirror;
tsl::robin_map<u8 *, CacheEntry> cache; tsl::robin_map<u8 *, CacheEntry> cache;
std::optional<nce::NCE::TrapHandle> trap; std::optional<nce::NCE::TrapHandle> trap;
bool dirty{};
static constexpr u32 SkipTrapThreshold{20}; //!< Threshold for the number of times a mirror trap needs to be hit before we fall back to always hashing
u32 trapCount{}; //!< The number of times the trap has been hit, used to avoid trapping in cases where the constant retraps would harm performance
size_t channelSequenceNumber{}; //!< For the case where `trapCount > SkipTrapThreshold`, the memory sequence number used to clear the cache after every access
bool dirty{}; //!< If the trap has been hit and the cache needs to be cleared
MirrorEntry(span<u8> alignedMirror) : mirror{alignedMirror} {} MirrorEntry(span<u8> alignedMirror) : mirror{alignedMirror} {}
}; };