mirror of
https://github.com/skyline-emu/skyline.git
synced 2025-01-16 04:37:54 +03:00
Avoid using trapping for frequently trapped shaders
Fall back to hashing for every shader access as that ends up being faster than applying traps for every execution.
This commit is contained in:
parent
06095918a9
commit
7f1667de82
@ -261,6 +261,9 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
|
|
||||||
auto[blockMapping, blockOffset]{ctx.channelCtx.asCtx->gmmu.LookupBlock(engine->programRegion + engine->pipeline.programOffset)};
|
auto[blockMapping, blockOffset]{ctx.channelCtx.asCtx->gmmu.LookupBlock(engine->programRegion + engine->pipeline.programOffset)};
|
||||||
|
|
||||||
|
if (!trapExecutionLock)
|
||||||
|
trapExecutionLock.emplace(trapMutex);
|
||||||
|
|
||||||
// Skip looking up the mirror if it is the same as the one used for the previous update
|
// Skip looking up the mirror if it is the same as the one used for the previous update
|
||||||
if (!mirrorBlock.valid() || !mirrorBlock.contains(blockMapping)) {
|
if (!mirrorBlock.valid() || !mirrorBlock.contains(blockMapping)) {
|
||||||
auto mirrorIt{mirrorMap.find(blockMapping.data())};
|
auto mirrorIt{mirrorMap.find(blockMapping.data())};
|
||||||
@ -272,11 +275,13 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
auto trapHandle{ctx.nce.CreateTrap(blockMapping, [mutex = &trapMutex]() {
|
auto trapHandle{ctx.nce.CreateTrap(blockMapping, [mutex = &trapMutex]() {
|
||||||
std::scoped_lock lock{*mutex};
|
std::scoped_lock lock{*mutex};
|
||||||
return;
|
return;
|
||||||
}, []() { return true; }, [dirty = &newIt.first->second->dirty, mutex = &trapMutex]() {
|
}, []() { return true; }, [entry = newIt.first->second.get(), mutex = &trapMutex]() {
|
||||||
std::unique_lock lock{*mutex, std::try_to_lock};
|
std::unique_lock lock{*mutex, std::try_to_lock};
|
||||||
if (!lock)
|
if (!lock)
|
||||||
return false;
|
return false;
|
||||||
*dirty = true;
|
|
||||||
|
if (++entry->trapCount <= MirrorEntry::SkipTrapThreshold)
|
||||||
|
entry->dirty = true;
|
||||||
return true;
|
return true;
|
||||||
})};
|
})};
|
||||||
|
|
||||||
@ -292,14 +297,18 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
mirrorBlock = blockMapping;
|
mirrorBlock = blockMapping;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!trapExecutionLock)
|
if (entry->trapCount > MirrorEntry::SkipTrapThreshold && entry->channelSequenceNumber != ctx.channelCtx.channelSequenceNumber) {
|
||||||
trapExecutionLock.emplace(trapMutex);
|
entry->channelSequenceNumber = ctx.channelCtx.channelSequenceNumber;
|
||||||
|
entry->dirty = true;
|
||||||
|
}
|
||||||
|
|
||||||
// If the mirror entry has been written to, clear its shader binary cache and retrap to catch any future writes
|
// If the mirror entry has been written to, clear its shader binary cache and retrap to catch any future writes
|
||||||
if (entry->dirty) {
|
if (entry->dirty) {
|
||||||
entry->cache.clear();
|
entry->cache.clear();
|
||||||
entry->dirty = false;
|
entry->dirty = false;
|
||||||
ctx.nce.TrapRegions(*entry->trap, true);
|
|
||||||
|
if (entry->trapCount <= MirrorEntry::SkipTrapThreshold)
|
||||||
|
ctx.nce.TrapRegions(*entry->trap, true);
|
||||||
} else if (auto it{entry->cache.find(blockMapping.data() + blockOffset)}; it != entry->cache.end()) {
|
} else if (auto it{entry->cache.find(blockMapping.data() + blockOffset)}; it != entry->cache.end()) {
|
||||||
binary = it->second.binary;
|
binary = it->second.binary;
|
||||||
hash = it->second.hash;
|
hash = it->second.hash;
|
||||||
@ -336,7 +345,9 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
if (!trapExecutionLock)
|
if (!trapExecutionLock)
|
||||||
trapExecutionLock.emplace(trapMutex);
|
trapExecutionLock.emplace(trapMutex);
|
||||||
|
|
||||||
if (entry && entry->dirty)
|
if (entry && entry->trapCount > MirrorEntry::SkipTrapThreshold && entry->channelSequenceNumber != ctx.channelCtx.channelSequenceNumber)
|
||||||
|
return true;
|
||||||
|
else if (entry && entry->dirty)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
@ -627,4 +638,4 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
std::shared_ptr<TextureView> PipelineState::GetDepthRenderTargetForClear(InterconnectContext &ctx) {
|
std::shared_ptr<TextureView> PipelineState::GetDepthRenderTargetForClear(InterconnectContext &ctx) {
|
||||||
return depthRenderTarget.UpdateGet(ctx, packedState).view;
|
return depthRenderTarget.UpdateGet(ctx, packedState).view;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -81,7 +81,11 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
span<u8> mirror;
|
span<u8> mirror;
|
||||||
tsl::robin_map<u8 *, CacheEntry> cache;
|
tsl::robin_map<u8 *, CacheEntry> cache;
|
||||||
std::optional<nce::NCE::TrapHandle> trap;
|
std::optional<nce::NCE::TrapHandle> trap;
|
||||||
bool dirty{};
|
|
||||||
|
static constexpr u32 SkipTrapThreshold{20}; //!< Threshold for the number of times a mirror trap needs to be hit before we fall back to always hashing
|
||||||
|
u32 trapCount{}; //!< The number of times the trap has been hit, used to avoid trapping in cases where the constant retraps would harm performance
|
||||||
|
size_t channelSequenceNumber{}; //!< For the case where `trapCount > SkipTrapThreshold`, the channel sequence number recorded at the last access; a mismatch with the current channel sequence number marks the entry dirty so the cache is cleared
|
||||||
|
bool dirty{}; //!< If the trap has been hit and the cache needs to be cleared
|
||||||
|
|
||||||
MirrorEntry(span<u8> alignedMirror) : mirror{alignedMirror} {}
|
MirrorEntry(span<u8> alignedMirror) : mirror{alignedMirror} {}
|
||||||
};
|
};
|
||||||
|
Loading…
x
Reference in New Issue
Block a user