Fix NCE Trapping API Deadlock

A deadlock was caused by holding `trapMutex` while waiting on the lock of a resource inside a callback while another thread holding the resource's mutex waits on `trapMutex`. This has been fixed by no longer allowing blocking locks inside the callbacks and introducing a separate callback for locking the resource which is done after unlocking the `trapMutex` which can then be locked by any contending threads.
This commit is contained in:
PixelyIon 2022-07-02 21:45:50 +05:30
parent a6599c30b4
commit 3ca56ef578
No known key found for this signature in database
GPG Key ID: 11BC6C3201BC2C05
4 changed files with 77 additions and 47 deletions

View File

@ -17,17 +17,23 @@ namespace skyline::gpu {
trapHandle = gpu.state.nce->TrapRegions(*guest, true, [this] { trapHandle = gpu.state.nce->TrapRegions(*guest, true, [this] {
std::scoped_lock lock{*this}; std::scoped_lock lock{*this};
}, [this] {
std::unique_lock lock{*this, std::try_to_lock};
if (!lock)
return false;
SynchronizeGuest(true); // We can skip trapping since the caller will do it SynchronizeGuest(true); // We can skip trapping since the caller will do it
WaitOnFence(); return true;
}, [this] { }, [this] {
DirtyState expectedState{DirtyState::Clean}; DirtyState expectedState{DirtyState::Clean};
if (dirtyState.compare_exchange_strong(expectedState, DirtyState::CpuDirty, std::memory_order_relaxed) || expectedState == DirtyState::CpuDirty) if (dirtyState.compare_exchange_strong(expectedState, DirtyState::CpuDirty, std::memory_order_relaxed) || expectedState == DirtyState::CpuDirty)
return; // If we can transition the buffer to CPU dirty (from Clean) or if it already is CPU dirty then we can just return, we only need to do the lock and corresponding sync if the buffer is GPU dirty return true; // If we can transition the buffer to CPU dirty (from Clean) or if it already is CPU dirty then we can just return, we only need to do the lock and corresponding sync if the buffer is GPU dirty
std::scoped_lock lock{*this}; std::unique_lock lock{*this, std::try_to_lock};
if (!lock)
return false;
SynchronizeGuest(true); SynchronizeGuest(true);
dirtyState = DirtyState::CpuDirty; // We need to assume the buffer is dirty since we don't know what the guest is writing dirtyState = DirtyState::CpuDirty; // We need to assume the buffer is dirty since we don't know what the guest is writing
WaitOnFence(); return true;
}); });
} }

View File

@ -145,17 +145,23 @@ namespace skyline::gpu {
trapHandle = gpu.state.nce->TrapRegions(mappings, true, [this] { trapHandle = gpu.state.nce->TrapRegions(mappings, true, [this] {
std::scoped_lock lock{*this}; std::scoped_lock lock{*this};
}, [this] {
std::unique_lock lock{*this, std::try_to_lock};
if (!lock)
return false;
SynchronizeGuest(true); // We can skip trapping since the caller will do it SynchronizeGuest(true); // We can skip trapping since the caller will do it
WaitOnFence(); return true;
}, [this] { }, [this] {
DirtyState expectedState{DirtyState::Clean}; DirtyState expectedState{DirtyState::Clean};
if (dirtyState.compare_exchange_strong(expectedState, DirtyState::CpuDirty, std::memory_order_relaxed) || expectedState == DirtyState::CpuDirty) if (dirtyState.compare_exchange_strong(expectedState, DirtyState::CpuDirty, std::memory_order_relaxed) || expectedState == DirtyState::CpuDirty)
return; // If we can transition the texture to CPU dirty (from Clean) or if it already is CPU dirty then we can just return, we only need to do the lock and corresponding sync if the texture is GPU dirty return true; // If we can transition the texture to CPU dirty (from Clean) or if it already is CPU dirty then we can just return, we only need to do the lock and corresponding sync if the texture is GPU dirty
std::scoped_lock lock{*this}; std::unique_lock lock{*this, std::try_to_lock};
if (!lock)
return false;
SynchronizeGuest(true); SynchronizeGuest(true);
dirtyState = DirtyState::CpuDirty; // We need to assume the texture is dirty since we don't know what the guest is writing dirtyState = DirtyState::CpuDirty; // We need to assume the texture is dirty since we don't know what the guest is writing
WaitOnFence(); return true;
}); });
} }

View File

@ -406,7 +406,7 @@ namespace skyline::nce {
} }
} }
NCE::CallbackEntry::CallbackEntry(TrapProtection protection, NCE::TrapCallback readCallback, NCE::TrapCallback writeCallback) : protection(protection), readCallback(std::move(readCallback)), writeCallback(std::move(writeCallback)) {} NCE::CallbackEntry::CallbackEntry(TrapProtection protection, LockCallback lockCallback, TrapCallback readCallback, TrapCallback writeCallback) : protection{protection}, lockCallback{std::move(lockCallback)}, readCallback{std::move(readCallback)}, writeCallback{std::move(writeCallback)} {}
void NCE::ReprotectIntervals(const std::vector<TrapMap::Interval> &intervals, TrapProtection protection) { void NCE::ReprotectIntervals(const std::vector<TrapMap::Interval> &intervals, TrapProtection protection) {
auto reprotectIntervalsWithFunction = [&intervals](auto getProtection) { auto reprotectIntervalsWithFunction = [&intervals](auto getProtection) {
@ -467,55 +467,68 @@ namespace skyline::nce {
} }
bool NCE::TrapHandler(u8 *address, bool write) { bool NCE::TrapHandler(u8 *address, bool write) {
std::scoped_lock lock(trapMutex); LockCallback lockCallback{};
while (true) {
if (lockCallback)
// We want to avoid a deadlock of holding trapMutex while locking the resource inside a callback while another thread holding the resource's mutex waits on trapMutex, we solve this by quitting the loop if a callback would be blocking and attempt to lock the resource externally
lockCallback();
// Check if we have a callback for this address std::scoped_lock lock(trapMutex);
auto[entries, intervals]{trapMap.GetAlignedRecursiveRange<PAGE_SIZE>(address)};
if (entries.empty()) // Check if we have a callback for this address
return false; auto[entries, intervals]{trapMap.GetAlignedRecursiveRange<PAGE_SIZE>(address)};
// Do callbacks for every entry in the intervals if (entries.empty())
if (write) { return false;
for (auto entryRef : entries) {
auto &entry{entryRef.get()};
if (entry.protection == TrapProtection::None)
// We don't need to do the callback if the entry doesn't require any protection already
continue;
entry.writeCallback(); // Do callbacks for every entry in the intervals
entry.protection = TrapProtection::None; // We don't need to protect this entry anymore if (write) {
} for (auto entryRef : entries) {
} else { auto &entry{entryRef.get()};
bool allNone{true}; // If all entries require no protection, we can protect to allow all accesses if (entry.protection == TrapProtection::None)
for (auto entryRef : entries) { // We don't need to do the callback if the entry doesn't require any protection already
auto &entry{entryRef.get()}; continue;
if (entry.protection < TrapProtection::ReadWrite) {
// We don't need to do the callback if the entry can already handle read accesses if (!entry.writeCallback()) {
allNone = allNone && entry.protection == TrapProtection::None; lockCallback = entry.lockCallback;
continue; continue;
}
entry.protection = TrapProtection::None; // We don't need to protect this entry anymore
} }
} else {
bool allNone{true}; // If all entries require no protection, we can protect to allow all accesses
for (auto entryRef : entries) {
auto &entry{entryRef.get()};
if (entry.protection < TrapProtection::ReadWrite) {
// We don't need to do the callback if the entry can already handle read accesses
allNone = allNone && entry.protection == TrapProtection::None;
continue;
}
entry.readCallback(); if (!entry.readCallback()) {
entry.protection = TrapProtection::WriteOnly; // We only need to trap writes to this entry lockCallback = entry.lockCallback;
continue;
}
entry.protection = TrapProtection::WriteOnly; // We only need to trap writes to this entry
}
write = allNone;
} }
write = allNone;
int permission{PROT_READ | (write ? PROT_WRITE : 0) | PROT_EXEC};
for (const auto &interval : intervals)
// Reprotect the interval to the lowest protection level that the callbacks performed allow
mprotect(interval.start, interval.Size(), permission);
return true;
} }
int permission{PROT_READ | (write ? PROT_WRITE : 0) | PROT_EXEC};
for (const auto &interval : intervals)
// Reprotect the interval to the lowest protection level that the callbacks performed allow
mprotect(interval.start, interval.Size(), permission);
return true;
} }
constexpr NCE::TrapHandle::TrapHandle(const TrapMap::GroupHandle &handle) : TrapMap::GroupHandle(handle) {} constexpr NCE::TrapHandle::TrapHandle(const TrapMap::GroupHandle &handle) : TrapMap::GroupHandle(handle) {}
NCE::TrapHandle NCE::TrapRegions(span<span<u8>> regions, bool writeOnly, const TrapCallback &readCallback, const TrapCallback &writeCallback) { NCE::TrapHandle NCE::TrapRegions(span<span<u8>> regions, bool writeOnly, const LockCallback& lockCallback, const TrapCallback &readCallback, const TrapCallback &writeCallback) {
std::scoped_lock lock(trapMutex); std::scoped_lock lock(trapMutex);
auto protection{writeOnly ? TrapProtection::WriteOnly : TrapProtection::ReadWrite}; auto protection{writeOnly ? TrapProtection::WriteOnly : TrapProtection::ReadWrite};
TrapHandle handle{trapMap.Insert(regions, CallbackEntry{protection, readCallback, writeCallback})}; TrapHandle handle{trapMap.Insert(regions, CallbackEntry{protection, lockCallback, readCallback, writeCallback})};
ReprotectIntervals(handle->intervals, protection); ReprotectIntervals(handle->intervals, protection);
return handle; return handle;
} }

View File

@ -24,13 +24,15 @@ namespace skyline::nce {
ReadWrite = 2, //!< Both read and write protection are required ReadWrite = 2, //!< Both read and write protection are required
}; };
using TrapCallback = std::function<void()>; using TrapCallback = std::function<bool()>;
using LockCallback = std::function<void()>;
struct CallbackEntry { struct CallbackEntry {
TrapProtection protection; //!< The least restrictive protection that this callback needs to have TrapProtection protection; //!< The least restrictive protection that this callback needs to have
LockCallback lockCallback;
TrapCallback readCallback, writeCallback; TrapCallback readCallback, writeCallback;
CallbackEntry(TrapProtection protection, NCE::TrapCallback readCallback, NCE::TrapCallback writeCallback); CallbackEntry(TrapProtection protection, LockCallback lockCallback, TrapCallback readCallback, TrapCallback writeCallback);
}; };
std::mutex trapMutex; //!< Synchronizes the accesses to the trap map std::mutex trapMutex; //!< Synchronizes the accesses to the trap map
@ -102,11 +104,14 @@ namespace skyline::nce {
/** /**
* @brief Traps a region of guest memory with a callback for when an access to it has been made * @brief Traps a region of guest memory with a callback for when an access to it has been made
* @param writeOnly If the trap is optimally for write-only accesses initially, this is not guarenteed * @param writeOnly If the trap is optimally for write-only accesses initially, this is not guarenteed
* @param lockCallback A callback to lock the resource that is being trapped, it must block until the resource is locked but unlock it prior to returning
* @param readCallback A callback for read accesses to the trapped region, it must not block and return a boolean if it would block
* @param writeCallback A callback for write accesses to the trapped region, it must not block and return a boolean if it would block
* @note The handle **must** be deleted using DeleteTrap before the NCE instance is destroyed * @note The handle **must** be deleted using DeleteTrap before the NCE instance is destroyed
* @note It is UB to supply a region of host memory rather than guest memory * @note It is UB to supply a region of host memory rather than guest memory
* @note Any regions trapped without writeOnly may have their data (except border pages) paged out and it needs to be paged back in inside the callbacks * @note Any regions trapped without writeOnly may have their data (except border pages) paged out and it needs to be paged back in inside the callbacks
*/ */
TrapHandle TrapRegions(span<span<u8>> regions, bool writeOnly, const TrapCallback& readCallback, const TrapCallback& writeCallback); TrapHandle TrapRegions(span<span<u8>> regions, bool writeOnly, const LockCallback& lockCallback, const TrapCallback& readCallback, const TrapCallback& writeCallback);
/** /**
* @brief Re-traps a region of memory after protections were removed * @brief Re-traps a region of memory after protections were removed