Implement a shared spinlock and use it for GPU VMM

This commit is contained in:
Billy Laws 2023-01-09 21:15:55 +00:00
parent fd5c141dbf
commit 85a23e73ba
3 changed files with 80 additions and 16 deletions

View File

@ -55,7 +55,7 @@ namespace skyline {
}
};
std::shared_mutex blockMutex;
SharedSpinLock blockMutex;
std::vector<Block> blocks{Block{}};
/**

View File

@ -8,22 +8,33 @@
namespace skyline {
static constexpr size_t LockAttemptsPerYield{256};
static constexpr size_t LockAttemptsPerSleep{1024};
static constexpr size_t SleepDurationUs{1000};
static constexpr size_t SleepDurationUs{100};
void __attribute__ ((noinline)) SpinLock::LockSlow() {
// We need to start with attempt = 1, otherwise
// attempt % LockAttemptsPerSleep is zero for the first iteration.
size_t attempt{1};
while (true) {
if (!locked.test_and_set(std::memory_order_acquire))
return;
attempt++;
if (attempt % LockAttemptsPerSleep == 0)
std::this_thread::sleep_for(std::chrono::microseconds(100));
else if (attempt % LockAttemptsPerYield == 0)
/**
 * @brief Repeatedly invokes a lock attempt until it succeeds, backing off progressively while it keeps failing
 * @tparam Func A callable taking no arguments whose result is contextually convertible to bool
 * @param func Invoked once per attempt, a truthy result means the lock was acquired and ends the loop
 * @note The attempt counter starts at 1, otherwise it would be a multiple of both backoff periods on the very first
 *       failed attempt and a briefly contended lock would immediately yield and sleep instead of spinning
 */
template<typename Func>
void FalloffLock(Func &&func) {
    for (size_t attempt{1}; !func(); attempt++) {
        // Sleeping takes priority when both periods coincide, yielding right before a sleep achieves nothing extra
        if (attempt % LockAttemptsPerSleep == 0)
            std::this_thread::sleep_for(std::chrono::microseconds(SleepDurationUs));
        else if (attempt % LockAttemptsPerYield == 0)
            std::this_thread::yield();
    }
}
/**
 * @brief Out-of-line path taken by lock() after its initial try_lock() fails
 * @note Retries try_lock() through FalloffLock and only returns once an attempt succeeds
 */
void __attribute__ ((noinline)) SpinLock::LockSlow() {
    auto attemptLock{[this]() -> bool { return try_lock(); }};
    FalloffLock(attemptLock);
}
/**
 * @brief Out-of-line path taken by lock() after its initial exclusive try_lock() fails
 * @note Retries try_lock() through FalloffLock and only returns once an attempt succeeds
 */
void __attribute__ ((noinline)) SharedSpinLock::LockSlow() {
    auto attemptExclusive{[this]() -> bool { return try_lock(); }};
    FalloffLock(attemptExclusive);
}
/**
 * @brief Out-of-line path taken by lock_shared() after its initial try_lock_shared() fails
 * @note Retries try_lock_shared() through FalloffLock and only returns once an attempt succeeds
 */
void __attribute__ ((noinline)) SharedSpinLock::LockSlowShared() {
    auto attemptShared{[this]() -> bool { return try_lock_shared(); }};
    FalloffLock(attemptShared);
}
}

View File

@ -20,14 +20,14 @@ namespace skyline {
public:
void lock() {
if (!locked.test_and_set(std::memory_order_acquire)) [[likely]]
if (try_lock()) [[likely]]
return;
LockSlow();
}
bool try_lock() {
return !locked.test_and_set(std::memory_order_acquire);
return !locked.test_and_set(std::memory_order_acq_rel);
}
void unlock() {
@ -35,6 +35,59 @@ namespace skyline {
}
};
/**
* @brief Spinlock variant of std::shared_mutex
* @note This is loosely based on https://github.com/facebook/folly/blob/224350ea8c7c183312bec653e0d95a2b1e356ed7/folly/synchronization/RWSpinLock.h
*/
class SharedSpinLock {
private:
static constexpr u32 StateReader{2};
static constexpr u32 StateWriter{1};
std::atomic<u32> state{};
void LockSlow();
void LockSlowShared();
public:
void lock() {
if (try_lock()) [[likely]]
return;
LockSlow();
}
void lock_shared() {
if (try_lock_shared()) [[likely]]
return;
LockSlowShared();
}
bool try_lock() {
u32 expected{};
return state.compare_exchange_strong(expected, StateWriter, std::memory_order_acq_rel);
}
bool try_lock_shared() {
u32 value{state.fetch_add(StateReader, std::memory_order_acquire)};
if (value & StateWriter) {
state.fetch_add(-StateReader, std::memory_order_release);
return false;
}
return true;
}
void unlock() {
state.fetch_and(~StateWriter, std::memory_order_release);
}
void unlock_shared() {
state.fetch_add(-StateReader, std::memory_order_release);
}
};
/**
* @brief Recursive lock built on top of `SpinLock`
* @note This should *ONLY* be used in situations where it is provably better than an std::mutex due to spinlocks having worse performance under heavy contention