Implement a shared spinlock and use it for GPU VMM

2025-07-22 08:56:14 +03:00 · 2023-01-09 21:15:55 +00:00 · 2023-01-09 21:15:55 +00:00 · 85a23e73ba
commit 85a23e73ba
parent fd5c141dbf
3 changed files with 80 additions and 16 deletions
--- a/app/src/main/cpp/skyline/common/address_space.h
+++ b/app/src/main/cpp/skyline/common/address_space.h
@ -55,7 +55,7 @@ namespace skyline {
            }
        };

-        std::shared_mutex blockMutex;
+        SharedSpinLock blockMutex;
        std::vector<Block> blocks{Block{}};

        /**
--- a/app/src/main/cpp/skyline/common/spin_lock.cpp
+++ b/app/src/main/cpp/skyline/common/spin_lock.cpp
@ -8,22 +8,33 @@
 namespace skyline {
     static constexpr size_t LockAttemptsPerYield{256};
     static constexpr size_t LockAttemptsPerSleep{1024};
-    static constexpr size_t SleepDurationUs{1000};
+    static constexpr size_t SleepDurationUs{100}; //!< Microseconds slept by FalloffLock each time its attempt counter reaches a multiple of LockAttemptsPerSleep

-    void  __attribute__ ((noinline)) SpinLock::LockSlow() {
-        // We need to start with attempt = 1, otherwise
-        // attempt % LockAttemptsPerSleep is zero for the first iteration.
-        size_t attempt{1};
-        while (true) {
-            if (!locked.test_and_set(std::memory_order_acquire))
-                return;
-
-            attempt++;
-            if (attempt % LockAttemptsPerSleep == 0)
-                std::this_thread::sleep_for(std::chrono::microseconds(100));
-            else if (attempt % LockAttemptsPerYield == 0)
+    /**
+     * @brief Calls the supplied function until it returns true, backing off between failed attempts
+     * @param func A callable returning true once the lock has been acquired and false otherwise
+     * @note Every LockAttemptsPerYield failed attempts the thread yields, every LockAttemptsPerSleep failed attempts it sleeps for SleepDurationUs microseconds instead
+     */
+    template<typename Func>
+    void FalloffLock(Func &&func) {
+        // The counter starts at 1, otherwise both modulo checks are zero on the very first failed attempt and we would sleep before spinning at all
+        for (size_t attempt{1}; !func(); attempt++) {
+            // The sleep check goes first as any multiple of LockAttemptsPerSleep is also a multiple of LockAttemptsPerYield, this way an attempt never both yields and sleeps
+            if (attempt % LockAttemptsPerSleep == 0)
+                std::this_thread::sleep_for(std::chrono::microseconds(SleepDurationUs));
+            else if (attempt % LockAttemptsPerYield == 0)
                 std::this_thread::yield();
         }
     }

+    /**
+     * @brief Contended path of SpinLock, retries try_lock through FalloffLock until it succeeds
+     * @note Kept out-of-line (noinline) so that only the caller's fast path is inlined
+     */
+    void __attribute__((noinline)) SpinLock::LockSlow() {
+        auto attemptLock{[this] { return try_lock(); }};
+        FalloffLock(attemptLock);
+    }
+
+    /**
+     * @brief Contended path of exclusive locking, retries try_lock through FalloffLock until it succeeds
+     * @note Kept out-of-line (noinline) so that only the caller's fast path is inlined
+     */
+    void __attribute__((noinline)) SharedSpinLock::LockSlow() {
+        auto attemptExclusive{[this] { return try_lock(); }};
+        FalloffLock(attemptExclusive);
+    }
+
+    /**
+     * @brief Contended path of shared locking, retries try_lock_shared through FalloffLock until it succeeds
+     * @note Kept out-of-line (noinline) so that only the caller's fast path is inlined
+     */
+    void __attribute__((noinline)) SharedSpinLock::LockSlowShared() {
+        auto attemptShared{[this] { return try_lock_shared(); }};
+        FalloffLock(attemptShared);
+    }
 }
--- a/app/src/main/cpp/skyline/common/spin_lock.h
+++ b/app/src/main/cpp/skyline/common/spin_lock.h
@ -20,14 +20,14 @@ namespace skyline {

      public:
         void lock() {
-            if (!locked.test_and_set(std::memory_order_acquire)) [[likely]]
+            if (try_lock()) [[likely]] // Fast path: a single inline attempt, LockSlow() is only reached when that attempt fails
                 return;
 
             LockSlow();
         }

         bool try_lock() {
-            return !locked.test_and_set(std::memory_order_acquire);
+            return !locked.test_and_set(std::memory_order_acq_rel); // test_and_set returns the previous flag value, so a false result means this call took the lock
         }

        void unlock() {
@ -35,6 +35,59 @@ namespace skyline {
        }
    };

+    /**
+     * @brief A reader-writer spinlock exposing the locking member functions of std::shared_mutex (exclusive and shared acquisition)
+     * @note This is loosely based on https://github.com/facebook/folly/blob/224350ea8c7c183312bec653e0d95a2b1e356ed7/folly/synchronization/RWSpinLock.h
+     */
+    class SharedSpinLock {
+      private:
+        static constexpr u32 StateWriter{1}; //!< Bit 0 of the state, set while the lock is held exclusively
+        static constexpr u32 StateReader{2}; //!< Amount added to the state per shared acquisition, this keeps the reader count clear of the writer bit
+
+        std::atomic<u32> state{}; //!< Zero when unlocked, otherwise the writer bit and/or a multiple of StateReader (including increments from shared attempts that are about to be rolled back)
+
+        /**
+         * @brief Contended path of lock(), declared here and defined out-of-line
+         */
+        void LockSlow();
+
+        /**
+         * @brief Contended path of lock_shared(), declared here and defined out-of-line
+         */
+        void LockSlowShared();
+
+      public:
+        /**
+         * @brief Acquires the lock exclusively, falling back to LockSlow() when the first attempt fails
+         */
+        void lock() {
+            if (!try_lock()) [[unlikely]]
+                LockSlow();
+        }
+
+        /**
+         * @brief Acquires the lock in shared mode, falling back to LockSlowShared() when the first attempt fails
+         */
+        void lock_shared() {
+            if (!try_lock_shared()) [[unlikely]]
+                LockSlowShared();
+        }
+
+        /**
+         * @brief Makes a single attempt at acquiring the lock exclusively
+         * @return If the lock was acquired, which only happens when the state was exactly zero
+         */
+        bool try_lock() {
+            u32 unlockedState{0};
+            return state.compare_exchange_strong(unlockedState, StateWriter, std::memory_order_acq_rel);
+        }
+
+        /**
+         * @brief Makes a single attempt at acquiring the lock in shared mode
+         * @return If the lock was acquired, which fails when the writer bit was set at the time of the attempt
+         */
+        bool try_lock_shared() {
+            // The reader increment is applied unconditionally and rolled back below if a writer turned out to hold the lock
+            const u32 previousState{state.fetch_add(StateReader, std::memory_order_acquire)};
+            if ((previousState & StateWriter) == 0)
+                return true;
+
+            state.fetch_sub(StateReader, std::memory_order_release);
+            return false;
+        }
+
+        /**
+         * @brief Releases an exclusive hold on the lock
+         * @note Only the writer bit is cleared rather than zeroing the state, as failed shared attempts may have reader increments in flight
+         */
+        void unlock() {
+            state.fetch_and(~StateWriter, std::memory_order_release);
+        }
+
+        /**
+         * @brief Releases a shared hold on the lock by removing one reader increment from the state
+         */
+        void unlock_shared() {
+            state.fetch_sub(StateReader, std::memory_order_release);
+        }
+    };
+
    /**
     * @brief Recursive lock built ontop of `SpinLock`
     * @note This should *ONLY* be used in situations where it is provably better than an std::mutex due to spinlocks having worse perfomance under heavy contention