From a5ca370c3687f38d733926770732a500e4a65474 Mon Sep 17 00:00:00 2001 From: PixelyIon Date: Sun, 5 Jun 2022 13:02:33 +0530 Subject: [PATCH] Implement thread-safe MegaBuffer pool We currently have a global `MegaBuffer` instance that is shared across all channels. This is very problematic, as `MegaBuffer` fundamentally works like a state machine with allocations (especially resetting/freeing) and is thread-specific. Therefore, we now have a pool of several `MegaBuffer`s from which each `CommandExecutor` allocates its own instance. Each `MegaBuffer` is thus kept channel-specific, which also limits its usage to a single thread and allows for individually resetting or freeing any allocations. --- app/src/main/cpp/skyline/gpu/buffer.cpp | 8 +- app/src/main/cpp/skyline/gpu/buffer.h | 5 +- .../main/cpp/skyline/gpu/buffer_manager.cpp | 88 ++++++++++++------- app/src/main/cpp/skyline/gpu/buffer_manager.h | 85 +++++++++++------- .../gpu/interconnect/command_executor.cpp | 4 +- .../gpu/interconnect/command_executor.h | 1 + .../gpu/interconnect/graphics_context.h | 14 +-- 7 files changed, 126 insertions(+), 79 deletions(-) diff --git a/app/src/main/cpp/skyline/gpu/buffer.cpp b/app/src/main/cpp/skyline/gpu/buffer.cpp index 0b9256c4..d6ab12ef 100644 --- a/app/src/main/cpp/skyline/gpu/buffer.cpp +++ b/app/src/main/cpp/skyline/gpu/buffer.cpp @@ -267,7 +267,7 @@ namespace skyline::gpu { return BufferView{shared_from_this(), &views.back()}; } - vk::DeviceSize Buffer::AcquireMegaBuffer() { + vk::DeviceSize Buffer::AcquireMegaBuffer(MegaBuffer& megaBuffer) { SynchronizeGuest(false, true); // First try and enable megabuffering by doing an immediate sync if (!megaBufferingEnabled) @@ -278,7 +278,7 @@ namespace skyline::gpu { if (megaBufferOffset) return megaBufferOffset; // If the current buffer contents haven't been changed since the last acquire, we can just return the existing offset - megaBufferOffset = gpu.buffer.megaBuffer.Push(backing, true); // Buffers are required to be page aligned in the megabuffer 
+ megaBufferOffset = megaBuffer.Push(backing, true); // Buffers are required to be page aligned in the megabuffer return megaBufferOffset; } @@ -370,8 +370,8 @@ namespace skyline::gpu { bufferDelegate->buffer->Write(pCycle, flushHostCallback, gpuCopyCallback, data, offset + bufferDelegate->view->offset); } - vk::DeviceSize BufferView::AcquireMegaBuffer() const { - vk::DeviceSize bufferOffset{bufferDelegate->buffer->AcquireMegaBuffer()}; + vk::DeviceSize BufferView::AcquireMegaBuffer(MegaBuffer& megaBuffer) const { + vk::DeviceSize bufferOffset{bufferDelegate->buffer->AcquireMegaBuffer(megaBuffer)}; // Propagate 0 results since they signify that megabuffering isn't supported for a buffer if (bufferOffset) diff --git a/app/src/main/cpp/skyline/gpu/buffer.h b/app/src/main/cpp/skyline/gpu/buffer.h index 89c77175..465dee99 100644 --- a/app/src/main/cpp/skyline/gpu/buffer.h +++ b/app/src/main/cpp/skyline/gpu/buffer.h @@ -11,6 +11,7 @@ namespace skyline::gpu { struct BufferView; class BufferManager; + class MegaBuffer; /** * @brief A buffer which is backed by host constructs while being synchronized with the underlying guest buffer @@ -234,7 +235,7 @@ namespace skyline::gpu { * @note This will only push into the megabuffer when there have been modifications after the previous acquire, otherwise the previous offset will be reused * @note An implicit CPU -> GPU sync will be performed when calling this, an immediate GPU -> CPU sync will also be attempted if the buffer is GPU dirty in the hope that megabuffering can be reenabled */ - vk::DeviceSize AcquireMegaBuffer(); + vk::DeviceSize AcquireMegaBuffer(MegaBuffer& megaBuffer); /** * @brief Forces the buffer contents to be pushed into the megabuffer on the next AcquireMegaBuffer call @@ -333,7 +334,7 @@ namespace skyline::gpu { * @note The view **must** be locked prior to calling this * @note See Buffer::AcquireMegaBuffer */ - vk::DeviceSize AcquireMegaBuffer() const; + vk::DeviceSize AcquireMegaBuffer(MegaBuffer& megaBuffer) 
const; /** * @return A span of the backing buffer contents diff --git a/app/src/main/cpp/skyline/gpu/buffer_manager.cpp b/app/src/main/cpp/skyline/gpu/buffer_manager.cpp index ea864404..b7ef029f 100644 --- a/app/src/main/cpp/skyline/gpu/buffer_manager.cpp +++ b/app/src/main/cpp/skyline/gpu/buffer_manager.cpp @@ -6,39 +6,7 @@ #include "buffer_manager.h" namespace skyline::gpu { - MegaBuffer::MegaBuffer(GPU &gpu) : backing(gpu.memory.AllocateBuffer(Size)), freeRegion(backing.subspan(PAGE_SIZE)) {} - - void MegaBuffer::Reset() { - std::scoped_lock lock{mutex}; - freeRegion = backing.subspan(PAGE_SIZE); - } - - vk::Buffer MegaBuffer::GetBacking() const { - return backing.vkBuffer; - } - - vk::DeviceSize MegaBuffer::Push(span data, bool pageAlign) { - std::scoped_lock lock{mutex}; - - if (data.size() > freeRegion.size()) - throw exception("Ran out of megabuffer space! Alloc size: 0x{:X}", data.size()); - - if (pageAlign) { - // If page aligned data was requested then align the free - auto alignedFreeBase{util::AlignUp(static_cast(freeRegion.data() - backing.data()), PAGE_SIZE)}; - freeRegion = backing.subspan(alignedFreeBase); - } - - // Allocate space for data from the free region - auto resultSpan{freeRegion.subspan(0, data.size())}; - resultSpan.copy_from(data); - - // Move the free region along - freeRegion = freeRegion.subspan(data.size()); - return static_cast(resultSpan.data() - backing.data()); - } - - BufferManager::BufferManager(GPU &gpu) : gpu(gpu), megaBuffer(gpu) {} + BufferManager::BufferManager(GPU &gpu) : gpu(gpu) {} bool BufferManager::BufferLessThan(const std::shared_ptr &it, u8 *pointer) { return it->guest->begin().base() < pointer; @@ -109,4 +77,58 @@ namespace skyline::gpu { return newBuffer->GetView(static_cast(guestMapping.begin() - newBuffer->guest->begin()) + offset, size); } + + BufferManager::MegaBufferSlot::MegaBufferSlot(GPU &gpu) : backing(gpu.memory.AllocateBuffer(Size)) {} + + MegaBuffer::MegaBuffer(BufferManager::MegaBufferSlot &slot) : 
slot{slot}, freeRegion{slot.backing.subspan(PAGE_SIZE)} {} + + MegaBuffer::~MegaBuffer() { + slot.active.clear(std::memory_order_release); + } + + void MegaBuffer::Reset() { + freeRegion = slot.backing.subspan(PAGE_SIZE); + } + + vk::Buffer MegaBuffer::GetBacking() const { + return slot.backing.vkBuffer; + } + + vk::DeviceSize MegaBuffer::Push(span data, bool pageAlign) { + if (data.size() > freeRegion.size()) + throw exception("Ran out of megabuffer space! Alloc size: 0x{:X}", data.size()); + + if (pageAlign) { + // If page aligned data was requested then align the free + auto alignedFreeBase{util::AlignUp(static_cast(freeRegion.data() - slot.backing.data()), PAGE_SIZE)}; + freeRegion = slot.backing.subspan(alignedFreeBase); + } + + // Allocate space for data from the free region + auto resultSpan{freeRegion.subspan(0, data.size())}; + resultSpan.copy_from(data); + + // Move the free region along + freeRegion = freeRegion.subspan(data.size()); + return static_cast(resultSpan.data() - slot.backing.data()); + } + + MegaBuffer BufferManager::AcquireMegaBuffer(const std::shared_ptr &cycle) { + std::lock_guard lock{mutex}; + + for (auto &slot : megaBuffers) { + if (!slot.active.test_and_set(std::memory_order_acq_rel)) { + if (slot.cycle->Poll()) { + slot.cycle = cycle; + return {slot}; + } else { + slot.active.clear(std::memory_order_release); + } + } + } + + auto& megaBuffer{megaBuffers.emplace_back(gpu)}; + megaBuffer.cycle = cycle; + return {megaBuffer}; + } } diff --git a/app/src/main/cpp/skyline/gpu/buffer_manager.h b/app/src/main/cpp/skyline/gpu/buffer_manager.h index addfcbf1..6319ce7c 100644 --- a/app/src/main/cpp/skyline/gpu/buffer_manager.h +++ b/app/src/main/cpp/skyline/gpu/buffer_manager.h @@ -6,19 +6,67 @@ #include "buffer.h" namespace skyline::gpu { + class MegaBuffer; + + /** + * @brief The Buffer Manager is responsible for maintaining a global view of buffers being mapped from the guest to the host, any lookups and creation of host buffer from 
equivalent guest buffer alongside reconciliation of any overlaps with existing textures + */ + class BufferManager { + private: + GPU &gpu; + std::mutex mutex; //!< Synchronizes access to the buffer mappings + std::vector> buffers; //!< A sorted vector of all buffer mappings + + friend class MegaBuffer; + + /** + * @brief A wrapper around a buffer which can be utilized as backing storage for a megabuffer and can track its state to avoid concurrent usage + */ + struct MegaBufferSlot { + std::atomic_flag active{true}; //!< If the megabuffer is currently being utilized, we want to construct a buffer as active + std::shared_ptr cycle; //!< The latest cycle on the fence, all waits must be performed through this + + constexpr static vk::DeviceSize Size{100 * 1024 * 1024}; //!< Size in bytes of the megabuffer (100MiB) + memory::Buffer backing; //!< The GPU buffer as the backing storage for the megabuffer + + MegaBufferSlot(GPU &gpu); + }; + + /** + * @return If the end of the supplied buffer is less than the supplied pointer + */ + static bool BufferLessThan(const std::shared_ptr &it, u8 *pointer); + + public: + std::list megaBuffers; //!< A pool of all allocated megabuffers, these are dynamically utilized + + BufferManager(GPU &gpu); + + /** + * @return A dynamically allocated megabuffer which can be used to store buffer modifications allowing them to be replayed in-sequence on the GPU + * @note This object **must** be destroyed to be reclaimed by the manager and prevent a memory leak + */ + MegaBuffer AcquireMegaBuffer(const std::shared_ptr &cycle); + + /** + * @return A pre-existing or newly created Buffer object which covers the supplied mappings + */ + BufferView FindOrCreate(GuestBuffer guestMapping, const std::shared_ptr &cycle = nullptr); + }; + /** * @brief A simple linearly allocated GPU-side buffer used to temporarily store buffer modifications allowing them to be replayed in-sequence on the GPU + * @note This class is **not** thread-safe and any calls must be 
externally synchronized */ class MegaBuffer { private: - constexpr static vk::DeviceSize Size{0x6'400'000}; //!< Size in bytes of the megabuffer (100MiB) - - memory::Buffer backing; //!< The backing GPU buffer - std::mutex mutex; //!< Synchronizes access to freeRegion - span freeRegion; //!< Span of unallocated space in the megabuffer + BufferManager::MegaBufferSlot &slot; + span freeRegion; //!< The unallocated space in the megabuffer public: - MegaBuffer(GPU &gpu); + MegaBuffer(BufferManager::MegaBufferSlot &slot); + + ~MegaBuffer(); /** * @brief Resets the free region of the megabuffer to its initial state, data is left intact but may be overwritten @@ -36,29 +84,4 @@ namespace skyline::gpu { */ vk::DeviceSize Push(span data, bool pageAlign = false); }; - - /** - * @brief The Buffer Manager is responsible for maintaining a global view of buffers being mapped from the guest to the host, any lookups and creation of host buffer from equivalent guest buffer alongside reconciliation of any overlaps with existing textures - */ - class BufferManager { - private: - GPU &gpu; - std::mutex mutex; //!< Synchronizes access to the buffer mappings - std::vector> buffers; //!< A sorted vector of all buffer mappings - - /** - * @return If the end of the supplied buffer is less than the supplied pointer - */ - static bool BufferLessThan(const std::shared_ptr &it, u8 *pointer); - - public: - MegaBuffer megaBuffer; //!< The megabuffer used to temporarily store buffer modifications allowing them to be replayed in-sequence on the GPU - - BufferManager(GPU &gpu); - - /** - * @return A pre-existing or newly created Buffer object which covers the supplied mappings - */ - BufferView FindOrCreate(GuestBuffer guestMapping, const std::shared_ptr &cycle = nullptr); - }; } diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp index 200eb1f2..e0235307 100644 --- 
a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp @@ -5,7 +5,7 @@ #include "command_executor.h" namespace skyline::gpu::interconnect { - CommandExecutor::CommandExecutor(const DeviceState &state) : gpu(*state.gpu), activeCommandBuffer(gpu.scheduler.AllocateCommandBuffer()), cycle(activeCommandBuffer.GetFenceCycle()) {} + CommandExecutor::CommandExecutor(const DeviceState &state) : gpu(*state.gpu), activeCommandBuffer(gpu.scheduler.AllocateCommandBuffer()), cycle(activeCommandBuffer.GetFenceCycle()), megaBuffer(gpu.buffer.AcquireMegaBuffer(cycle)) {} CommandExecutor::~CommandExecutor() { cycle->Cancel(); @@ -227,7 +227,7 @@ namespace skyline::gpu::interconnect { cycle = activeCommandBuffer.Reset(); - gpu.buffer.megaBuffer.Reset(); + megaBuffer.Reset(); } } } diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h index 5ea03ccf..52c6d520 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h @@ -45,6 +45,7 @@ namespace skyline::gpu::interconnect { public: std::shared_ptr cycle; //!< The fence cycle that this command executor uses to wait for the GPU to finish executing commands + MegaBuffer megaBuffer; //!< The megabuffer used to temporarily store buffer modifications allowing them to be replayed in-sequence on the GPU CommandExecutor(const DeviceState &state); diff --git a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h index 38cb6958..4590ccf9 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h @@ -738,7 +738,7 @@ namespace skyline::gpu::interconnect { void ConstantBufferUpdate(std::vector data, u32 offset) { auto 
constantBuffer{GetConstantBufferSelector().value()}; - constantBuffer.Write(executor, gpu.buffer.megaBuffer, data, offset); + constantBuffer.Write(executor, executor.megaBuffer, data, offset); } /* Shader Program */ @@ -1110,10 +1110,10 @@ namespace skyline::gpu::interconnect { auto view{pipelineStage.constantBuffers[constantBuffer.index].view}; std::scoped_lock lock(view); - if (auto megaBufferOffset{view.AcquireMegaBuffer()}) { + if (auto megaBufferOffset{view.AcquireMegaBuffer(executor.megaBuffer)}) { // If the buffer is megabuffered then since we don't get out data from the underlying buffer, rather the megabuffer which stays consistent throughout a single execution, we can skip registering usage bufferDescriptors[bufferIndex] = vk::DescriptorBufferInfo{ - .buffer = gpu.buffer.megaBuffer.GetBacking(), + .buffer = executor.megaBuffer.GetBacking(), .offset = megaBufferOffset, .range = view->view->size }; @@ -2837,9 +2837,9 @@ namespace skyline::gpu::interconnect { std::scoped_lock lock(indexBufferView); boundIndexBuffer->type = indexBuffer.type; - if (auto megaBufferOffset{indexBufferView.AcquireMegaBuffer()}) { + if (auto megaBufferOffset{indexBufferView.AcquireMegaBuffer(executor.megaBuffer)}) { // If the buffer is megabuffered then since we don't get out data from the underlying buffer, rather the megabuffer which stays consistent throughout a single execution, we can skip registering usage - boundIndexBuffer->handle = gpu.buffer.megaBuffer.GetBacking(); + boundIndexBuffer->handle = executor.megaBuffer.GetBacking(); boundIndexBuffer->offset = megaBufferOffset; } else { indexBufferView.RegisterUsage([=](const Buffer::BufferViewStorage &view, const std::shared_ptr &buffer) { @@ -2872,9 +2872,9 @@ namespace skyline::gpu::interconnect { std::scoped_lock vertexBufferLock(vertexBufferView); - if (auto megaBufferOffset{vertexBufferView.AcquireMegaBuffer()}) { + if (auto megaBufferOffset{vertexBufferView.AcquireMegaBuffer(executor.megaBuffer)}) { // If the buffer is 
megabuffered then since we don't get out data from the underlying buffer, rather the megabuffer which stays consistent throughout a single execution, we can skip registering usage - boundVertexBuffers->handles[index] = gpu.buffer.megaBuffer.GetBacking(); + boundVertexBuffers->handles[index] = executor.megaBuffer.GetBacking(); boundVertexBuffers->offsets[index] = megaBufferOffset; } else { vertexBufferView.RegisterUsage([handle = boundVertexBuffers->handles.data() + index, offset = boundVertexBuffers->offsets.data() + index](const Buffer::BufferViewStorage &view, const std::shared_ptr &buffer) {