mirror of
https://github.com/skyline-emu/skyline.git
synced 2025-01-16 10:37:55 +03:00
Impl preserve attached buffers/textures to avoid GPFIFO lock thrashing
When profiling SMO, it became obvious that the constant locking of textures and buffers in SyncDescriptors took up a large amount of CPU time (3-5%), a precious resource in intensive areas like Metro. This commit implements somewhat of a workaround to avoid constant relocking, if a buffer is frequently attached on the GPU and almost never used on the CPU we can keep the lock held between executions. Of course it's not that simple though, if the guest tries to lock a texture for the first time which has already been locked as preserve on the GPFIFO we need to avoid a deadlock. This is acheived through a combination of two things: first we periodically clear the locked attachments every 2*SlotCount submissions, preventing a complete deadlock on the CPU (just a long wait instead) and meaning that the next time the resource is attached on the GPU it will not be marked for preservation due to having been locked on the guest before; second, we always need to unlock everything when the GPU thread runs out of work, as the perioding clearing will not execute in this case which would otherwise leave the textures locked on the GPFIFO thread forever (if guest was waiting on a lock to submit work). It should be noted that we don't clear preserve attached resources in the latter scenario, only unlock them and then relock when more work is available.
This commit is contained in:
parent
0e8ccf1e99
commit
98c0cc3e7f
@ -345,7 +345,7 @@ namespace skyline::gpu {
|
|||||||
|
|
||||||
void Buffer::unlock() {
|
void Buffer::unlock() {
|
||||||
tag = ContextTag{};
|
tag = ContextTag{};
|
||||||
backingImmutability = BackingImmutability::None;
|
AllowAllBackingWrites();
|
||||||
mutex.unlock();
|
mutex.unlock();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -43,7 +43,7 @@ namespace skyline::gpu {
|
|||||||
class Buffer : public std::enable_shared_from_this<Buffer> {
|
class Buffer : public std::enable_shared_from_this<Buffer> {
|
||||||
private:
|
private:
|
||||||
GPU &gpu;
|
GPU &gpu;
|
||||||
SpinLock mutex; //!< Synchronizes any mutations to the buffer or its backing
|
RecursiveSpinLock mutex; //!< Synchronizes any mutations to the buffer or its backing
|
||||||
std::atomic<ContextTag> tag{}; //!< The tag associated with the last lock call
|
std::atomic<ContextTag> tag{}; //!< The tag associated with the last lock call
|
||||||
memory::Buffer backing;
|
memory::Buffer backing;
|
||||||
std::optional<GuestBuffer> guest;
|
std::optional<GuestBuffer> guest;
|
||||||
@ -190,6 +190,11 @@ namespace skyline::gpu {
|
|||||||
backingImmutability = BackingImmutability::AllWrites;
|
backingImmutability = BackingImmutability::AllWrites;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void AllowAllBackingWrites() {
|
||||||
|
std::scoped_lock lock{stateMutex};
|
||||||
|
backingImmutability = BackingImmutability::None;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return If sequenced writes to the backing must not occur on the CPU
|
* @return If sequenced writes to the backing must not occur on the CPU
|
||||||
* @note The buffer **must** be locked prior to calling this
|
* @note The buffer **must** be locked prior to calling this
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
// SPDX-License-Identifier: MPL-2.0
|
// SPDX-License-Identifier: MPL-2.0
|
||||||
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||||
|
|
||||||
|
#include <range/v3/view.hpp>
|
||||||
#include <loader/loader.h>
|
#include <loader/loader.h>
|
||||||
#include <gpu.h>
|
#include <gpu.h>
|
||||||
#include "command_executor.h"
|
#include "command_executor.h"
|
||||||
@ -234,8 +235,12 @@ namespace skyline::gpu::interconnect {
|
|||||||
textureManagerLock.emplace(gpu.texture);
|
textureManagerLock.emplace(gpu.texture);
|
||||||
|
|
||||||
bool didLock{view->LockWithTag(tag)};
|
bool didLock{view->LockWithTag(tag)};
|
||||||
if (didLock)
|
if (didLock) {
|
||||||
|
if (view->texture->FrequentlyLocked())
|
||||||
attachedTextures.emplace_back(view->texture);
|
attachedTextures.emplace_back(view->texture);
|
||||||
|
else
|
||||||
|
preserveAttachedTextures.emplace_back(view->texture);
|
||||||
|
}
|
||||||
|
|
||||||
return didLock;
|
return didLock;
|
||||||
}
|
}
|
||||||
@ -259,8 +264,12 @@ namespace skyline::gpu::interconnect {
|
|||||||
bufferManagerLock.emplace(gpu.buffer);
|
bufferManagerLock.emplace(gpu.buffer);
|
||||||
|
|
||||||
bool didLock{view.LockWithTag(tag)};
|
bool didLock{view.LockWithTag(tag)};
|
||||||
if (didLock)
|
if (didLock) {
|
||||||
|
if (view.GetBuffer()->FrequentlyLocked())
|
||||||
attachedBuffers.emplace_back(view.GetBuffer()->shared_from_this());
|
attachedBuffers.emplace_back(view.GetBuffer()->shared_from_this());
|
||||||
|
else
|
||||||
|
preserveAttachedBuffers.emplace_back(view.GetBuffer()->shared_from_this());
|
||||||
|
}
|
||||||
return didLock;
|
return didLock;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -271,14 +280,20 @@ namespace skyline::gpu::interconnect {
|
|||||||
|
|
||||||
if (lock.OwnsLock()) {
|
if (lock.OwnsLock()) {
|
||||||
// Transfer ownership to executor so that the resource will stay locked for the period it is used on the GPU
|
// Transfer ownership to executor so that the resource will stay locked for the period it is used on the GPU
|
||||||
|
if (view.GetBuffer()->FrequentlyLocked())
|
||||||
attachedBuffers.emplace_back(view.GetBuffer()->shared_from_this());
|
attachedBuffers.emplace_back(view.GetBuffer()->shared_from_this());
|
||||||
|
else
|
||||||
|
preserveAttachedBuffers.emplace_back(view.GetBuffer()->shared_from_this());
|
||||||
lock.Release(); // The executor will handle unlocking the lock so it doesn't need to be handled here
|
lock.Release(); // The executor will handle unlocking the lock so it doesn't need to be handled here
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void CommandExecutor::AttachLockedBuffer(std::shared_ptr<Buffer> buffer, ContextLock<Buffer> &&lock) {
|
void CommandExecutor::AttachLockedBuffer(std::shared_ptr<Buffer> buffer, ContextLock<Buffer> &&lock) {
|
||||||
if (lock.OwnsLock()) {
|
if (lock.OwnsLock()) {
|
||||||
|
if (buffer->FrequentlyLocked())
|
||||||
attachedBuffers.emplace_back(std::move(buffer));
|
attachedBuffers.emplace_back(std::move(buffer));
|
||||||
|
else
|
||||||
|
preserveAttachedBuffers.emplace_back(std::move(buffer));
|
||||||
lock.Release(); // See AttachLockedBufferView(...)
|
lock.Release(); // See AttachLockedBufferView(...)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -381,24 +396,25 @@ namespace skyline::gpu::interconnect {
|
|||||||
}, {}, {}
|
}, {}, {}
|
||||||
);
|
);
|
||||||
|
|
||||||
for (const auto &texture : attachedTextures)
|
boost::container::small_vector<FenceCycle *, 8> chainedCycles;
|
||||||
|
for (const auto &texture : ranges::views::concat(attachedTextures, preserveAttachedTextures)) {
|
||||||
texture->SynchronizeHostInline(slot->commandBuffer, cycle, true);
|
texture->SynchronizeHostInline(slot->commandBuffer, cycle, true);
|
||||||
}
|
|
||||||
|
|
||||||
for (const auto &attachedBuffer : attachedBuffers)
|
|
||||||
if (attachedBuffer->SequencedCpuBackingWritesBlocked())
|
|
||||||
attachedBuffer->SynchronizeHost(); // Synchronize attached buffers from the CPU without using a staging buffer
|
|
||||||
|
|
||||||
for (const auto &attachedTexture : attachedTextures) {
|
|
||||||
// We don't need to attach the Texture to the cycle as a TextureView will already be attached
|
// We don't need to attach the Texture to the cycle as a TextureView will already be attached
|
||||||
cycle->ChainCycle(attachedTexture->cycle);
|
if (ranges::find(chainedCycles, texture->cycle.get()) == chainedCycles.end()) {
|
||||||
attachedTexture->cycle = cycle;
|
cycle->ChainCycle(texture->cycle);
|
||||||
|
chainedCycles.emplace_back(texture->cycle.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const auto &attachedBuffer : attachedBuffers) {
|
texture->cycle = cycle;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const auto &attachedBuffer : ranges::views::concat(attachedBuffers, preserveAttachedBuffers)) {
|
||||||
if (attachedBuffer->RequiresCycleAttach()) {
|
if (attachedBuffer->RequiresCycleAttach()) {
|
||||||
|
attachedBuffer->SynchronizeHost(); // Synchronize attached buffers from the CPU without using a staging buffer
|
||||||
cycle->AttachObject(attachedBuffer.buffer);
|
cycle->AttachObject(attachedBuffer.buffer);
|
||||||
attachedBuffer->UpdateCycle(cycle);
|
attachedBuffer->UpdateCycle(cycle);
|
||||||
|
attachedBuffer->AllowAllBackingWrites();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -412,6 +428,12 @@ namespace skyline::gpu::interconnect {
|
|||||||
bufferManagerLock.reset();
|
bufferManagerLock.reset();
|
||||||
megaBufferAllocatorLock.reset();
|
megaBufferAllocatorLock.reset();
|
||||||
allocator->Reset();
|
allocator->Reset();
|
||||||
|
|
||||||
|
// Periodically clear preserve attachments just in case there are new waiters which would otherwise end up waiting forever
|
||||||
|
if ((submissionNumber % CommandRecordThread::ActiveRecordSlots * 2) == 0) {
|
||||||
|
preserveAttachedBuffers.clear();
|
||||||
|
preserveAttachedTextures.clear();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void CommandExecutor::Submit() {
|
void CommandExecutor::Submit() {
|
||||||
@ -423,7 +445,33 @@ namespace skyline::gpu::interconnect {
|
|||||||
if (!slot->nodes.empty()) {
|
if (!slot->nodes.empty()) {
|
||||||
TRACE_EVENT("gpu", "CommandExecutor::Submit");
|
TRACE_EVENT("gpu", "CommandExecutor::Submit");
|
||||||
SubmitInternal();
|
SubmitInternal();
|
||||||
|
submissionNumber++;
|
||||||
}
|
}
|
||||||
|
|
||||||
ResetInternal();
|
ResetInternal();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void CommandExecutor::LockPreserve() {
|
||||||
|
if (!preserveLocked) {
|
||||||
|
preserveLocked = true;
|
||||||
|
|
||||||
|
for (auto &buffer : preserveAttachedBuffers)
|
||||||
|
buffer->LockWithTag(tag);
|
||||||
|
|
||||||
|
for (auto &texture : preserveAttachedTextures)
|
||||||
|
texture->LockWithTag(tag);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void CommandExecutor::UnlockPreserve() {
|
||||||
|
if (preserveLocked) {
|
||||||
|
for (auto &buffer : preserveAttachedBuffers)
|
||||||
|
buffer->unlock();
|
||||||
|
|
||||||
|
for (auto &texture : preserveAttachedTextures)
|
||||||
|
texture->unlock();
|
||||||
|
|
||||||
|
preserveLocked = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -15,6 +15,8 @@ namespace skyline::gpu::interconnect {
|
|||||||
*/
|
*/
|
||||||
class CommandRecordThread {
|
class CommandRecordThread {
|
||||||
public:
|
public:
|
||||||
|
static constexpr size_t ActiveRecordSlots{6}; //!< Maximum number of simultaneously active slots
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Single execution slot, buffered back and forth between the GPFIFO thread and the record thread
|
* @brief Single execution slot, buffered back and forth between the GPFIFO thread and the record thread
|
||||||
*/
|
*/
|
||||||
@ -40,7 +42,6 @@ namespace skyline::gpu::interconnect {
|
|||||||
const DeviceState &state;
|
const DeviceState &state;
|
||||||
std::thread thread;
|
std::thread thread;
|
||||||
|
|
||||||
static constexpr size_t ActiveRecordSlots{6}; //!< Maximum number of simultaneously active slots
|
|
||||||
CircularQueue<Slot *> incoming{ActiveRecordSlots}; //!< Slots pending recording
|
CircularQueue<Slot *> incoming{ActiveRecordSlots}; //!< Slots pending recording
|
||||||
CircularQueue<Slot *> outgoing{ActiveRecordSlots}; //!< Slots that have been submitted, may still be active on the GPU
|
CircularQueue<Slot *> outgoing{ActiveRecordSlots}; //!< Slots that have been submitted, may still be active on the GPU
|
||||||
|
|
||||||
@ -76,6 +77,7 @@ namespace skyline::gpu::interconnect {
|
|||||||
std::optional<std::scoped_lock<TextureManager>> textureManagerLock; //!< The lock on the texture manager, this is locked for the duration of the command execution from the first usage inside an execution to the submission
|
std::optional<std::scoped_lock<TextureManager>> textureManagerLock; //!< The lock on the texture manager, this is locked for the duration of the command execution from the first usage inside an execution to the submission
|
||||||
std::optional<std::scoped_lock<BufferManager>> bufferManagerLock; //!< The lock on the buffer manager, see above for details
|
std::optional<std::scoped_lock<BufferManager>> bufferManagerLock; //!< The lock on the buffer manager, see above for details
|
||||||
std::optional<std::scoped_lock<MegaBufferAllocator>> megaBufferAllocatorLock; //!< The lock on the megabuffer allocator, see above for details
|
std::optional<std::scoped_lock<MegaBufferAllocator>> megaBufferAllocatorLock; //!< The lock on the megabuffer allocator, see above for details
|
||||||
|
bool preserveLocked{};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief A wrapper of a Texture object that has been locked beforehand and must be unlocked afterwards
|
* @brief A wrapper of a Texture object that has been locked beforehand and must be unlocked afterwards
|
||||||
@ -94,6 +96,7 @@ namespace skyline::gpu::interconnect {
|
|||||||
~LockedTexture();
|
~LockedTexture();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
std::vector<LockedTexture> preserveAttachedTextures;
|
||||||
std::vector<LockedTexture> attachedTextures; //!< All textures that are attached to the current execution
|
std::vector<LockedTexture> attachedTextures; //!< All textures that are attached to the current execution
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -113,6 +116,7 @@ namespace skyline::gpu::interconnect {
|
|||||||
~LockedBuffer();
|
~LockedBuffer();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
std::vector<LockedBuffer> preserveAttachedBuffers;
|
||||||
std::vector<LockedBuffer> attachedBuffers; //!< All textures that are attached to the current execution
|
std::vector<LockedBuffer> attachedBuffers; //!< All textures that are attached to the current execution
|
||||||
|
|
||||||
|
|
||||||
@ -154,6 +158,7 @@ namespace skyline::gpu::interconnect {
|
|||||||
std::shared_ptr<FenceCycle> cycle; //!< The fence cycle that this command executor uses to wait for the GPU to finish executing commands
|
std::shared_ptr<FenceCycle> cycle; //!< The fence cycle that this command executor uses to wait for the GPU to finish executing commands
|
||||||
LinearAllocatorState<> *allocator;
|
LinearAllocatorState<> *allocator;
|
||||||
ContextTag tag; //!< The tag associated with this command executor, any tagged resource locking must utilize this tag
|
ContextTag tag; //!< The tag associated with this command executor, any tagged resource locking must utilize this tag
|
||||||
|
size_t submissionNumber{};
|
||||||
u32 executionNumber{};
|
u32 executionNumber{};
|
||||||
|
|
||||||
CommandExecutor(const DeviceState &state);
|
CommandExecutor(const DeviceState &state);
|
||||||
@ -258,5 +263,17 @@ namespace skyline::gpu::interconnect {
|
|||||||
* @brief Execute all the nodes and submit the resulting command buffer to the GPU
|
* @brief Execute all the nodes and submit the resulting command buffer to the GPU
|
||||||
*/
|
*/
|
||||||
void Submit();
|
void Submit();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Locks all preserve attached buffers/textures
|
||||||
|
* @note This **MUST** be called before attaching any buffers/textures to an execution
|
||||||
|
*/
|
||||||
|
void LockPreserve();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Unlocks all preserve attached buffers/textures
|
||||||
|
* @note This **MUST** be called when there is no GPU work left to be done to avoid deadlocks where the guest will try to lock a buffer/texture but the GPFIFO thread has no work so won't periodically unlock it
|
||||||
|
*/
|
||||||
|
void UnlockPreserve();
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <common/spin_lock.h>
|
||||||
#include <common/lockable_shared_ptr.h>
|
#include <common/lockable_shared_ptr.h>
|
||||||
#include <nce.h>
|
#include <nce.h>
|
||||||
#include <gpu/tag_allocator.h>
|
#include <gpu/tag_allocator.h>
|
||||||
@ -356,9 +357,9 @@ namespace skyline::gpu {
|
|||||||
class Texture : public std::enable_shared_from_this<Texture> {
|
class Texture : public std::enable_shared_from_this<Texture> {
|
||||||
private:
|
private:
|
||||||
GPU &gpu;
|
GPU &gpu;
|
||||||
std::mutex mutex; //!< Synchronizes any mutations to the texture or its backing
|
RecursiveSpinLock mutex; //!< Synchronizes any mutations to the texture or its backing
|
||||||
std::atomic<ContextTag> tag{}; //!< The tag associated with the last lock call
|
std::atomic<ContextTag> tag{}; //!< The tag associated with the last lock call
|
||||||
std::condition_variable backingCondition; //!< Signalled when a valid backing has been swapped in
|
std::condition_variable_any backingCondition; //!< Signalled when a valid backing has been swapped in
|
||||||
using BackingType = std::variant<vk::Image, vk::raii::Image, memory::Image>;
|
using BackingType = std::variant<vk::Image, vk::raii::Image, memory::Image>;
|
||||||
BackingType backing; //!< The Vulkan image that backs this texture, it is nullable
|
BackingType backing; //!< The Vulkan image that backs this texture, it is nullable
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user