mirror of
https://github.com/skyline-emu/skyline.git
synced 2025-01-01 11:05:28 +03:00
Begin command buffers asynchronously in command executor
vkBeginCommandBuffer can take quite some time on Adreno, so move it to the cycle waiter thread where it won't block GPFIFO.
This commit is contained in:
parent
4b3e906c22
commit
1f9de17e98
@ -15,6 +15,12 @@ namespace skyline::gpu::interconnect {
|
|||||||
outgoing{*state.settings->executorSlotCount},
|
outgoing{*state.settings->executorSlotCount},
|
||||||
thread{&CommandRecordThread::Run, this} {}
|
thread{&CommandRecordThread::Run, this} {}
|
||||||
|
|
||||||
|
// Stores a reference to the given slot; nothing else happens at construction, the work is done in the destructor
CommandRecordThread::Slot::ScopedBegin::ScopedBegin(CommandRecordThread::Slot &slot) : slot{slot} {}
|
||||||
|
|
||||||
|
// On destruction, begins the command buffer of the referenced slot by calling Slot::Begin()
CommandRecordThread::Slot::ScopedBegin::~ScopedBegin() {
|
||||||
|
slot.Begin();
|
||||||
|
}
|
||||||
|
|
||||||
static vk::raii::CommandBuffer AllocateRaiiCommandBuffer(GPU &gpu, vk::raii::CommandPool &pool) {
|
static vk::raii::CommandBuffer AllocateRaiiCommandBuffer(GPU &gpu, vk::raii::CommandPool &pool) {
|
||||||
return {gpu.vkDevice, (*gpu.vkDevice).allocateCommandBuffers(
|
return {gpu.vkDevice, (*gpu.vkDevice).allocateCommandBuffers(
|
||||||
{
|
{
|
||||||
@ -35,14 +41,17 @@ namespace skyline::gpu::interconnect {
|
|||||||
commandBuffer{AllocateRaiiCommandBuffer(gpu, commandPool)},
|
commandBuffer{AllocateRaiiCommandBuffer(gpu, commandPool)},
|
||||||
fence{gpu.vkDevice, vk::FenceCreateInfo{ .flags = vk::FenceCreateFlagBits::eSignaled }},
|
fence{gpu.vkDevice, vk::FenceCreateInfo{ .flags = vk::FenceCreateFlagBits::eSignaled }},
|
||||||
semaphore{gpu.vkDevice, vk::SemaphoreCreateInfo{}},
|
semaphore{gpu.vkDevice, vk::SemaphoreCreateInfo{}},
|
||||||
cycle{std::make_shared<FenceCycle>(gpu.vkDevice, *fence, *semaphore, true)} {}
|
cycle{std::make_shared<FenceCycle>(gpu.vkDevice, *fence, *semaphore, true)} {
|
||||||
|
Begin();
|
||||||
|
}
|
||||||
|
|
||||||
CommandRecordThread::Slot::Slot(Slot &&other)
|
CommandRecordThread::Slot::Slot(Slot &&other)
|
||||||
: commandPool{std::move(other.commandPool)},
|
: commandPool{std::move(other.commandPool)},
|
||||||
commandBuffer{std::move(other.commandBuffer)},
|
commandBuffer{std::move(other.commandBuffer)},
|
||||||
fence{std::move(other.fence)},
|
fence{std::move(other.fence)},
|
||||||
semaphore{std::move(other.semaphore)},
|
semaphore{std::move(other.semaphore)},
|
||||||
cycle{std::move(other.cycle)} {}
|
cycle{std::move(other.cycle)},
|
||||||
|
ready{other.ready} {}
|
||||||
|
|
||||||
std::shared_ptr<FenceCycle> CommandRecordThread::Slot::Reset(GPU &gpu) {
|
std::shared_ptr<FenceCycle> CommandRecordThread::Slot::Reset(GPU &gpu) {
|
||||||
cycle->Wait();
|
cycle->Wait();
|
||||||
@ -51,6 +60,21 @@ namespace skyline::gpu::interconnect {
|
|||||||
return cycle;
|
return cycle;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Blocks until Begin() has marked this slot as ready, then attaches a ScopedBegin object to the cycle of this slot
void CommandRecordThread::Slot::WaitReady() {
|
||||||
|
std::unique_lock lock{beginLock};
|
||||||
|
// Sleeps on beginCondition until the ready flag is true; Begin() sets the flag and notifies while holding the same lock
beginCondition.wait(lock, [this] { return ready; });
|
||||||
|
// The destructor of ScopedBegin calls Begin() on this slot again
// NOTE(review): presumably the cycle destroys attached objects on the cycle waiter thread once it is signalled - confirm against FenceCycle
cycle->AttachObject(std::make_shared<ScopedBegin>(*this));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Begins the command buffer of this slot for one-time submission, then marks the slot as ready and wakes all threads blocked in WaitReady()
void CommandRecordThread::Slot::Begin() {
|
||||||
|
// Held for the whole function so that waiters only observe the ready flag as true after the begin call has returned
std::unique_lock lock{beginLock};
|
||||||
|
commandBuffer.begin(vk::CommandBufferBeginInfo{
|
||||||
|
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit,
|
||||||
|
});
|
||||||
|
ready = true;
|
||||||
|
// Wakes every thread waiting on beginCondition (see the wait in WaitReady)
beginCondition.notify_all();
|
||||||
|
}
|
||||||
|
|
||||||
void CommandRecordThread::ProcessSlot(Slot *slot) {
|
void CommandRecordThread::ProcessSlot(Slot *slot) {
|
||||||
TRACE_EVENT_FMT("gpu", "ProcessSlot: 0x{:X}, execution: {}", slot, slot->executionNumber);
|
TRACE_EVENT_FMT("gpu", "ProcessSlot: 0x{:X}, execution: {}", slot, slot->executionNumber);
|
||||||
auto &gpu{*state.gpu};
|
auto &gpu{*state.gpu};
|
||||||
@ -83,6 +107,7 @@ namespace skyline::gpu::interconnect {
|
|||||||
}
|
}
|
||||||
|
|
||||||
slot->commandBuffer.end();
|
slot->commandBuffer.end();
|
||||||
|
slot->ready = false;
|
||||||
|
|
||||||
gpu.scheduler.SubmitCommandBuffer(slot->commandBuffer, slot->cycle);
|
gpu.scheduler.SubmitCommandBuffer(slot->commandBuffer, slot->cycle);
|
||||||
|
|
||||||
@ -404,9 +429,7 @@ namespace skyline::gpu::interconnect {
|
|||||||
FinishRenderPass();
|
FinishRenderPass();
|
||||||
|
|
||||||
{
|
{
|
||||||
slot->commandBuffer.begin(vk::CommandBufferBeginInfo{
|
slot->WaitReady();
|
||||||
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit,
|
|
||||||
});
|
|
||||||
|
|
||||||
// We need this barrier here to ensure that resources are in the state we expect them to be in, we shouldn't overwrite resources while prior commands might still be using them or read from them while they might be modified by prior commands
|
// We need this barrier here to ensure that resources are in the state we expect them to be in, we shouldn't overwrite resources while prior commands might still be using them or read from them while they might be modified by prior commands
|
||||||
slot->commandBuffer.pipelineBarrier(
|
slot->commandBuffer.pipelineBarrier(
|
||||||
|
@ -19,6 +19,17 @@ namespace skyline::gpu::interconnect {
|
|||||||
* @brief Single execution slot, buffered back and forth between the GPFIFO thread and the record thread
|
* @brief Single execution slot, buffered back and forth between the GPFIFO thread and the record thread
|
||||||
*/
|
*/
|
||||||
struct Slot {
|
struct Slot {
|
||||||
|
/**
|
||||||
|
* @brief Helper to begin the slot command buffer on the cycle waiter thread
|
||||||
|
*/
|
||||||
|
struct ScopedBegin {
|
||||||
|
Slot &slot; //!< The slot that has Begin() called on it when this object is destroyed
|
||||||
|
|
||||||
|
ScopedBegin(Slot &slot);
|
||||||
|
|
||||||
|
~ScopedBegin(); //!< Calls Begin() on the referenced slot
|
||||||
|
};
|
||||||
|
|
||||||
vk::raii::CommandPool commandPool; //!< Use one command pool per slot since command buffers from different slots may be recorded into on multiple threads at the same time
|
vk::raii::CommandPool commandPool; //!< Use one command pool per slot since command buffers from different slots may be recorded into on multiple threads at the same time
|
||||||
vk::raii::CommandBuffer commandBuffer;
|
vk::raii::CommandBuffer commandBuffer;
|
||||||
vk::raii::Fence fence;
|
vk::raii::Fence fence;
|
||||||
@ -26,7 +37,10 @@ namespace skyline::gpu::interconnect {
|
|||||||
std::shared_ptr<FenceCycle> cycle;
|
std::shared_ptr<FenceCycle> cycle;
|
||||||
boost::container::stable_vector<node::NodeVariant> nodes;
|
boost::container::stable_vector<node::NodeVariant> nodes;
|
||||||
LinearAllocatorState<> allocator;
|
LinearAllocatorState<> allocator;
|
||||||
|
std::mutex beginLock;
|
||||||
|
std::condition_variable beginCondition;
|
||||||
u32 executionNumber;
|
u32 executionNumber;
|
||||||
|
bool ready{}; //!< If this slot's command buffer has had 'beginCommandBuffer' called and is ready to have commands recorded into it
|
||||||
bool capture{}; //!< If this slot's Vulkan commands should be captured using the renderdoc API
|
bool capture{}; //!< If this slot's Vulkan commands should be captured using the renderdoc API
|
||||||
|
|
||||||
Slot(GPU &gpu);
|
Slot(GPU &gpu);
|
||||||
@ -38,6 +52,13 @@ namespace skyline::gpu::interconnect {
|
|||||||
* @return A new fence cycle for the reset command buffer
|
* @return A new fence cycle for the reset command buffer
|
||||||
*/
|
*/
|
||||||
std::shared_ptr<FenceCycle> Reset(GPU &gpu);
|
std::shared_ptr<FenceCycle> Reset(GPU &gpu);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Waits for the command buffer to be begun so it can be recorded into
|
||||||
|
*/
|
||||||
|
void WaitReady();
|
||||||
|
|
||||||
|
void Begin();
|
||||||
};
|
};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
Loading…
Reference in New Issue
Block a user