mirror of
https://github.com/skyline-emu/skyline.git
synced 2025-01-17 18:07:54 +03:00
Skip waiting on host GPU after command buffer submission
We previously waited on the host GPU after `Execute`. This is suboptimal because it causes a major stall on the CPU, which can lead to several adverse effects, such as downclocking by the governor and losing the opportunity to work in parallel with the GPU. This has now been fixed by splitting `Execute`'s functionality into two functions, `Submit` and `SubmitWithFlush`. Both execute all nodes and submit the resulting command buffer to the GPU, but `SubmitWithFlush` then waits for the GPU to complete, while `Submit` does not wait and lets the CPU work ahead of the GPU.
This commit is contained in:
parent
5129d2ae78
commit
662ea532d8
@ -5,7 +5,7 @@
|
|||||||
#include "command_executor.h"
|
#include "command_executor.h"
|
||||||
|
|
||||||
namespace skyline::gpu::interconnect {
|
namespace skyline::gpu::interconnect {
|
||||||
CommandExecutor::CommandExecutor(const DeviceState &state) : gpu(*state.gpu), activeCommandBuffer(gpu.scheduler.AllocateCommandBuffer()), cycle(activeCommandBuffer.GetFenceCycle()), megaBuffer(gpu.buffer.AcquireMegaBuffer(cycle)) {}
|
CommandExecutor::CommandExecutor(const DeviceState &state) : gpu{*state.gpu}, activeCommandBuffer{gpu.scheduler.AllocateCommandBuffer()}, cycle{activeCommandBuffer.GetFenceCycle()}, megaBuffer{gpu.buffer.AcquireMegaBuffer(cycle)} {}
|
||||||
|
|
||||||
CommandExecutor::~CommandExecutor() {
|
CommandExecutor::~CommandExecutor() {
|
||||||
cycle->Cancel();
|
cycle->Cancel();
|
||||||
@ -168,10 +168,7 @@ namespace skyline::gpu::interconnect {
|
|||||||
flushCallbacks.emplace_back(std::forward<decltype(callback)>(callback));
|
flushCallbacks.emplace_back(std::forward<decltype(callback)>(callback));
|
||||||
}
|
}
|
||||||
|
|
||||||
void CommandExecutor::Execute() {
|
void CommandExecutor::SubmitInternal() {
|
||||||
if (!nodes.empty()) {
|
|
||||||
TRACE_EVENT("gpu", "CommandExecutor::Execute");
|
|
||||||
|
|
||||||
if (renderPass)
|
if (renderPass)
|
||||||
FinishRenderPass();
|
FinishRenderPass();
|
||||||
|
|
||||||
@ -224,11 +221,25 @@ namespace skyline::gpu::interconnect {
|
|||||||
nodes.clear();
|
nodes.clear();
|
||||||
attachedTextures.clear();
|
attachedTextures.clear();
|
||||||
attachedBuffers.clear();
|
attachedBuffers.clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void CommandExecutor::Submit() {
|
||||||
|
if (!nodes.empty()) {
|
||||||
|
TRACE_EVENT("gpu", "CommandExecutor::Submit");
|
||||||
|
SubmitInternal();
|
||||||
|
activeCommandBuffer = gpu.scheduler.AllocateCommandBuffer();
|
||||||
|
cycle = activeCommandBuffer.GetFenceCycle();
|
||||||
|
megaBuffer = gpu.buffer.AcquireMegaBuffer(cycle);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void CommandExecutor::SubmitWithFlush() {
|
||||||
|
if (!nodes.empty()) {
|
||||||
|
TRACE_EVENT("gpu", "CommandExecutor::SubmitWithFlush");
|
||||||
|
SubmitInternal();
|
||||||
cycle = activeCommandBuffer.Reset();
|
cycle = activeCommandBuffer.Reset();
|
||||||
|
|
||||||
megaBuffer.Reset();
|
megaBuffer.Reset();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -43,6 +43,12 @@ namespace skyline::gpu::interconnect {
|
|||||||
*/
|
*/
|
||||||
void FinishRenderPass();
|
void FinishRenderPass();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Execute all the nodes and submit the resulting command buffer to the GPU
|
||||||
|
* @note It is the responsibility of the caller to handle resetting of command buffers, fence cycle and megabuffers
|
||||||
|
*/
|
||||||
|
void SubmitInternal();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
std::shared_ptr<FenceCycle> cycle; //!< The fence cycle that this command executor uses to wait for the GPU to finish executing commands
|
std::shared_ptr<FenceCycle> cycle; //!< The fence cycle that this command executor uses to wait for the GPU to finish executing commands
|
||||||
MegaBuffer megaBuffer; //!< The megabuffer used to temporarily store buffer modifications allowing them to be replayed in-sequence on the GPU
|
MegaBuffer megaBuffer; //!< The megabuffer used to temporarily store buffer modifications allowing them to be replayed in-sequence on the GPU
|
||||||
@ -102,6 +108,11 @@ namespace skyline::gpu::interconnect {
|
|||||||
/**
|
/**
|
||||||
* @brief Execute all the nodes and submit the resulting command buffer to the GPU
|
* @brief Execute all the nodes and submit the resulting command buffer to the GPU
|
||||||
*/
|
*/
|
||||||
void Execute();
|
void Submit();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Execute all the nodes and submit the resulting command buffer to the GPU then wait for the completion of the command buffer
|
||||||
|
*/
|
||||||
|
void SubmitWithFlush();
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -630,7 +630,7 @@ namespace skyline::gpu::interconnect {
|
|||||||
T object;
|
T object;
|
||||||
std::scoped_lock lock{view};
|
std::scoped_lock lock{view};
|
||||||
view.Read(pExecutor.cycle, []() {
|
view.Read(pExecutor.cycle, []() {
|
||||||
// TODO: here we should trigger an execute, however that doesn't currently work due to Read being called mid-draw and attached objects not handling this case
|
// TODO: here we should trigger a SubmitWithFlush, however that doesn't currently work due to Read being called mid-draw and attached objects not handling this case
|
||||||
Logger::Warn("GPU dirty buffer reads for attached buffers are unimplemented");
|
Logger::Warn("GPU dirty buffer reads for attached buffers are unimplemented");
|
||||||
}, span<T>(object).template cast<u8>(), dstOffset);
|
}, span<T>(object).template cast<u8>(), dstOffset);
|
||||||
return object;
|
return object;
|
||||||
|
@ -19,7 +19,7 @@ namespace skyline::soc::gm20b::engine {
|
|||||||
ENGINE_STRUCT_CASE(syncpoint, action, {
|
ENGINE_STRUCT_CASE(syncpoint, action, {
|
||||||
if (action.operation == Registers::Syncpoint::Operation::Incr) {
|
if (action.operation == Registers::Syncpoint::Operation::Incr) {
|
||||||
Logger::Debug("Increment syncpoint: {}", +action.index);
|
Logger::Debug("Increment syncpoint: {}", +action.index);
|
||||||
channelCtx.executor.Execute();
|
channelCtx.executor.Submit();
|
||||||
syncpoints.at(action.index).Increment();
|
syncpoints.at(action.index).Increment();
|
||||||
} else if (action.operation == Registers::Syncpoint::Operation::Wait) {
|
} else if (action.operation == Registers::Syncpoint::Operation::Wait) {
|
||||||
Logger::Debug("Wait syncpoint: {}, thresh: {}", +action.index, registers.syncpoint->payload);
|
Logger::Debug("Wait syncpoint: {}, thresh: {}", +action.index, registers.syncpoint->payload);
|
||||||
|
@ -656,7 +656,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
|
|||||||
|
|
||||||
ENGINE_CASE(syncpointAction, {
|
ENGINE_CASE(syncpointAction, {
|
||||||
Logger::Debug("Increment syncpoint: {}", static_cast<u16>(syncpointAction.id));
|
Logger::Debug("Increment syncpoint: {}", static_cast<u16>(syncpointAction.id));
|
||||||
channelCtx.executor.Execute();
|
channelCtx.executor.Submit();
|
||||||
syncpoints.at(syncpointAction.id).Increment();
|
syncpoints.at(syncpointAction.id).Increment();
|
||||||
})
|
})
|
||||||
|
|
||||||
|
@ -36,7 +36,7 @@ namespace skyline::soc::gm20b::engine {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
executor.Execute();
|
executor.SubmitWithFlush();
|
||||||
if (registers.launchDma->multiLineEnable) {
|
if (registers.launchDma->multiLineEnable) {
|
||||||
if (registers.launchDma->srcMemoryLayout == Registers::LaunchDma::MemoryLayout::Pitch &&
|
if (registers.launchDma->srcMemoryLayout == Registers::LaunchDma::MemoryLayout::Pitch &&
|
||||||
registers.launchDma->dstMemoryLayout == Registers::LaunchDma::MemoryLayout::BlockLinear)
|
registers.launchDma->dstMemoryLayout == Registers::LaunchDma::MemoryLayout::BlockLinear)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user