diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp index 37213974..94f542a6 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp @@ -5,7 +5,7 @@ #include "command_executor.h" namespace skyline::gpu::interconnect { - CommandExecutor::CommandExecutor(const DeviceState &state) : gpu(*state.gpu), activeCommandBuffer(gpu.scheduler.AllocateCommandBuffer()), cycle(activeCommandBuffer.GetFenceCycle()), megaBuffer(gpu.buffer.AcquireMegaBuffer(cycle)) {} + CommandExecutor::CommandExecutor(const DeviceState &state) : gpu{*state.gpu}, activeCommandBuffer{gpu.scheduler.AllocateCommandBuffer()}, cycle{activeCommandBuffer.GetFenceCycle()}, megaBuffer{gpu.buffer.AcquireMegaBuffer(cycle)} {} CommandExecutor::~CommandExecutor() { cycle->Cancel(); @@ -168,67 +168,78 @@ namespace skyline::gpu::interconnect { flushCallbacks.emplace_back(std::forward<decltype(callback)>(callback)); } - void CommandExecutor::Execute() { - if (!nodes.empty()) { - TRACE_EVENT("gpu", "CommandExecutor::Execute"); + void CommandExecutor::SubmitInternal() { + if (renderPass) + FinishRenderPass(); - if (renderPass) - FinishRenderPass(); + { + auto &commandBuffer{*activeCommandBuffer}; + commandBuffer.begin(vk::CommandBufferBeginInfo{ + .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit, + }); - { - auto &commandBuffer{*activeCommandBuffer}; - commandBuffer.begin(vk::CommandBufferBeginInfo{ - .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit, - }); + for (auto texture : attachedTextures) { + texture->SynchronizeHostWithBuffer(commandBuffer, cycle, true); + texture->MarkGpuDirty(); + } - for (auto texture : attachedTextures) { - texture->SynchronizeHostWithBuffer(commandBuffer, cycle, true); - texture->MarkGpuDirty(); - } + for (const auto &delegate : attachedBuffers) + delegate->usageCallback = nullptr; - for (const auto &delegate : 
attachedBuffers) - delegate->usageCallback = nullptr; + vk::RenderPass lRenderPass; + u32 subpassIndex; - vk::RenderPass lRenderPass; - u32 subpassIndex; + using namespace node; + for (NodeVariant &node : nodes) { + #define NODE(name) [&](name& node) { node(commandBuffer, cycle, gpu); } + std::visit(VariantVisitor{ + NODE(FunctionNode), - using namespace node; - for (NodeVariant &node : nodes) { - #define NODE(name) [&](name& node) { node(commandBuffer, cycle, gpu); } - std::visit(VariantVisitor{ - NODE(FunctionNode), + [&](RenderPassNode &node) { + lRenderPass = node(commandBuffer, cycle, gpu); + subpassIndex = 0; + }, - [&](RenderPassNode &node) { - lRenderPass = node(commandBuffer, cycle, gpu); - subpassIndex = 0; - }, + [&](NextSubpassNode &node) { + node(commandBuffer, cycle, gpu); + ++subpassIndex; + }, + [&](SubpassFunctionNode &node) { node(commandBuffer, cycle, gpu, lRenderPass, subpassIndex); }, + [&](NextSubpassFunctionNode &node) { node(commandBuffer, cycle, gpu, lRenderPass, ++subpassIndex); }, - [&](NextSubpassNode &node) { - node(commandBuffer, cycle, gpu); - ++subpassIndex; - }, - [&](SubpassFunctionNode &node) { node(commandBuffer, cycle, gpu, lRenderPass, subpassIndex); }, - [&](NextSubpassFunctionNode &node) { node(commandBuffer, cycle, gpu, lRenderPass, ++subpassIndex); }, + NODE(RenderPassEndNode), + }, node); + #undef NODE + } - NODE(RenderPassEndNode), - }, node); - #undef NODE - } - - commandBuffer.end(); - gpu.scheduler.SubmitCommandBuffer(commandBuffer, activeCommandBuffer.GetFence()); + commandBuffer.end(); + gpu.scheduler.SubmitCommandBuffer(commandBuffer, activeCommandBuffer.GetFence()); for (const auto &delegate : attachedBuffers) delegate->view->megabufferOffset = 0; - nodes.clear(); - attachedTextures.clear(); - attachedBuffers.clear(); + nodes.clear(); + attachedTextures.clear(); + attachedBuffers.clear(); + } + } - cycle = activeCommandBuffer.Reset(); + void CommandExecutor::Submit() { + if (!nodes.empty()) { + TRACE_EVENT("gpu", 
"CommandExecutor::Submit"); + SubmitInternal(); + activeCommandBuffer = gpu.scheduler.AllocateCommandBuffer(); + cycle = activeCommandBuffer.GetFenceCycle(); + megaBuffer = gpu.buffer.AcquireMegaBuffer(cycle); + } + } - megaBuffer.Reset(); - } + void CommandExecutor::SubmitWithFlush() { + if (!nodes.empty()) { + TRACE_EVENT("gpu", "CommandExecutor::SubmitWithFlush"); + SubmitInternal(); + cycle = activeCommandBuffer.Reset(); + megaBuffer.Reset(); } } } diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h index 52c6d520..57dc7602 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h @@ -43,6 +43,12 @@ namespace skyline::gpu::interconnect { */ void FinishRenderPass(); + /** + * @brief Execute all the nodes and submit the resulting command buffer to the GPU + * @note It is the responsibility of the caller to handle resetting of command buffers, fence cycle and megabuffers + */ + void SubmitInternal(); + public: std::shared_ptr<FenceCycle> cycle; //!< The fence cycle that this command executor uses to wait for the GPU to finish executing commands MegaBuffer megaBuffer; //!< The megabuffer used to temporarily store buffer modifications allowing them to be replayed in-sequence on the GPU @@ -102,6 +108,11 @@ namespace skyline::gpu::interconnect { /** * @brief Execute all the nodes and submit the resulting command buffer to the GPU */ - void Execute(); + void Submit(); + + /** + * @brief Execute all the nodes and submit the resulting command buffer to the GPU then wait for the completion of the command buffer + */ + void SubmitWithFlush(); }; } diff --git a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h index fec76944..a057dd47 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h +++ 
b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h @@ -630,7 +630,7 @@ namespace skyline::gpu::interconnect { T object; std::scoped_lock lock{view}; view.Read(pExecutor.cycle, []() { - // TODO: here we should trigger an execute, however that doesn't currently work due to Read being called mid-draw and attached objects not handling this case + // TODO: here we should trigger a SubmitWithFlush, however that doesn't currently work due to Read being called mid-draw and attached objects not handling this case Logger::Warn("GPU dirty buffer reads for attached buffers are unimplemented"); }, span<T>(object).template cast<u8>(), dstOffset); return object; diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.cpp index 95d3ab53..155ac914 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.cpp @@ -19,7 +19,7 @@ namespace skyline::soc::gm20b::engine { ENGINE_STRUCT_CASE(syncpoint, action, { if (action.operation == Registers::Syncpoint::Operation::Incr) { Logger::Debug("Increment syncpoint: {}", +action.index); - channelCtx.executor.Execute(); + channelCtx.executor.Submit(); syncpoints.at(action.index).Increment(); } else if (action.operation == Registers::Syncpoint::Operation::Wait) { Logger::Debug("Wait syncpoint: {}, thresh: {}", +action.index, registers.syncpoint->payload); diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp index 344b0c7f..cfc4dcb8 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp @@ -656,7 +656,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { ENGINE_CASE(syncpointAction, { Logger::Debug("Increment syncpoint: {}", static_cast<u16>(syncpointAction.id)); - channelCtx.executor.Execute(); + channelCtx.executor.Submit(); 
syncpoints.at(syncpointAction.id).Increment(); }) diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp index 67e8a804..3146a235 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp @@ -36,7 +36,7 @@ namespace skyline::soc::gm20b::engine { return; } - executor.Execute(); + executor.SubmitWithFlush(); if (registers.launchDma->multiLineEnable) { if (registers.launchDma->srcMemoryLayout == Registers::LaunchDma::MemoryLayout::Pitch && registers.launchDma->dstMemoryLayout == Registers::LaunchDma::MemoryLayout::BlockLinear)