diff --git a/app/src/main/cpp/skyline/gpu/buffer.cpp b/app/src/main/cpp/skyline/gpu/buffer.cpp index d017a886..9dbe127f 100644 --- a/app/src/main/cpp/skyline/gpu/buffer.cpp +++ b/app/src/main/cpp/skyline/gpu/buffer.cpp @@ -45,12 +45,29 @@ namespace skyline::gpu { } void Buffer::MarkGpuDirty() { - if (dirtyState == DirtyState::GpuDirty) + if (dirtyState == DirtyState::GpuDirty || externallySynchronized) { + externallySynchronized = false; // We want to handle synchronize internally after the GPU work is done return; + } gpu.state.nce->RetrapRegions(*trapHandle, false); dirtyState = DirtyState::GpuDirty; } + void Buffer::MarkExternallySynchronized() { + TRACE_EVENT("gpu", "Buffer::MarkExternallySynchronized"); + if (externallySynchronized) + return; + + if (dirtyState == DirtyState::GpuDirty) + std::memcpy(mirror.data(), backing.data(), mirror.size()); + else if (dirtyState == DirtyState::CpuDirty) + std::memcpy(backing.data(), mirror.data(), mirror.size()); + + dirtyState = DirtyState::GpuDirty; // Any synchronization will take place on the GPU which in itself would make the buffer dirty + gpu.state.nce->RetrapRegions(*trapHandle, false); + externallySynchronized = true; + } + void Buffer::WaitOnFence() { TRACE_EVENT("gpu", "Buffer::WaitOnFence"); @@ -67,6 +84,9 @@ namespace skyline::gpu { WaitOnFence(); + if (externallySynchronized) + return; // If the buffer is externally synchronized, we don't need to synchronize it + TRACE_EVENT("gpu", "Buffer::SynchronizeHost"); std::memcpy(backing.data(), mirror.data(), mirror.size()); @@ -81,12 +101,15 @@ namespace skyline::gpu { } void Buffer::SynchronizeHostWithCycle(const std::shared_ptr &pCycle, bool rwTrap) { - if (dirtyState != DirtyState::CpuDirty || !guest) + if (dirtyState != DirtyState::CpuDirty || !guest || externallySynchronized) return; if (!cycle.owner_before(pCycle)) WaitOnFence(); + if (externallySynchronized) + return; + TRACE_EVENT("gpu", "Buffer::SynchronizeHostWithCycle"); std::memcpy(backing.data(), mirror.data(), mirror.size()); @@ -101,12 +124,15 @@ namespace skyline::gpu { } void Buffer::SynchronizeGuest(bool skipTrap, bool skipFence) { - if (dirtyState != DirtyState::GpuDirty || !guest) + if (dirtyState != DirtyState::GpuDirty || !guest || externallySynchronized) return; // If the buffer has not been used on the GPU or there's no guest buffer, there is no need to synchronize it if (!skipFence) WaitOnFence(); + if (externallySynchronized) + return; // If the buffer is externally synchronized, we don't need to synchronize it + TRACE_EVENT("gpu", "Buffer::SynchronizeGuest"); std::memcpy(mirror.data(), backing.data(), mirror.size()); @@ -131,6 +157,9 @@ namespace skyline::gpu { }; void Buffer::SynchronizeGuestWithCycle(const std::shared_ptr &pCycle) { + if (!guest) + return; // If there's no guest buffer, there is no need to synchronize it + if (!cycle.owner_before(pCycle)) WaitOnFence(); @@ -139,16 +168,16 @@ namespace skyline::gpu { } void Buffer::Read(span data, vk::DeviceSize offset) { - if (dirtyState == DirtyState::CpuDirty || dirtyState == DirtyState::Clean) + if (externallySynchronized || dirtyState == DirtyState::CpuDirty || dirtyState == DirtyState::Clean) std::memcpy(data.data(), mirror.data() + offset, data.size()); else if (dirtyState == DirtyState::GpuDirty) std::memcpy(data.data(), backing.data() + offset, data.size()); } - void Buffer::Write(span data, vk::DeviceSize offset, bool skipCleanHostWrite) { - if (dirtyState == DirtyState::CpuDirty || dirtyState == DirtyState::Clean) + void Buffer::Write(span data, vk::DeviceSize offset) { + if (externallySynchronized || dirtyState == DirtyState::CpuDirty || dirtyState == DirtyState::Clean) std::memcpy(mirror.data() + offset, data.data(), data.size()); - if ((!skipCleanHostWrite && dirtyState == DirtyState::Clean) || dirtyState == DirtyState::GpuDirty) + if (!externallySynchronized && ((dirtyState == DirtyState::Clean) || dirtyState == DirtyState::GpuDirty)) std::memcpy(backing.data() + offset, data.data(), data.size()); } @@ -234,7 +263,7 @@ namespace skyline::gpu { bufferDelegate->buffer->Read(data, offset + bufferDelegate->view->offset); } - void BufferView::Write(span data, vk::DeviceSize offset, bool skipCleanHostWrite) const { - bufferDelegate->buffer->Write(data, offset + bufferDelegate->view->offset, skipCleanHostWrite); + void BufferView::Write(span data, vk::DeviceSize offset) const { + bufferDelegate->buffer->Write(data, offset + bufferDelegate->view->offset); } } diff --git a/app/src/main/cpp/skyline/gpu/buffer.h b/app/src/main/cpp/skyline/gpu/buffer.h index c1ba6632..2e347fbf 100644 --- a/app/src/main/cpp/skyline/gpu/buffer.h +++ b/app/src/main/cpp/skyline/gpu/buffer.h @@ -31,6 +31,7 @@ namespace skyline::gpu { CpuDirty, //!< The CPU mappings have been modified but the GPU buffer is not up to date GpuDirty, //!< The GPU buffer has been modified but the CPU mappings have not been updated } dirtyState{DirtyState::CpuDirty}; //!< The state of the CPU mappings with respect to the GPU buffer + bool externallySynchronized{}; //!< Whether the host buffer is externally synchronized with the guest buffer, disables the buffer synchronization and aims to retain guest/host buffer data across buffer recreation public: /** @@ -123,11 +124,18 @@ namespace skyline::gpu { /** * @brief Marks the buffer as dirty on the GPU, it will be synced on the next call to SynchronizeGuest + * @note This clears the externally synchronized flag automatically * @note This **must** be called after syncing the buffer to the GPU not before * @note The buffer **must** be locked prior to calling this */ void MarkGpuDirty(); + /** + * @brief Marks the buffer as externally synchronized and automatically synchronizes the host buffer and guest buffer, ensuring the buffer is GPU dirty by the end of the current cycle is the responsibility of the API user + * @note The buffer **must** be locked and have the desired fence attached prior to calling this + */ + void MarkExternallySynchronized(); + /** * @brief Waits on a fence cycle if it exists till it's signalled and resets it after * @note The buffer **must** be locked prior to calling this @@ -166,14 +174,17 @@ namespace skyline::gpu { /** * @brief Reads data at the specified offset in the buffer + * @note The buffer **must** be locked prior to calling this + * @note If this buffer is externally synchronized, this will read exclusively from the guest buffer */ void Read(span data, vk::DeviceSize offset); /** * @brief Writes data at the specified offset in the buffer - * @param skipCleanHostWrite Skip writing to the host buffer if it's clean, assumes the buffer data will be synchronised externally + * @note The buffer **must** be locked prior to calling this + * @note If this buffer is externally synchronized, this will write to the guest buffer and not to the host buffer */ - void Write(span data, vk::DeviceSize offset, bool skipCleanHostWrite = false); + void Write(span data, vk::DeviceSize offset); /** * @return A cached or newly created view into this buffer with the supplied attributes @@ -245,14 +256,15 @@ namespace skyline::gpu { /** * @brief Reads data at the specified offset in the view * @note The view **must** be locked prior to calling this + * @note If this buffer is externally synchronized, this will read exclusively from the guest buffer */ void Read(span data, vk::DeviceSize offset) const; /** * @brief Writes data at the specified offset in the view * @note The view **must** be locked prior to calling this - * @param skipCleanHostWrite Skip writing to the host buffer if it's clean, assumes the buffer data will be synchronised externally + * @note If this buffer is externally synchronized, this will write to the guest buffer and not to the host buffer */ - void Write(span data, vk::DeviceSize offset, bool skipCleanHostWrite = false) const; + void Write(span data, vk::DeviceSize offset) const; }; } diff --git a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h index 23597b66..ac20e34b 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h @@ -623,16 +623,6 @@ namespace skyline::gpu::interconnect { view.Read(span(object).template cast(), offset); return object; } - - /** - * @brief Writes an object to the supplied offset in the constant buffer - * @note This must only be called when the GuestBuffer is resolved correctly - */ - template - void Write(T &object, size_t offset) { - std::scoped_lock lock{view}; - view.Write(span(object).template cast(), offset, true); - } }; ConstantBuffer constantBufferSelector; //!< The constant buffer selector is used to bind a constant buffer to a stage or update data in it @@ -700,10 +690,6 @@ namespace skyline::gpu::interconnect { if (!view) { auto mappings{channelCtx.asCtx->gmmu.TranslateRange(constantBufferSelector.iova, constantBufferSelector.size)}; view = gpu.buffer.FindOrCreate(mappings.front(), executor.cycle); - { - std::scoped_lock lock{*view}; - view->bufferDelegate->buffer->SynchronizeHost(false); - } constantBufferCache.Insert(constantBufferSelector.size, constantBufferSelector.iova, *view); } @@ -714,11 +700,17 @@ namespace skyline::gpu::interconnect { void ConstantBufferUpdate(u32 data, u32 offset) { auto constantBuffer{GetConstantBufferSelector().value()}; - constantBuffer.Write(data, offset); + auto& constantBufferView{constantBuffer.view}; + { + std::scoped_lock lock{constantBufferView}; + executor.AttachBuffer(constantBufferView); + constantBufferView->buffer->MarkExternallySynchronized(); // We want to handle synchronization of updated constant buffers ourselves + constantBufferView.Write(span(data).cast(), offset); + } - executor.AddOutsideRpCommand([view = constantBuffer.view, data, offset](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &cycle, GPU &) { - std::scoped_lock lock{view}; - commandBuffer.updateBuffer(view.bufferDelegate->buffer->GetBacking(), view->view->offset + offset, vk::ArrayProxy(1, &data)); + executor.AddOutsideRpCommand([constantBufferView, data, offset](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &cycle, GPU &) { + std::scoped_lock lock{constantBufferView}; + commandBuffer.updateBuffer(constantBufferView->buffer->GetBacking(), constantBufferView->view->offset + offset, vk::ArrayProxy(1, &data)); }); } @@ -2583,6 +2575,7 @@ namespace skyline::gpu::interconnect { public: template void Draw(u32 count, u32 first, i32 vertexOffset = 0) { + // Draw state validation ValidatePrimitiveRestartState(); // Shader + Binding Setup