diff --git a/app/src/main/cpp/skyline/gpu/buffer.cpp b/app/src/main/cpp/skyline/gpu/buffer.cpp index 7ff5c92e..64194530 100644 --- a/app/src/main/cpp/skyline/gpu/buffer.cpp +++ b/app/src/main/cpp/skyline/gpu/buffer.cpp @@ -171,23 +171,18 @@ namespace skyline::gpu { std::memcpy(backing.data() + offset, data.data(), data.size()); } - std::shared_ptr Buffer::GetView(vk::DeviceSize offset, vk::DeviceSize range, vk::Format format) { - for (auto viewIt{views.begin()}; viewIt != views.end();) { - auto view{viewIt->lock()}; - if (view && view->offset == offset && view->range == range && view->format == format) - return view; - else if (!view) - viewIt = views.erase(viewIt); - else - ++viewIt; - } + Buffer::BufferViewStorage::BufferViewStorage(vk::DeviceSize offset, vk::DeviceSize range, vk::Format format) : offset(offset), range(range), format(format) {} - auto view{std::make_shared(shared_from_this(), offset, range, format)}; - views.push_back(view); - return view; + BufferView Buffer::GetView(vk::DeviceSize offset, vk::DeviceSize range, vk::Format format) { + for (auto &view : views) + if (view.offset == offset && view.range == range && view.format == format) + return BufferView{shared_from_this(), &view}; + + views.emplace_back(offset, range, format); + return BufferView{shared_from_this(), &views.back()}; } - BufferView::BufferView(std::shared_ptr backing, vk::DeviceSize offset, vk::DeviceSize range, vk::Format format) : buffer(std::move(backing)), offset(offset), range(range), format(format) {} + BufferView::BufferView(std::shared_ptr buffer, Buffer::BufferViewStorage *view) : buffer(buffer), view(view) {} void BufferView::lock() { auto backing{std::atomic_load(&buffer)}; diff --git a/app/src/main/cpp/skyline/gpu/buffer.h b/app/src/main/cpp/skyline/gpu/buffer.h index ed1abdde..a7d7973b 100644 --- a/app/src/main/cpp/skyline/gpu/buffer.h +++ b/app/src/main/cpp/skyline/gpu/buffer.h @@ -44,7 +44,18 @@ namespace skyline::gpu { GpuDirty, //!< The GPU buffer has been modified but the CPU mappings have not been updated } dirtyState{DirtyState::CpuDirty}; //!< The state of the CPU mappings with respect to the GPU buffer - std::vector> views; //!< BufferView(s) that are backed by this Buffer, used for repointing to a new Buffer on deletion + /** + * @brief Storage for all metadata about a specific view into the buffer, used to prevent redundant view creation and duplication of VkBufferView(s) + */ + struct BufferViewStorage { + public: + vk::DeviceSize offset; + vk::DeviceSize range; + vk::Format format; + + BufferViewStorage(vk::DeviceSize offset, vk::DeviceSize range, vk::Format format); + }; + std::list views; //!< BufferViewStorage(s) that are backed by this Buffer, used for storage and repointing to a new Buffer on deletion friend BufferView; friend BufferManager; @@ -138,8 +149,9 @@ namespace skyline::gpu { /** * @return A cached or newly created view into this buffer with the supplied attributes + * @note The buffer **must** be locked prior to calling this */ - std::shared_ptr GetView(vk::DeviceSize offset, vk::DeviceSize range, vk::Format format = {}); + BufferView GetView(vk::DeviceSize offset, vk::DeviceSize range, vk::Format format = {}); }; /** @@ -147,16 +159,25 @@ namespace skyline::gpu { * @note The object **must** be locked prior to accessing any members as values will be mutated * @note This class conforms to the Lockable and BasicLockable C++ named requirements */ - struct BufferView : public FenceCycleDependency, public std::enable_shared_from_this { + struct BufferView { std::shared_ptr buffer; - vk::DeviceSize offset; - vk::DeviceSize range; - vk::Format format; + Buffer::BufferViewStorage *view; - /** - * @note A view must **NOT** be constructed directly, it should always be retrieved using Buffer::GetView - */ - BufferView(std::shared_ptr backing, vk::DeviceSize offset, vk::DeviceSize range, vk::Format format); + BufferView(std::shared_ptr buffer, Buffer::BufferViewStorage *view); + + constexpr BufferView(nullptr_t = nullptr) : buffer(nullptr), view(nullptr) {} + + constexpr operator bool() const { + return view != nullptr; + } + + constexpr Buffer::BufferViewStorage *operator->() { + return view; + } + + operator std::shared_ptr() { + return buffer; + } /** * @brief Acquires an exclusive lock on the buffer for the calling thread diff --git a/app/src/main/cpp/skyline/gpu/buffer_manager.cpp b/app/src/main/cpp/skyline/gpu/buffer_manager.cpp index 147f0824..1cadc117 100644 --- a/app/src/main/cpp/skyline/gpu/buffer_manager.cpp +++ b/app/src/main/cpp/skyline/gpu/buffer_manager.cpp @@ -8,7 +8,7 @@ namespace skyline::gpu { BufferManager::BufferManager(GPU &gpu) : gpu(gpu) {} - std::shared_ptr BufferManager::FindOrCreate(const GuestBuffer &guest) { + BufferView BufferManager::FindOrCreate(const GuestBuffer &guest) { auto guestMapping{guest.mappings.front()}; /* diff --git a/app/src/main/cpp/skyline/gpu/buffer_manager.h b/app/src/main/cpp/skyline/gpu/buffer_manager.h index 0b1d40d8..97cc8671 100644 --- a/app/src/main/cpp/skyline/gpu/buffer_manager.h +++ b/app/src/main/cpp/skyline/gpu/buffer_manager.h @@ -37,6 +37,6 @@ namespace skyline::gpu { /** * @return A pre-existing or newly created Buffer object which covers the supplied mappings */ - std::shared_ptr FindOrCreate(const GuestBuffer &guest); + BufferView FindOrCreate(const GuestBuffer &guest); }; } diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp index e8f589c7..57d6af06 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp @@ -35,12 +35,12 @@ namespace skyline::gpu::interconnect { cycle->AttachObject(view->shared_from_this()); } - void CommandExecutor::AttachBuffer(BufferView *view) { - auto buffer{view->buffer.get()}; + void CommandExecutor::AttachBuffer(BufferView view) { + auto buffer{view.buffer.get()}; if (!syncBuffers.contains(buffer)) { buffer->WaitOnFence(); buffer->cycle = cycle; - cycle->AttachObject(view->shared_from_this()); + cycle->AttachObject(view); syncBuffers.emplace(buffer); } } diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h index 6295d369..b6dce15d 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h @@ -44,7 +44,7 @@ namespace skyline::gpu::interconnect { * @note The supplied buffer **must** be locked by the calling thread * @note This'll automatically handle syncing of the buffer in the most optimal way possible */ - void AttachBuffer(BufferView *view); + void AttachBuffer(BufferView view); /** * @brief Attach the lifetime of the fence cycle dependency to the command buffer diff --git a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h index 37037ae3..f97f4c1e 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h @@ -572,7 +572,7 @@ namespace skyline::gpu::interconnect { IOVA iova; u32 size; GuestBuffer guest; - std::shared_ptr view; + BufferView view; /** * @brief Reads an object from the supplied offset in the constant buffer @@ -603,8 +603,8 @@ namespace skyline::gpu::interconnect { */ template void Write(T &object, size_t offset) { - std::lock_guard lock{*view}; - view->buffer->Write(span(object).template cast(), view->offset + offset); + std::scoped_lock lock{view}; + view.buffer->Write(span(object).template cast(), view->offset + offset); } }; ConstantBuffer constantBufferSelector; //!< The constant buffer selector is used to bind a constant buffer to a stage or update data in it @@ -612,17 +612,17 @@ namespace skyline::gpu::interconnect { public: void SetConstantBufferSelectorSize(u32 size) { constantBufferSelector.size = size; - constantBufferSelector.view.reset(); + constantBufferSelector.view = {}; } void SetConstantBufferSelectorIovaHigh(u32 high) { constantBufferSelector.iova.high = high; - constantBufferSelector.view.reset(); + constantBufferSelector.view = {}; } void SetConstantBufferSelectorIovaLow(u32 low) { constantBufferSelector.iova.low = low; - constantBufferSelector.view.reset(); + constantBufferSelector.view = {}; } std::optional GetConstantBufferSelector() { @@ -915,13 +915,13 @@ namespace skyline::gpu::interconnect { }); auto view{pipelineStage.constantBuffers[constantBuffer.index].view}; - std::scoped_lock lock(*view); + std::scoped_lock lock(view); bufferInfo.push_back(vk::DescriptorBufferInfo{ - .buffer = view->buffer->GetBacking(), + .buffer = view.buffer->GetBacking(), .offset = view->offset, .range = view->range, }); - executor.AttachBuffer(view.get()); + executor.AttachBuffer(view); } } @@ -1423,7 +1423,7 @@ namespace skyline::gpu::interconnect { vk::VertexInputBindingDescription bindingDescription{}; vk::VertexInputBindingDivisorDescriptionEXT bindingDivisorDescription{}; IOVA start{}, end{}; //!< IOVAs covering a contiguous region in GPU AS with the vertex buffer - std::shared_ptr view; + BufferView view; }; std::array vertexBuffers{}; @@ -1445,25 +1445,25 @@ namespace skyline::gpu::interconnect { void SetVertexBufferStartIovaHigh(u32 index, u32 high) { auto &vertexBuffer{vertexBuffers[index]}; vertexBuffer.start.high = high; - vertexBuffer.view.reset(); + vertexBuffer.view = {}; } void SetVertexBufferStartIovaLow(u32 index, u32 low) { auto &vertexBuffer{vertexBuffers[index]}; vertexBuffer.start.low = low; - vertexBuffer.view.reset(); + vertexBuffer.view = {}; } void SetVertexBufferEndIovaHigh(u32 index, u32 high) { auto &vertexBuffer{vertexBuffers[index]}; vertexBuffer.end.high = high; - vertexBuffer.view.reset(); + vertexBuffer.view = {}; } void SetVertexBufferEndIovaLow(u32 index, u32 low) { auto &vertexBuffer{vertexBuffers[index]}; vertexBuffer.end.low = low; - vertexBuffer.view.reset(); + vertexBuffer.view = {}; } void SetVertexBufferDivisor(u32 index, u32 divisor) { @@ -1578,19 +1578,19 @@ namespace skyline::gpu::interconnect { } } - BufferView *GetVertexBuffer(size_t index) { + BufferView GetVertexBuffer(size_t index) { auto &vertexBuffer{vertexBuffers.at(index)}; if (vertexBuffer.start > vertexBuffer.end || vertexBuffer.start == 0 || vertexBuffer.end == 0) return nullptr; else if (vertexBuffer.view) - return vertexBuffer.view.get(); + return vertexBuffer.view; GuestBuffer guest; auto mappings{channelCtx.asCtx->gmmu.TranslateRange(vertexBuffer.start, (vertexBuffer.end + 1) - vertexBuffer.start)}; guest.mappings.assign(mappings.begin(), mappings.end()); vertexBuffer.view = gpu.buffer.FindOrCreate(guest); - return vertexBuffer.view.get(); + return vertexBuffer.view; } /* Input Assembly */ @@ -1638,7 +1638,7 @@ namespace skyline::gpu::interconnect { IOVA start{}, end{}; //!< IOVAs covering a contiguous region in GPU AS containing the index buffer (end does not represent the true extent of the index buffers, just a maximum possible extent and is set to extremely high values which cannot be used to create a buffer) vk::IndexType type{}; vk::DeviceSize viewSize{}; //!< The size of the cached view - std::shared_ptr view{}; //!< A cached view tied to the IOVAs and size to allow for a faster lookup + BufferView view{}; //!< A cached view tied to the IOVAs and size to allow for a faster lookup vk::DeviceSize GetIndexBufferSize(u32 elementCount) { switch (type) { @@ -2120,22 +2120,22 @@ namespace skyline::gpu::interconnect { public: void SetIndexBufferStartIovaHigh(u32 high) { indexBuffer.start.high = high; - indexBuffer.view.reset(); + indexBuffer.view = {}; } void SetIndexBufferStartIovaLow(u32 low) { indexBuffer.start.low = low; - indexBuffer.view.reset(); + indexBuffer.view = {}; } void SetIndexBufferEndIovaHigh(u32 high) { indexBuffer.end.high = high; - indexBuffer.view.reset(); + indexBuffer.view = {}; } void SetIndexBufferEndIovaLow(u32 low) { indexBuffer.end.low = low; - indexBuffer.view.reset(); + indexBuffer.view = {}; } void SetIndexBufferFormat(maxwell3d::IndexBuffer::Format format) { @@ -2155,22 +2155,22 @@ namespace skyline::gpu::interconnect { if (indexBuffer.type == vk::IndexType::eUint8EXT && !gpu.traits.supportsUint8Indices) throw exception("Cannot use U8 index buffer without host GPU support"); - indexBuffer.view.reset(); + indexBuffer.view = {}; } - BufferView *GetIndexBuffer(u32 elementCount) { + BufferView GetIndexBuffer(u32 elementCount) { auto size{indexBuffer.GetIndexBufferSize(elementCount)}; if (indexBuffer.start > indexBuffer.end || indexBuffer.start == 0 || indexBuffer.end == 0 || size == 0) return nullptr; else if (indexBuffer.view && size == indexBuffer.viewSize) - return indexBuffer.view.get(); + return indexBuffer.view; GuestBuffer guestBuffer; auto mappings{channelCtx.asCtx->gmmu.TranslateRange(indexBuffer.start, size)}; guestBuffer.mappings.assign(mappings.begin(), mappings.end()); indexBuffer.view = gpu.buffer.FindOrCreate(guestBuffer); - return indexBuffer.view.get(); + return indexBuffer.view; } /* Depth */ @@ -2391,10 +2391,10 @@ namespace skyline::gpu::interconnect { vk::IndexType indexBufferType; if constexpr (IsIndexed) { auto indexBufferView{GetIndexBuffer(count)}; - std::scoped_lock lock(*indexBufferView); + std::scoped_lock lock(indexBufferView); executor.AttachBuffer(indexBufferView); - indexBufferHandle = indexBufferView->buffer->GetBacking(); + indexBufferHandle = indexBufferView.buffer->GetBacking(); indexBufferOffset = indexBufferView->offset; indexBufferType = indexBuffer.type; } @@ -2414,8 +2414,8 @@ namespace skyline::gpu::interconnect { if (vertexBuffer.bindingDescription.inputRate == vk::VertexInputRate::eInstance) vertexBindingDivisorsDescriptions.push_back(vertexBuffer.bindingDivisorDescription); - std::scoped_lock vertexBufferLock(*vertexBufferView); - vertexBufferHandles[index] = vertexBufferView->buffer->GetBacking(); + std::scoped_lock vertexBufferLock(vertexBufferView); + vertexBufferHandles[index] = vertexBufferView.buffer->GetBacking(); vertexBufferOffsets[index] = vertexBufferView->offset; executor.AttachBuffer(vertexBufferView); }