diff --git a/app/src/main/cpp/skyline/gpu/buffer.cpp b/app/src/main/cpp/skyline/gpu/buffer.cpp
index 64194530..33685229 100644
--- a/app/src/main/cpp/skyline/gpu/buffer.cpp
+++ b/app/src/main/cpp/skyline/gpu/buffer.cpp
@@ -8,45 +8,14 @@
 #include "buffer.h"
 
 namespace skyline::gpu {
-    vk::DeviceSize GuestBuffer::BufferSize() const {
-        vk::DeviceSize size{};
-        for (const auto &buffer : mappings)
-            size += buffer.size_bytes();
-        return size;
-    }
-
     void Buffer::SetupGuestMappings() {
-        auto &mappings{guest.mappings};
-        if (mappings.size() == 1) {
-            auto mapping{mappings.front()};
-            u8 *alignedData{util::AlignDown(mapping.data(), PAGE_SIZE)};
-            size_t alignedSize{static_cast<size_t>(util::AlignUp(mapping.data() + mapping.size(), PAGE_SIZE) - alignedData)};
+        u8 *alignedData{util::AlignDown(guest.data(), PAGE_SIZE)};
+        size_t alignedSize{static_cast<size_t>(util::AlignUp(guest.data() + guest.size(), PAGE_SIZE) - alignedData)};
 
-            alignedMirror = gpu.state.process->memory.CreateMirror(alignedData, alignedSize);
-            mirror = alignedMirror.subspan(static_cast<size_t>(mapping.data() - alignedData), mapping.size());
-        } else {
-            std::vector<span<u8>> alignedMappings;
+        alignedMirror = gpu.state.process->memory.CreateMirror(alignedData, alignedSize);
+        mirror = alignedMirror.subspan(static_cast<size_t>(guest.data() - alignedData), guest.size());
 
-            const auto &frontMapping{mappings.front()};
-            u8 *alignedData{util::AlignDown(frontMapping.data(), PAGE_SIZE)};
-            alignedMappings.emplace_back(alignedData, (frontMapping.data() + frontMapping.size()) - alignedData);
-
-            size_t totalSize{frontMapping.size()};
-            for (auto it{std::next(mappings.begin())}; it != std::prev(mappings.end()); ++it) {
-                auto mappingSize{it->size()};
-                alignedMappings.emplace_back(it->data(), mappingSize);
-                totalSize += mappingSize;
-            }
-
-            const auto &backMapping{mappings.back()};
-            totalSize += backMapping.size();
-            alignedMappings.emplace_back(backMapping.data(), util::AlignUp(backMapping.size(), PAGE_SIZE));
-
-            alignedMirror = gpu.state.process->memory.CreateMirrors(alignedMappings);
-            mirror = alignedMirror.subspan(static_cast<size_t>(frontMapping.data() - alignedData), totalSize);
-        }
-
-        trapHandle = gpu.state.nce->TrapRegions(mappings, true, [this] {
+        trapHandle = gpu.state.nce->TrapRegions(guest, true, [this] {
             std::lock_guard lock(*this);
             SynchronizeGuest(true); // We can skip trapping since the caller will do it
             WaitOnFence();
@@ -58,7 +27,7 @@ namespace skyline::gpu {
         });
     }
 
-    Buffer::Buffer(GPU &gpu, GuestBuffer guest) : gpu(gpu), size(guest.BufferSize()), backing(gpu.memory.AllocateBuffer(size)), guest(std::move(guest)) {
+    Buffer::Buffer(GPU &gpu, GuestBuffer guest) : gpu(gpu), backing(gpu.memory.AllocateBuffer(guest.size())), guest(guest) {
         SetupGuestMappings();
     }
 
@@ -111,7 +80,7 @@ namespace skyline::gpu {
         if (dirtyState != DirtyState::CpuDirty)
             return;
 
-        if (pCycle != cycle.lock())
+        if (!cycle.owner_before(pCycle))
             WaitOnFence();
 
         TRACE_EVENT("gpu", "Buffer::SynchronizeHostWithCycle");
@@ -127,11 +96,12 @@ namespace skyline::gpu {
         }
     }
 
-    void Buffer::SynchronizeGuest(bool skipTrap) {
+    void Buffer::SynchronizeGuest(bool skipTrap, bool skipFence) {
         if (dirtyState != DirtyState::GpuDirty)
             return; // If the buffer has not been used on the GPU, there is no need to synchronize it
 
-        WaitOnFence();
+        if (!skipFence)
+            WaitOnFence();
 
         TRACE_EVENT("gpu", "Buffer::SynchronizeGuest");
@@ -157,13 +127,20 @@ namespace skyline::gpu {
     };
 
     void Buffer::SynchronizeGuestWithCycle(const std::shared_ptr<FenceCycle> &pCycle) {
-        if (pCycle != cycle.lock())
+        if (!cycle.owner_before(pCycle))
             WaitOnFence();
 
         pCycle->AttachObject(std::make_shared<BufferGuestSync>(shared_from_this()));
         cycle = pCycle;
     }
 
+    void Buffer::Read(span<u8> data, vk::DeviceSize offset) {
+        if (dirtyState == DirtyState::CpuDirty || dirtyState == DirtyState::Clean)
+            std::memcpy(data.data(), mirror.data() + offset, data.size());
+        else if (dirtyState == DirtyState::GpuDirty)
+            std::memcpy(data.data(), backing.data() + offset, data.size());
+    }
+
     void Buffer::Write(span<u8> data, vk::DeviceSize offset) {
         if (dirtyState == DirtyState::CpuDirty || dirtyState == DirtyState::Clean)
             std::memcpy(mirror.data() + offset, data.data(), data.size());
@@ -171,51 +148,89 @@ namespace skyline::gpu {
             std::memcpy(backing.data() + offset, data.data(), data.size());
     }
 
-    Buffer::BufferViewStorage::BufferViewStorage(vk::DeviceSize offset, vk::DeviceSize range, vk::Format format) : offset(offset), range(range), format(format) {}
+    Buffer::BufferViewStorage::BufferViewStorage(vk::DeviceSize offset, vk::DeviceSize size, vk::Format format) : offset(offset), size(size), format(format) {}
 
-    BufferView Buffer::GetView(vk::DeviceSize offset, vk::DeviceSize range, vk::Format format) {
-        for (auto &view : views)
-            if (view.offset == offset && view.range == range && view.format == format)
-                return BufferView{shared_from_this(), &view};
-
-        views.emplace_back(offset, range, format);
-        return BufferView{shared_from_this(), &views.back()};
+    Buffer::BufferDelegate::BufferDelegate(std::shared_ptr<Buffer> pBuffer, Buffer::BufferViewStorage *view) : buffer(std::move(pBuffer)), view(view) {
+        iterator = buffer->delegates.emplace(buffer->delegates.end(), this);
     }
 
-    BufferView::BufferView(std::shared_ptr<Buffer> buffer, Buffer::BufferViewStorage *view) : buffer(buffer), view(view) {}
+    Buffer::BufferDelegate::~BufferDelegate() {
+        std::scoped_lock lock(*this);
+        buffer->delegates.erase(iterator);
+    }
 
-    void BufferView::lock() {
-        auto backing{std::atomic_load(&buffer)};
+    void Buffer::BufferDelegate::lock() {
+        auto lBuffer{std::atomic_load(&buffer)};
         while (true) {
-            backing->lock();
+            lBuffer->lock();
 
             auto latestBacking{std::atomic_load(&buffer)};
-            if (backing == latestBacking)
+            if (lBuffer == latestBacking)
                 return;
 
-            backing->unlock();
-            backing = latestBacking;
+            lBuffer->unlock();
+            lBuffer = latestBacking;
         }
     }
 
-    void BufferView::unlock() {
+    void Buffer::BufferDelegate::unlock() {
         buffer->unlock();
     }
 
-    bool BufferView::try_lock() {
-        auto backing{std::atomic_load(&buffer)};
+    bool Buffer::BufferDelegate::try_lock() {
+        auto lBuffer{std::atomic_load(&buffer)};
         while (true) {
-            bool success{backing->try_lock()};
+            bool success{lBuffer->try_lock()};
 
-            auto latestBacking{std::atomic_load(&buffer)};
-            if (backing == latestBacking)
+            auto latestBuffer{std::atomic_load(&buffer)};
+            if (lBuffer == latestBuffer)
                 // We want to ensure that the try_lock() was on the latest backing and not on an outdated one
                 return success;
 
             if (success)
                 // We only unlock() if the try_lock() was successful and we acquired the mutex
-                backing->unlock();
-            backing = latestBacking;
+                lBuffer->unlock();
+            lBuffer = latestBuffer;
         }
     }
+
+    BufferView Buffer::GetView(vk::DeviceSize offset, vk::DeviceSize size, vk::Format format) {
+        for (auto &view : views)
+            if (view.offset == offset && view.size == size && view.format == format)
+                return BufferView{shared_from_this(), &view};
+
+        views.emplace_back(offset, size, format);
+        return BufferView{shared_from_this(), &views.back()};
+    }
+
+    BufferView::BufferView(std::shared_ptr<Buffer> buffer, Buffer::BufferViewStorage *view) : bufferDelegate(std::make_shared<Buffer::BufferDelegate>(std::move(buffer), view)) {}
+
+    void BufferView::AttachCycle(const std::shared_ptr<FenceCycle> &cycle) {
+        auto buffer{bufferDelegate->buffer.get()};
+        if (!buffer->cycle.owner_before(cycle)) {
+            buffer->WaitOnFence();
+            buffer->cycle = cycle;
+            cycle->AttachObject(bufferDelegate);
+        }
+    }
+
+    void BufferView::RegisterUsage(const std::function<void(const Buffer::BufferViewStorage &, const std::shared_ptr<Buffer> &)> &usageCallback) {
+        usageCallback(*bufferDelegate->view, bufferDelegate->buffer);
+        if (!bufferDelegate->usageCallback) {
+            bufferDelegate->usageCallback = usageCallback;
+        } else {
+            bufferDelegate->usageCallback = [usageCallback, oldCallback = std::move(bufferDelegate->usageCallback)](const Buffer::BufferViewStorage &pView, const std::shared_ptr<Buffer> &buffer) {
+                oldCallback(pView, buffer);
+                usageCallback(pView, buffer);
+            };
+        }
+    }
+
+    void BufferView::Read(span<u8> data, vk::DeviceSize offset) const {
+        bufferDelegate->buffer->Read(data, offset + bufferDelegate->view->offset);
+    }
+
+    void BufferView::Write(span<u8> data, vk::DeviceSize offset) const {
+        bufferDelegate->buffer->Write(data, offset + bufferDelegate->view->offset);
+    }
 }
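The `BufferDelegate::lock()`/`try_lock()` retry loops above implement a lock-then-revalidate pattern: the delegate's `buffer` pointer can be atomically repointed by the buffer manager while a thread is blocked on the old buffer's mutex, so a lock only counts once the pointer is observed unchanged after acquisition. A minimal standalone sketch of the same pattern, with hypothetical `Resource`/`Handle` types rather than the actual skyline classes:

```cpp
#include <atomic>
#include <memory>
#include <mutex>

// Hypothetical resource whose owning pointer may be swapped concurrently,
// mirroring how a BufferDelegate's buffer can be repointed by the BufferManager
struct Resource {
    std::mutex mutex;
    void lock() { mutex.lock(); }
    void unlock() { mutex.unlock(); }
};

struct Handle {
    std::shared_ptr<Resource> resource;

    void lock() {
        auto current{std::atomic_load(&resource)};
        while (true) {
            current->lock(); // Lock whatever we last observed

            auto latest{std::atomic_load(&resource)};
            if (current == latest)
                return; // The pointer didn't change while we were blocking, the lock is valid

            current->unlock(); // The target was swapped, drop the stale lock and retry
            current = latest;
        }
    }
};
```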
diff --git a/app/src/main/cpp/skyline/gpu/buffer.h b/app/src/main/cpp/skyline/gpu/buffer.h
index a7d7973b..3d774630 100644
--- a/app/src/main/cpp/skyline/gpu/buffer.h
+++ b/app/src/main/cpp/skyline/gpu/buffer.h
@@ -7,18 +7,7 @@
 #include "memory_manager.h"
 
 namespace skyline::gpu {
-    /**
-     * @brief A descriptor for a GPU buffer on the guest
-     */
-    struct GuestBuffer {
-        using Mappings = boost::container::small_vector<span<u8>, 3>;
-        Mappings mappings; //!< Spans to CPU memory for the underlying data backing this buffer
-
-        /**
-         * @return The total size of the buffer by adding up the size of all mappings
-         */
-        vk::DeviceSize BufferSize() const;
-    };
+    using GuestBuffer = span<u8>; //!< The CPU mapping for the guest buffer, multiple mappings for buffers aren't supported since overlaps cannot be reconciled
 
     struct BufferView;
     class BufferManager;
@@ -31,7 +20,6 @@ namespace skyline::gpu {
       private:
         GPU &gpu;
         std::mutex mutex; //!< Synchronizes any mutations to the buffer or its backing
-        vk::DeviceSize size;
         memory::Buffer backing;
         GuestBuffer guest;
@@ -44,19 +32,46 @@ namespace skyline::gpu {
             GpuDirty, //!< The GPU buffer has been modified but the CPU mappings have not been updated
         } dirtyState{DirtyState::CpuDirty}; //!< The state of the CPU mappings with respect to the GPU buffer
 
+      public:
         /**
         * @brief Storage for all metadata about a specific view into the buffer, used to prevent redundant view creation and duplication of VkBufferView(s)
         */
        struct BufferViewStorage {
-          public:
            vk::DeviceSize offset;
-            vk::DeviceSize range;
+            vk::DeviceSize size;
            vk::Format format;
 
-            BufferViewStorage(vk::DeviceSize offset, vk::DeviceSize range, vk::Format format);
+            BufferViewStorage(vk::DeviceSize offset, vk::DeviceSize size, vk::Format format);
        };
+
+      private:
        std::list<BufferViewStorage> views; //!< BufferViewStorage(s) that are backed by this Buffer, used for storage and repointing to a new Buffer on deletion
 
+      public:
+        /**
+         * @brief A delegate for a strong reference to a Buffer by a BufferView which can be changed to another Buffer transparently
+         * @note This class conforms to the Lockable and BasicLockable C++ named requirements
+         */
+        struct BufferDelegate : public FenceCycleDependency {
+            std::shared_ptr<Buffer> buffer;
+            Buffer::BufferViewStorage *view;
+            std::function<void(const Buffer::BufferViewStorage &, const std::shared_ptr<Buffer> &)> usageCallback;
+            std::list<BufferDelegate *>::iterator iterator;
+
+            BufferDelegate(std::shared_ptr<Buffer> buffer, Buffer::BufferViewStorage *view);
+
+            ~BufferDelegate();
+
+            void lock();
+
+            void unlock();
+
+            bool try_lock();
+        };
+
+      private:
+        std::list<BufferDelegate *> delegates; //!< The reference delegates for this buffer, used to prevent the buffer from being deleted while it is still in use
+
        friend BufferView;
        friend BufferManager;
@@ -131,9 +146,10 @@ namespace skyline::gpu {
        /**
         * @brief Synchronizes the guest buffer with the host buffer
         * @param skipTrap If true, setting up a CPU trap will be skipped and the dirty state will be Clean/CpuDirty
+         * @param skipFence If true, waiting on the currently attached fence will be skipped
         * @note The buffer **must** be locked prior to calling this
         */
-        void SynchronizeGuest(bool skipTrap = false);
+        void SynchronizeGuest(bool skipTrap = false, bool skipFence = false);
 
        /**
         * @brief Synchronizes the guest buffer with the host buffer when the FenceCycle is signalled
@@ -142,6 +158,11 @@ namespace skyline::gpu {
         */
        void SynchronizeGuestWithCycle(const std::shared_ptr<FenceCycle> &cycle);
 
+        /**
+         * @brief Reads data at the specified offset in the buffer
+         */
+        void Read(span<u8> data, vk::DeviceSize offset);
+
        /**
         * @brief Writes data at the specified offset in the buffer
         */
@@ -151,7 +172,7 @@ namespace skyline::gpu {
         * @return A cached or newly created view into this buffer with the supplied attributes
         * @note The buffer **must** be locked prior to calling this
         */
-        BufferView GetView(vk::DeviceSize offset, vk::DeviceSize range, vk::Format format = {});
+        BufferView GetView(vk::DeviceSize offset, vk::DeviceSize size, vk::Format format = {});
    };
 
    /**
@@ -160,41 +181,70 @@ namespace skyline::gpu {
     * @note This class conforms to the Lockable and BasicLockable C++ named requirements
     */
    struct BufferView {
-        std::shared_ptr<Buffer> buffer;
-        Buffer::BufferViewStorage *view;
+        std::shared_ptr<Buffer::BufferDelegate> bufferDelegate;
 
        BufferView(std::shared_ptr<Buffer> buffer, Buffer::BufferViewStorage *view);
 
-        constexpr BufferView(nullptr_t = nullptr) : buffer(nullptr), view(nullptr) {}
-
-        constexpr operator bool() const {
-            return view != nullptr;
-        }
-
-        constexpr Buffer::BufferViewStorage *operator->() {
-            return view;
-        }
-
-        operator std::shared_ptr<Buffer>() {
-            return buffer;
-        }
+        constexpr BufferView(nullptr_t = nullptr) : bufferDelegate(nullptr) {}
 
        /**
         * @brief Acquires an exclusive lock on the buffer for the calling thread
         * @note Naming is in accordance to the BasicLockable named requirement
         */
-        void lock();
+        void lock() const {
+            bufferDelegate->lock();
+        }
 
        /**
         * @brief Relinquishes an existing lock on the buffer by the calling thread
         * @note Naming is in accordance to the BasicLockable named requirement
         */
-        void unlock();
+        void unlock() const {
+            bufferDelegate->unlock();
+        }
 
        /**
         * @brief Attempts to acquire an exclusive lock but returns immediately if it's captured by another thread
         * @note Naming is in accordance to the Lockable named requirement
         */
-        bool try_lock();
+        bool try_lock() const {
+            return bufferDelegate->try_lock();
+        }
+
+        constexpr operator bool() const {
+            return bufferDelegate != nullptr;
+        }
+
+        /**
+         * @note The buffer **must** be locked prior to calling this
+         */
+        Buffer::BufferDelegate *operator->() const {
+            return bufferDelegate.get();
+        }
+
+        /**
+         * @brief Attaches a fence cycle to the underlying buffer in a way that it will be synchronized with the latest backing buffer
+         * @note The view **must** be locked prior to calling this
+         */
+        void AttachCycle(const std::shared_ptr<FenceCycle> &cycle);
+
+        /**
+         * @brief Registers a callback for a usage of this view, it may be called multiple times due to the view being recreated with different backings
+         * @note The callback will be automatically called the first time after registration
+         * @note The view **must** be locked prior to calling this
+         */
+        void RegisterUsage(const std::function<void(const Buffer::BufferViewStorage &, const std::shared_ptr<Buffer> &)> &usageCallback);
+
+        /**
+         * @brief Reads data at the specified offset in the view
+         * @note The view **must** be locked prior to calling this
+         */
+        void Read(span<u8> data, vk::DeviceSize offset) const;
+
+        /**
+         * @brief Writes data at the specified offset in the view
+         * @note The view **must** be locked prior to calling this
+         */
+        void Write(span<u8> data, vk::DeviceSize offset) const;
    };
}
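Since `BufferView` now proxies everything through its delegate, a consumer locks the view (which transparently pins the latest backing `Buffer`) before touching data, and all offsets are view-relative. A usage sketch against the patched API; the helper function and its arguments are illustrative, not part of the patch:

```cpp
// Illustrative helper: copies data out of a view and writes it back, relying on
// BufferView's Lockable conformance to revalidate and lock the latest backing
void ReadModifyWrite(skyline::gpu::BufferView &view, skyline::span<skyline::u8> scratch) {
    std::scoped_lock lock{view}; // Locks through the delegate's retry loop
    view.Read(scratch, 0);       // Offset 0 is the start of the view, not the buffer
    view.Write(scratch, 0);
}
```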
diff --git a/app/src/main/cpp/skyline/gpu/buffer_manager.cpp b/app/src/main/cpp/skyline/gpu/buffer_manager.cpp
index 1cadc117..d93e0c1e 100644
--- a/app/src/main/cpp/skyline/gpu/buffer_manager.cpp
+++ b/app/src/main/cpp/skyline/gpu/buffer_manager.cpp
@@ -8,101 +8,68 @@
 namespace skyline::gpu {
     BufferManager::BufferManager(GPU &gpu) : gpu(gpu) {}
 
-    BufferView BufferManager::FindOrCreate(const GuestBuffer &guest) {
-        auto guestMapping{guest.mappings.front()};
-
-        /*
-         * Iterate over all buffers that overlap with the first mapping of the guest buffer and compare the mappings:
-         * 1) All mappings match up perfectly, we check that the rest of the supplied mappings correspond to mappings in the buffer
-         * 1.1) If they match as well, we return a view encompassing the entire buffer
-         * 2) Only a contiguous range of mappings match, we check for the overlap bounds, it can go two ways:
-         * 2.1) If the supplied buffer is smaller than the matching buffer, we return a view encompassing the mappings into the buffer
-         * 2.2) If the matching buffer is smaller than the supplied buffer, we make the matching buffer larger and return it
-         * 3) If there's another overlap we go back to (1) with it else we go to (4)
-         * 4) Create a new buffer and insert it in the map then return it
-         */
+    bool BufferManager::BufferLessThan(const std::shared_ptr<Buffer> &it, u8 *pointer) {
+        return it->guest.begin().base() < pointer;
+    }
 
+    BufferView BufferManager::FindOrCreate(GuestBuffer guestMapping, const std::shared_ptr<FenceCycle> &cycle) {
        std::scoped_lock lock(mutex);
 
-        std::shared_ptr<Buffer> match{};
-        auto mappingEnd{std::upper_bound(buffers.begin(), buffers.end(), guestMapping)}, hostMapping{mappingEnd};
-        if (hostMapping != buffers.begin() && (--hostMapping)->end() > guestMapping.begin()) {
-            auto &hostMappings{hostMapping->buffer->guest.mappings};
-            if (hostMapping->contains(guestMapping)) {
-                // We need to check that all corresponding mappings in the candidate buffer and the guest buffer match up
-                // Only the start of the first matched mapping and the end of the last mapping can not match up as this is the case for views
-                auto firstHostMapping{hostMapping->iterator};
-                auto lastGuestMapping{guest.mappings.back()};
-                auto endHostMapping{std::find_if(firstHostMapping, hostMappings.end(), [&lastGuestMapping](const span<u8> &it) {
-                    return lastGuestMapping.begin() > it.begin() && lastGuestMapping.end() > it.end();
-                })}; //!< A past-the-end iterator for the last host mapping, the final valid mapping is prior to this iterator
-                bool mappingMatch{std::equal(firstHostMapping, endHostMapping, guest.mappings.begin(), guest.mappings.end(), [](const span<u8> &lhs, const span<u8> &rhs) {
-                    return lhs.end() == rhs.end(); // We check end() here to implicitly ignore any offset from the first mapping
-                })};
-                auto &lastHostMapping{*std::prev(endHostMapping)};
-                if (firstHostMapping == hostMappings.begin() && firstHostMapping->begin() == guestMapping.begin() && mappingMatch && endHostMapping == hostMappings.end() && lastGuestMapping.end() == lastHostMapping.end()) {
-                    // We've gotten a perfect 1:1 match for *all* mappings from the start to end
-                    std::scoped_lock bufferLock(*hostMapping->buffer);
-                    return hostMapping->buffer->GetView(0, hostMapping->buffer->size);
-                } else if (mappingMatch && firstHostMapping->begin() > guestMapping.begin() && lastHostMapping.end() > lastGuestMapping.end()) {
-                    // We've gotten a guest buffer that is located entirely within a host buffer
-                    std::scoped_lock bufferLock(*hostMapping->buffer);
-                    return hostMapping->buffer->GetView(hostMapping->offset + static_cast<vk::DeviceSize>(hostMapping->begin() - guestMapping.begin()), guest.BufferSize());
-                }
+        // Look up all buffers overlapping with the supplied guest mapping
+        boost::container::small_vector<std::shared_ptr<Buffer>, 4> overlaps;
+        for (auto entryIt{std::lower_bound(buffers.begin(), buffers.end(), guestMapping.end().base(), BufferLessThan)}; entryIt != buffers.begin() && (*--entryIt)->guest.begin() <= guestMapping.end();)
+            if ((*entryIt)->guest.end() > guestMapping.begin())
+                overlaps.push_back(*entryIt);
+
+        if (overlaps.size() == 1) [[likely]] {
+            auto buffer{overlaps.front()};
+            if (buffer->guest.begin() <= guestMapping.begin() && buffer->guest.end() >= guestMapping.end()) {
+                // If we find a buffer which can entirely fit the guest mapping, we can just return a view into it
+                std::scoped_lock bufferLock{*buffer};
+                return buffer->GetView(static_cast<vk::DeviceSize>(guestMapping.begin() - buffer->guest.begin()), guestMapping.size());
            }
        }
 
-        /* TODO: Handle overlapping buffers
-        // Create a list of all overlapping buffers and update the guest mappings to fit them all
-        boost::container::small_vector<std::pair<std::shared_ptr<Buffer>, u32>, 4> overlappingBuffers;
-        GuestBuffer::Mappings newMappings;
-
-        auto guestMappingIt{guest.mappings.begin()};
-        while (true) {
-            do {
-                hostMapping->begin();
-                overlappingBuffers.emplace_back(hostMapping->buffer, 4);
-            } while (hostMapping != buffers.begin() && (--hostMapping)->end() > guestMappingIt->begin());
-
-            // Iterate over all guest mappings to find overlapping buffers, not just the first
-            auto nextGuestMappingIt{std::next(guestMappingIt)};
-            if (nextGuestMappingIt != guest.mappings.end())
-                hostMapping = std::upper_bound(buffers.begin(), buffers.end(), *nextGuestMappingIt);
-            else
-                break;
-            guestMappingIt = nextGuestMappingIt;
+        // Find the extents of the new buffer we want to create that can hold all overlapping buffers
+        auto lowestAddress{guestMapping.begin().base()}, highestAddress{guestMapping.end().base()};
+        for (const auto &overlap : overlaps) {
+            auto mapping{overlap->guest};
+            if (mapping.begin().base() < lowestAddress)
+                lowestAddress = mapping.begin().base();
+            if (mapping.end().base() > highestAddress)
+                highestAddress = mapping.end().base();
        }
 
-        // Create a buffer that can contain all the overlapping buffers
-        auto buffer{std::make_shared<Buffer>(gpu, guest)};
+        auto newBuffer{std::make_shared<Buffer>(gpu, span<u8>(lowestAddress, highestAddress))};
+        for (auto &overlap : overlaps) {
+            std::scoped_lock overlapLock{*overlap};
 
-        // Delete mappings from all overlapping buffers and repoint all buffer views
-        for (auto &overlappingBuffer : overlappingBuffers) {
-            std::scoped_lock overlappingBufferLock(*overlappingBuffer.first);
-            auto &bufferMappings{hostMapping->buffer->guest.mappings};
+            if (!overlap->cycle.owner_before(cycle))
+                overlap->WaitOnFence(); // We only want to wait on the fence cycle if it's not the current fence cycle
+            overlap->SynchronizeGuest(true, true); // Sync back the buffer before we destroy it
 
-            // Delete all mappings of the overlapping buffers
-            while ((++it) != buffer->guest.mappings.end()) {
-                guestMapping = *it;
-                auto mapping{std::upper_bound(buffers.begin(), buffers.end(), guestMapping)};
-                buffers.emplace(mapping, BufferMapping{buffer, it, offset, guestMapping});
-                offset += mapping->size_bytes();
+            buffers.erase(std::find(buffers.begin(), buffers.end(), overlap));
+
+            // Transfer all views from the overlapping buffer to the new buffer, rebasing their offsets onto it
+            vk::DeviceSize overlapOffset{static_cast<vk::DeviceSize>(overlap->guest.begin() - newBuffer->guest.begin())};
+            if (overlapOffset != 0)
+                for (auto &view : overlap->views)
+                    view.offset += overlapOffset;
+
+            newBuffer->views.splice(newBuffer->views.end(), overlap->views);
+
+            // Transfer all delegate references from the overlapping buffer to the new buffer
+            for (auto &delegate : overlap->delegates) {
+                atomic_exchange(&delegate->buffer, newBuffer);
+                if (delegate->usageCallback)
+                    delegate->usageCallback(*delegate->view, newBuffer);
            }
-        }
-        */
 
-        auto buffer{std::make_shared<Buffer>(gpu, guest)};
-        auto it{buffer->guest.mappings.begin()};
-        buffers.emplace(mappingEnd, BufferMapping{buffer, it, 0, guestMapping});
-
-        vk::DeviceSize offset{};
-        while ((++it) != buffer->guest.mappings.end()) {
-            guestMapping = *it;
-            auto mapping{std::upper_bound(buffers.begin(), buffers.end(), guestMapping)};
-            buffers.emplace(mapping, BufferMapping{buffer, it, offset, guestMapping});
-            offset += mapping->size_bytes();
+            newBuffer->delegates.splice(newBuffer->delegates.end(), overlap->delegates);
        }
 
-        return buffer->GetView(0, buffer->size);
+        buffers.insert(std::lower_bound(buffers.begin(), buffers.end(), newBuffer->guest.end().base(), BufferLessThan), newBuffer);
+
+        return newBuffer->GetView(static_cast<vk::DeviceSize>(guestMapping.begin() - newBuffer->guest.begin()), guestMapping.size());
    }
}
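`FindOrCreate()` keeps `buffers` sorted by guest start address and uses `std::lower_bound` with `BufferLessThan` to find the first buffer starting at or past the end of the queried mapping, then walks backwards collecting overlaps. A standalone sketch of that query with a hypothetical `Range` type, deliberately keeping the same comparator and loop shape:

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

struct Range {
    std::uint8_t *begin;
    std::uint8_t *end;
};

// `sorted` is ordered by Range::begin, mirroring BufferManager::buffers
std::vector<Range> FindOverlaps(const std::vector<Range> &sorted, const Range &query) {
    std::vector<Range> overlaps;
    // First entry whose start is >= query.end; every overlap candidate lies before it
    auto it{std::lower_bound(sorted.begin(), sorted.end(), query.end,
                             [](const Range &range, const std::uint8_t *pointer) { return range.begin < pointer; })};
    // Walk backwards, collecting entries that reach past the query's start
    // (the begin <= query.end check mirrors the original loop's condition)
    while (it != sorted.begin() && (--it)->begin <= query.end)
        if (it->end > query.begin)
            overlaps.push_back(*it);
    return overlaps;
}
```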
diff --git a/app/src/main/cpp/skyline/gpu/buffer_manager.h b/app/src/main/cpp/skyline/gpu/buffer_manager.h
index 97cc8671..05565b71 100644
--- a/app/src/main/cpp/skyline/gpu/buffer_manager.h
+++ b/app/src/main/cpp/skyline/gpu/buffer_manager.h
@@ -11,25 +11,14 @@ namespace skyline::gpu {
     */
    class BufferManager {
      private:
-        /**
-         * @brief A single contiguous mapping of a buffer in the CPU address space
-         */
-        struct BufferMapping : span<u8> {
-            std::shared_ptr<Buffer> buffer;
-            GuestBuffer::Mappings::iterator iterator; //!< An iterator to the mapping in the buffer's GuestBufferMappings corresponding to this mapping
-            vk::DeviceSize offset; //!< Offset of this mapping relative to the start of the buffer
-
-            template<typename... Args>
-            BufferMapping(std::shared_ptr<Buffer> buffer, GuestBuffer::Mappings::iterator iterator, vk::DeviceSize offset, Args &&... args)
-                : span<u8>(std::forward<Args>(args)...),
-                  buffer(std::move(buffer)),
-                  iterator(iterator),
-                  offset(offset) {}
-        };
-
        GPU &gpu;
        std::mutex mutex; //!< Synchronizes access to the buffer mappings
-        std::vector<BufferMapping> buffers; //!< A sorted vector of all buffer mappings
+        std::vector<std::shared_ptr<Buffer>> buffers; //!< A sorted vector of all buffer mappings
+
+        /**
+         * @return If the start of the supplied buffer is less than the supplied pointer
+         */
+        static bool BufferLessThan(const std::shared_ptr<Buffer> &it, u8 *pointer);
 
      public:
        BufferManager(GPU &gpu);
@@ -37,6 +26,6 @@ namespace skyline::gpu {
        /**
         * @return A pre-existing or newly created Buffer object which covers the supplied mappings
         */
-        BufferView FindOrCreate(const GuestBuffer &guest);
+        BufferView FindOrCreate(GuestBuffer guestMapping, const std::shared_ptr<FenceCycle> &cycle = nullptr);
    };
}
diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp
index b7f67584..099bb8a1 100644
--- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp
+++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp
@@ -35,13 +35,10 @@ namespace skyline::gpu::interconnect {
            cycle->AttachObject(view->shared_from_this());
    }
 
-    void CommandExecutor::AttachBuffer(BufferView view) {
-        auto buffer{view.buffer.get()};
-        if (!syncBuffers.contains(buffer)) {
-            buffer->WaitOnFence();
-            buffer->cycle = cycle;
-            cycle->AttachObject(view);
-            syncBuffers.emplace(buffer);
+    void CommandExecutor::AttachBuffer(BufferView &view) {
+        if (!syncBuffers.contains(view.bufferDelegate)) {
+            view.AttachCycle(cycle);
+            syncBuffers.emplace(view.bufferDelegate);
        }
    }
 
@@ -133,8 +130,10 @@ namespace skyline::gpu::interconnect {
        for (auto texture : syncTextures)
            texture->SynchronizeHostWithBuffer(commandBuffer, cycle, true);
 
-        for (auto buffer : syncBuffers)
-            buffer->SynchronizeHostWithCycle(cycle, true);
+        for (const auto &delegate : syncBuffers) {
+            delegate->buffer->SynchronizeHostWithCycle(cycle, true);
+            delegate->usageCallback = nullptr;
+        }
 
        vk::RenderPass lRenderPass;
        u32 subpassIndex;
diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h
index b6dce15d..88fea99a 100644
--- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h
+++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h
@@ -16,11 +16,12 @@ namespace skyline::gpu::interconnect {
      private:
        GPU &gpu;
        CommandScheduler::ActiveCommandBuffer activeCommandBuffer;
-        std::shared_ptr<FenceCycle> cycle;
        boost::container::stable_vector<node::NodeVariant> nodes;
        node::RenderPassNode *renderPass{};
 
        std::unordered_set<TextureView *> syncTextures; //!< All textures that need to be synced prior to and after execution
-        std::unordered_set<Buffer *> syncBuffers; //!< All buffers that need to be synced prior to and after execution
+
+        using SharedBufferDelegate = std::shared_ptr<Buffer::BufferDelegate>;
+        std::unordered_set<SharedBufferDelegate> syncBuffers; //!< All buffers that need to be synced prior to and after execution
 
        /**
         * @return If a new render pass was created by the function or the current one was reused as it was compatible
         */
        bool CreateRenderPass(vk::Rect2D renderArea);
 
@@ -28,6 +29,8 @@ namespace skyline::gpu::interconnect {
      public:
+        std::shared_ptr<FenceCycle> cycle; //!< The fence cycle that this command executor uses to wait for the GPU to finish executing commands
+
        CommandExecutor(const DeviceState &state);
 
        ~CommandExecutor();
@@ -44,7 +47,7 @@ namespace skyline::gpu::interconnect {
         * @note The supplied buffer **must** be locked by the calling thread
         * @note This'll automatically handle syncing of the buffer in the most optimal way possible
         */
-        void AttachBuffer(BufferView view);
+        void AttachBuffer(BufferView &view);
 
        /**
         * @brief Attach the lifetime of the fence cycle dependency to the command buffer
diff --git a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h
index d518c54c..dab3ba2a 100644
--- a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h
+++ b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h
@@ -571,7 +571,6 @@ namespace skyline::gpu::interconnect {
        struct ConstantBuffer {
            IOVA iova;
            u32 size;
-            GuestBuffer guest;
            BufferView view;
 
            /**
@@ -581,20 +580,9 @@ namespace skyline::gpu::interconnect {
            template<typename T>
            T Read(size_t offset) const {
                T object;
-                size_t objectOffset{};
-                for (auto &mapping : guest.mappings) {
-                    if (offset < mapping.size_bytes()) {
-                        auto copySize{std::min(mapping.size_bytes() - offset, sizeof(T))};
-                        std::memcpy(reinterpret_cast<u8 *>(&object) + objectOffset, mapping.data() + offset, copySize);
-                        objectOffset += copySize;
-                        if (objectOffset == sizeof(T))
-                            return object;
-                        offset = mapping.size_bytes();
-                    } else {
-                        offset -= mapping.size_bytes();
-                    }
-                }
-                throw exception("Object extent ({} + {} = {}) is larger than constant buffer size: {}", size + offset, sizeof(T), size + offset + sizeof(T), size);
+                std::scoped_lock lock{view};
+                view.Read(span(object).template cast<u8>(), offset);
+                return object;
            }
 
            /**
@@ -604,7 +592,7 @@ namespace skyline::gpu::interconnect {
            template<typename T>
            void Write(T &object, size_t offset) {
                std::scoped_lock lock{view};
-                view.buffer->Write(span(object).template cast<u8>(), view->offset + offset);
+                view.Write(span(object).template cast<u8>(), offset);
            }
        };
        ConstantBuffer constantBufferSelector; //!< The constant buffer selector is used to bind a constant buffer to a stage or update data in it
@@ -633,12 +621,7 @@ namespace skyline::gpu::interconnect {
            auto mappings{channelCtx.asCtx->gmmu.TranslateRange(constantBufferSelector.iova, constantBufferSelector.size)};
 
-            // Ignore unmapped areas from mappings due to buggy games setting the wrong cbuf size
-            mappings.erase(ranges::find_if(mappings, [](const auto &mapping) { return !mapping.valid(); }), mappings.end());
-
-            constantBufferSelector.guest.mappings.assign(mappings.begin(), mappings.end());
-
-            constantBufferSelector.view = gpu.buffer.FindOrCreate(constantBufferSelector.guest);
+            constantBufferSelector.view = gpu.buffer.FindOrCreate(mappings.front(), executor.cycle);
 
            return constantBufferSelector;
        }
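`ConstantBuffer::Read<T>()` now assumes a single contiguous mapping and delegates to `BufferView::Read()` under the view's lock, dropping the old per-mapping copy loop. A standalone sketch of the typed-read contract it implements; the `Descriptor` POD and `ReadTyped` helper are stand-ins, not the interconnect's actual types:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>

// Stand-in POD for something like an SSBO descriptor stored in a constant buffer
struct Descriptor {
    std::uint64_t iova;
    std::uint32_t size;
};

// Equivalent of ConstantBuffer::Read<T>(): memcpy sizeof(T) bytes out of the
// (already locked) view's backing at a byte offset
template<typename T>
T ReadTyped(const std::uint8_t *data, std::size_t offset) {
    T object;
    std::memcpy(&object, data + offset, sizeof(T));
    return object;
}
```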
@@ -750,14 +733,11 @@ namespace skyline::gpu::interconnect {
            .convert_depth_mode = true // This is required for the default GPU register state
        };
 
-        constexpr static size_t PipelineUniqueDescriptorTypeCount{2}; //!< The amount of unique descriptor types that may be bound to a pipeline
-        constexpr static size_t MaxPipelineDescriptorWriteCount{maxwell3d::PipelineStageCount * PipelineUniqueDescriptorTypeCount}; //!< The maximum amount of descriptor writes that are used to bind a pipeline
+        constexpr static size_t PipelineUniqueDescriptorTypeCount{3}; //!< The amount of unique descriptor types that may be bound to a pipeline
+        constexpr static size_t PipelineDescriptorWritesReservedCount{maxwell3d::PipelineStageCount * PipelineUniqueDescriptorTypeCount}; //!< The amount of descriptor writes reserved in advance to bind a pipeline, this is not a hard limit due to the Adreno descriptor quirk
        constexpr static size_t MaxPipelineDescriptorCount{100}; //!< The maximum amount of descriptors we support being bound to a pipeline
 
-        boost::container::static_vector<vk::WriteDescriptorSet, MaxPipelineDescriptorWriteCount> descriptorSetWrites;
        boost::container::static_vector<vk::DescriptorSetLayoutBinding, MaxPipelineDescriptorCount> layoutBindings;
-        boost::container::static_vector<vk::DescriptorBufferInfo, MaxPipelineDescriptorCount> bufferInfo;
-        boost::container::static_vector<vk::DescriptorImageInfo, MaxPipelineDescriptorCount> imageInfo;
 
        /**
         * @brief All state concerning the shader programs and their bindings
@@ -767,7 +747,18 @@ namespace skyline::gpu::interconnect {
            boost::container::static_vector<std::shared_ptr<vk::raii::ShaderModule>, maxwell3d::PipelineStageCount> shaderModules; //!< Shader modules for every pipeline stage
            boost::container::static_vector<vk::PipelineShaderStageCreateInfo, maxwell3d::PipelineStageCount> shaderStages; //!< Shader stage descriptions for every pipeline stage
            vk::raii::DescriptorSetLayout descriptorSetLayout; //!< The descriptor set layout for the pipeline (Only valid when `activeShaderStagesInfoCount` is non-zero)
-            span<vk::WriteDescriptorSet> descriptorSetWrites; //!< The writes to the descriptor set that need to be done prior to executing a pipeline
+
+            struct DescriptorSetWrites {
+                std::vector<vk::WriteDescriptorSet> writes; //!< The descriptor set writes for the pipeline
+                std::vector<vk::DescriptorBufferInfo> bufferDescriptors; //!< The storage for buffer descriptors
+                std::vector<vk::DescriptorImageInfo> imageDescriptors; //!< The storage for image descriptors
+
+                std::vector<vk::WriteDescriptorSet> &operator*() {
+                    return writes;
+                }
+            };
+
+            std::unique_ptr<DescriptorSetWrites> descriptorSetWrites; //!< The writes to the descriptor set that need to be done prior to executing a pipeline
        };
 
        /**
@@ -798,11 +789,10 @@ namespace skyline::gpu::interconnect {
            auto ssbo{cbuf.Read<SsboDescriptor>(descriptor.cbuf_offset)};
 
            auto mappings{channelCtx.asCtx->gmmu.TranslateRange(ssbo.iova, ssbo.size)};
+            if (mappings.size() != 1)
+                Logger::Warn("Multiple buffer mappings ({}) are not supported", mappings.size());
 
-            GuestBuffer guestBuffer;
-            guestBuffer.mappings.assign(mappings.begin(), mappings.end());
-
-            return gpu.buffer.FindOrCreate(guestBuffer);
+            return gpu.buffer.FindOrCreate(mappings.front(), executor.cycle);
        }
 
        /**
@@ -889,14 +879,29 @@ namespace skyline::gpu::interconnect {
                }
            }
 
-            descriptorSetWrites.clear();
+            auto descriptorSetWrites{std::make_unique<DescriptorSetWrites>()};
+            auto &descriptorWrites{**descriptorSetWrites};
+            descriptorWrites.reserve(PipelineDescriptorWritesReservedCount);
+
+            auto &bufferDescriptors{descriptorSetWrites->bufferDescriptors};
+            auto &imageDescriptors{descriptorSetWrites->imageDescriptors};
+            size_t bufferCount{}, imageCount{};
+            for (auto &pipelineStage : pipelineStages) {
+                if (pipelineStage.enabled) {
+                    auto &program{pipelineStage.program->program};
+                    bufferCount += program.info.constant_buffer_descriptors.size() + program.info.storage_buffers_descriptors.size();
+                    imageCount += program.info.texture_descriptors.size();
+                }
+            }
+            bufferDescriptors.resize(bufferCount);
+            imageDescriptors.resize(imageCount);
+
            layoutBindings.clear();
-            bufferInfo.clear();
-            imageInfo.clear();
 
            runtimeInfo.previous_stage_stores.mask.set(); // First stage should always have all bits set
            ShaderCompiler::Backend::Bindings bindings{};
 
+            size_t bufferIndex{}, imageIndex{};
            boost::container::static_vector<std::shared_ptr<vk::raii::ShaderModule>, maxwell3d::PipelineStageCount> shaderModules;
            boost::container::static_vector<vk::PipelineShaderStageCreateInfo, maxwell3d::PipelineStageCount> shaderStages;
            for (auto &pipelineStage : pipelineStages) {
@@ -920,11 +925,11 @@ namespace skyline::gpu::interconnect {
                u32 bindingIndex{pipelineStage.bindingBase};
                if (!program.info.constant_buffer_descriptors.empty()) {
-                    descriptorSetWrites.push_back(vk::WriteDescriptorSet{
+                    descriptorWrites.push_back(vk::WriteDescriptorSet{
                        .dstBinding = bindingIndex,
                        .descriptorCount = static_cast<u32>(program.info.constant_buffer_descriptors.size()),
                        .descriptorType = vk::DescriptorType::eUniformBuffer,
-                        .pBufferInfo = bufferInfo.data() + bufferInfo.size(),
+                        .pBufferInfo = bufferDescriptors.data() + bufferIndex,
                    });
 
                    for (auto &constantBuffer : program.info.constant_buffer_descriptors) {
@@ -936,23 +941,24 @@ namespace skyline::gpu::interconnect {
                        });
 
                        auto view{pipelineStage.constantBuffers[constantBuffer.index].view};
-                        std::scoped_lock lock{view};
-                        bufferInfo.push_back(vk::DescriptorBufferInfo{
-                            .buffer = view.buffer->GetBacking(),
-                            .offset = view->offset,
-                            .range = view->range,
+                        std::scoped_lock lock(view);
+                        view.RegisterUsage([descriptor = bufferDescriptors.data() + bufferIndex++](const Buffer::BufferViewStorage &view, const std::shared_ptr<Buffer> &buffer) {
+                            *descriptor = vk::DescriptorBufferInfo{
+                                .buffer = buffer->GetBacking(),
+                                .offset = view.offset,
+                                .range = view.size,
+                            };
                        });
                        executor.AttachBuffer(view);
                    }
                }
 
                if (!program.info.storage_buffers_descriptors.empty()) {
-                    descriptorSetWrites.push_back({
+                    descriptorWrites.push_back(vk::WriteDescriptorSet{
                        .dstBinding = bindingIndex,
                        .descriptorCount = static_cast<u32>(program.info.storage_buffers_descriptors.size()),
                        .descriptorType = vk::DescriptorType::eStorageBuffer,
-                        .pBufferInfo = bufferInfo.data() + bufferInfo.size(),
+                        .pBufferInfo = bufferDescriptors.data() + bufferIndex,
                    });
 
                    for (auto &storageBuffer : program.info.storage_buffers_descriptors) {
@@ -965,10 +971,12 @@ namespace skyline::gpu::interconnect {
                        auto view{GetSsboViewFromDescriptor(storageBuffer, pipelineStage.constantBuffers)};
                        std::scoped_lock lock{view};
-                        bufferInfo.push_back(vk::DescriptorBufferInfo{
-                            .buffer = view.buffer->GetBacking(),
-                            .offset = view->offset,
-                            .range = view->range,
+                        view.RegisterUsage([descriptor = bufferDescriptors.data() + bufferIndex++](const Buffer::BufferViewStorage &view, const std::shared_ptr<Buffer> &buffer) {
+                            *descriptor = vk::DescriptorBufferInfo{
+                                .buffer = buffer->GetBacking(),
+                                .offset = view.offset,
+                                .range = view.size,
+                            };
                        });
                        executor.AttachBuffer(view);
                    }
@@ -982,22 +990,22 @@ namespace skyline::gpu::interconnect {
                if (!program.info.texture_descriptors.empty()) {
                    if (!gpu.traits.quirks.needsIndividualTextureBindingWrites)
-                        descriptorSetWrites.push_back(vk::WriteDescriptorSet{
+                        descriptorWrites.push_back(vk::WriteDescriptorSet{
                            .dstBinding = bindingIndex,
                            .descriptorCount = static_cast<u32>(program.info.texture_descriptors.size()),
                            .descriptorType = vk::DescriptorType::eCombinedImageSampler,
-                            .pImageInfo = imageInfo.data() + imageInfo.size(),
+                            .pImageInfo = imageDescriptors.data() + imageIndex,
                        });
                    else
-                        descriptorSetWrites.reserve(descriptorSetWrites.size() + program.info.texture_descriptors.size());
+                        descriptorWrites.reserve(descriptorWrites.size() + program.info.texture_descriptors.size());
 
                    for (auto &texture : program.info.texture_descriptors) {
                        if (gpu.traits.quirks.needsIndividualTextureBindingWrites)
-                            descriptorSetWrites.push_back(vk::WriteDescriptorSet{
+                            descriptorWrites.push_back(vk::WriteDescriptorSet{
                                .dstBinding = bindingIndex,
                                .descriptorCount = 1,
                                .descriptorType = vk::DescriptorType::eCombinedImageSampler,
-                                .pImageInfo = imageInfo.data() + imageInfo.size(),
+                                .pImageInfo = imageDescriptors.data() + imageIndex,
                            });
 
                        layoutBindings.push_back(vk::DescriptorSetLayoutBinding{
@@ -1020,11 +1028,11 @@ namespace skyline::gpu::interconnect {
                        auto textureView{GetPoolTextureView(handle.textureIndex)};
 
                        std::scoped_lock lock(*textureView);
-                        imageInfo.push_back(vk::DescriptorImageInfo{
+                        imageDescriptors[imageIndex++] = vk::DescriptorImageInfo{
                            .sampler = **sampler,
                            .imageView = textureView->GetView(),
                            .imageLayout = textureView->texture->layout,
-                        });
+                        };
 
                        executor.AttachTexture(textureView.get());
                        executor.AttachDependency(std::move(sampler));
                    }
@@ -1048,7 +1056,7 @@ namespace skyline::gpu::interconnect {
                    .pBindings = layoutBindings.data(),
                    .bindingCount = static_cast<u32>(layoutBindings.size()),
                }),
-                descriptorSetWrites,
+                std::move(descriptorSetWrites),
            };
        }
@@ -1643,11 +1651,11 @@ namespace skyline::gpu::interconnect {
            else if (vertexBuffer.view)
                return vertexBuffer.view;
 
-            GuestBuffer guest;
            auto mappings{channelCtx.asCtx->gmmu.TranslateRange(vertexBuffer.start, (vertexBuffer.end + 1) - vertexBuffer.start)};
-            guest.mappings.assign(mappings.begin(), mappings.end());
+            if (mappings.size() != 1)
+                Logger::Warn("Multiple buffer mappings ({}) are not supported", mappings.size());
 
-            vertexBuffer.view = gpu.buffer.FindOrCreate(guest);
+            vertexBuffer.view = gpu.buffer.FindOrCreate(mappings.front(), executor.cycle);
            return vertexBuffer.view;
        }
@@ -1842,7 +1850,7 @@ namespace skyline::gpu::interconnect {
         * @tparam ConvGR Converts all green component
         * @tparam SwapBR Swaps blue and red components
         */
-        template <bool ConvGR, bool SwapBR>
+        template<bool ConvGR, bool SwapBR>
        vk::ComponentMapping ConvertTicSwizzleMapping(TextureImageControl::FormatWord format) {
            auto convertComponentSwizzle{[](TextureImageControl::ImageSwizzle swizzle) {
                switch (swizzle) {
@@ -2223,11 +2231,12 @@ namespace skyline::gpu::interconnect {
            else if (indexBuffer.view && size == indexBuffer.viewSize)
                return indexBuffer.view;
 
-            GuestBuffer guestBuffer;
            auto mappings{channelCtx.asCtx->gmmu.TranslateRange(indexBuffer.start, size)};
-            guestBuffer.mappings.assign(mappings.begin(), mappings.end());
+            if (mappings.size() != 1)
+                Logger::Warn("Multiple buffer mappings ({}) are not supported", mappings.size());
 
-            indexBuffer.view = gpu.buffer.FindOrCreate(guestBuffer);
+            auto mapping{mappings.front()};
+            indexBuffer.view = gpu.buffer.FindOrCreate(span<u8>(mapping.data(), size), executor.cycle);
            return indexBuffer.view;
        }
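The descriptor rework above hinges on `RegisterUsage()`: instead of baking a `VkBuffer` into a `vk::DescriptorBufferInfo` at state-build time, each usage registers a callback that fills its pre-reserved descriptor slot (the `resize()` up front keeps `bufferDescriptors.data()` pointers stable) and re-fires if the view is repointed at a merged buffer. A minimal standalone sketch of the callback composition, with illustrative types rather than skyline's:

```cpp
#include <functional>
#include <memory>

// Illustrative stand-ins: a swappable backing and a delegate that chains usage callbacks
struct Backing {
    int id;
};

using UsageCallback = std::function<void(const std::shared_ptr<Backing> &)>;

struct Delegate {
    std::shared_ptr<Backing> backing;
    UsageCallback usageCallback;

    void RegisterUsage(const UsageCallback &callback) {
        callback(backing); // Fired once immediately, like BufferView::RegisterUsage()
        if (!usageCallback) {
            usageCallback = callback;
        } else {
            // Compose so every previously registered usage re-fires on a backing swap
            usageCallback = [callback, oldCallback = std::move(usageCallback)](const std::shared_ptr<Backing> &newBacking) {
                oldCallback(newBacking);
                callback(newBacking);
            };
        }
    }

    void SwapBacking(std::shared_ptr<Backing> newBacking) {
        backing = std::move(newBacking);
        if (usageCallback)
            usageCallback(backing); // Descriptor slots are rewritten against the new backing
    }
};
```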
@@ -2433,33 +2442,43 @@ namespace skyline::gpu::interconnect {
        void Draw(u32 count, u32 first, i32 vertexOffset = 0) {
            // Shader + Binding Setup
            auto programState{CompileShaderProgramState()};
 
            auto descriptorSet{gpu.descriptor.AllocateSet(*programState.descriptorSetLayout)};
-            for (auto &descriptorSetWrite : programState.descriptorSetWrites)
+            for (auto &descriptorSetWrite : **programState.descriptorSetWrites)
                descriptorSetWrite.dstSet = descriptorSet;
-            gpu.vkDevice.updateDescriptorSets(programState.descriptorSetWrites, nullptr);
 
            vk::raii::PipelineLayout pipelineLayout(gpu.vkDevice, vk::PipelineLayoutCreateInfo{
                .pSetLayouts = &*programState.descriptorSetLayout,
                .setLayoutCount = 1,
            });
 
-            vk::Buffer indexBufferHandle;
-            vk::DeviceSize indexBufferOffset;
-            vk::IndexType indexBufferType;
+            struct BoundIndexBuffer {
+                vk::Buffer handle{};
+                vk::DeviceSize offset{};
+                vk::IndexType type{};
+            };
+
+            auto boundIndexBuffer{std::make_shared<BoundIndexBuffer>()};
            if constexpr (IsIndexed) {
                auto indexBufferView{GetIndexBuffer(count)};
-                std::scoped_lock lock(indexBufferView);
-                executor.AttachBuffer(indexBufferView);
+                {
+                    std::scoped_lock lock(indexBufferView);
 
-                indexBufferHandle = indexBufferView.buffer->GetBacking();
-                indexBufferOffset = indexBufferView->offset;
-                indexBufferType = indexBuffer.type;
+                    boundIndexBuffer->type = indexBuffer.type;
+                    indexBufferView.RegisterUsage([=](const Buffer::BufferViewStorage &view, const std::shared_ptr<Buffer> &buffer) {
+                        boundIndexBuffer->handle = buffer->GetBacking();
+                        boundIndexBuffer->offset = view.offset;
+                    });
+
+                    executor.AttachBuffer(indexBufferView);
+                }
            }
 
            // Vertex Buffer Setup
-            std::array<vk::Buffer, maxwell3d::VertexBufferCount> vertexBufferHandles{};
-            std::array<vk::DeviceSize, maxwell3d::VertexBufferCount> vertexBufferOffsets{};
+            struct BoundVertexBuffers {
+                std::array<vk::Buffer, maxwell3d::VertexBufferCount> handles{};
+                std::array<vk::DeviceSize, maxwell3d::VertexBufferCount> offsets{};
+            };
+            auto boundVertexBuffers{std::make_shared<BoundVertexBuffers>()};
 
            boost::container::static_vector<vk::VertexInputBindingDescription, maxwell3d::VertexBufferCount> vertexBindingDescriptions{};
            boost::container::static_vector<vk::VertexInputBindingDivisorDescriptionEXT, maxwell3d::VertexBufferCount> vertexBindingDivisorsDescriptions{};
@@ -2473,8 +2492,11 @@ namespace skyline::gpu::interconnect {
                    vertexBindingDivisorsDescriptions.push_back(vertexBuffer.bindingDivisorDescription);
 
                    std::scoped_lock vertexBufferLock(vertexBufferView);
-                    vertexBufferHandles[index] = vertexBufferView.buffer->GetBacking();
-                    vertexBufferOffsets[index] = vertexBufferView->offset;
+                    vertexBufferView.RegisterUsage([handle = boundVertexBuffers->handles.data() + index, offset = boundVertexBuffers->offsets.data() + index](const Buffer::BufferViewStorage &view, const std::shared_ptr<Buffer> &buffer) {
+                        *handle = buffer->GetBacking();
+                        *offset = view.offset;
+                    });
+
                    executor.AttachBuffer(vertexBufferView);
                }
            }
@@ -2505,18 +2527,29 @@ namespace skyline::gpu::interconnect {
                depthTargetLock.emplace(*depthRenderTargetView);
 
            // Draw Persistent Storage
-            struct Storage : FenceCycleDependency {
+            struct DrawStorage {
+                vk::raii::DescriptorSetLayout descriptorSetLayout;
+                std::unique_ptr<ShaderProgramState::DescriptorSetWrites> descriptorSetWrites;
                vk::raii::PipelineLayout pipelineLayout;
-                std::optional<vk::raii::Pipeline> pipeline;
-                DescriptorAllocator::ActiveDescriptorSet descriptorSet;
 
-                Storage(vk::raii::PipelineLayout &&pipelineLayout, DescriptorAllocator::ActiveDescriptorSet &&descriptorSet) : pipelineLayout(std::move(pipelineLayout)), descriptorSet(std::move(descriptorSet)) {}
+                DrawStorage(vk::raii::DescriptorSetLayout &&descriptorSetLayout, std::unique_ptr<ShaderProgramState::DescriptorSetWrites> &&descriptorSetWrites, vk::raii::PipelineLayout &&pipelineLayout) : descriptorSetLayout(std::move(descriptorSetLayout)), descriptorSetWrites(std::move(descriptorSetWrites)), pipelineLayout(std::move(pipelineLayout)) {}
            };
 
-            auto storage{std::make_shared<Storage>(std::move(pipelineLayout), std::move(descriptorSet))};
+            auto drawStorage{std::make_shared<DrawStorage>(std::move(programState.descriptorSetLayout), std::move(programState.descriptorSetWrites), std::move(pipelineLayout))};
+
+            // Command Buffer Persistent Storage
+            struct FenceStorage : FenceCycleDependency {
+                std::optional<vk::raii::Pipeline> pipeline;
+                DescriptorAllocator::ActiveDescriptorSet descriptorSet;
+                std::shared_ptr<DrawStorage> drawStorage{};
+
+                FenceStorage(DescriptorAllocator::ActiveDescriptorSet &&descriptorSet) : descriptorSet(std::move(descriptorSet)) {}
+            };
+
+            auto fenceStorage{std::make_shared<FenceStorage>(std::move(descriptorSet))};
 
            // Submit Draw
-            executor.AddSubpass([=, &vkDevice = gpu.vkDevice, shaderModules = programState.shaderModules, shaderStages = programState.shaderStages, inputAssemblyState = inputAssemblyState, multiViewport = gpu.traits.supportsMultipleViewports, viewports = viewports, scissors = scissors, rasterizerState = rasterizerState, multisampleState = multisampleState, depthState = depthState, blendState = blendState, storage = std::move(storage), supportsVertexAttributeDivisor = gpu.traits.supportsVertexAttributeDivisor, vertexBufferHandles = std::move(vertexBufferHandles), vertexBufferOffsets = std::move(vertexBufferOffsets), pipelineCache = *pipelineCache](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle, GPU &, vk::RenderPass renderPass, u32 subpassIndex) mutable {
+            executor.AddSubpass([=, &vkDevice = gpu.vkDevice, shaderModules = programState.shaderModules, shaderStages = programState.shaderStages, inputAssemblyState = inputAssemblyState, multiViewport = gpu.traits.supportsMultipleViewports, viewports = viewports, scissors = scissors, rasterizerState = rasterizerState, multisampleState = multisampleState, depthState = depthState, blendState = blendState, drawStorage = std::move(drawStorage), fenceStorage = std::move(fenceStorage), supportsVertexAttributeDivisor = gpu.traits.supportsVertexAttributeDivisor, pipelineCache = *pipelineCache](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle, GPU &, vk::RenderPass renderPass, u32 subpassIndex) mutable {
                vk::StructureChain<vk::PipelineVertexInputStateCreateInfo, vk::PipelineVertexInputDivisorStateCreateInfoEXT> vertexState{
                    vk::PipelineVertexInputStateCreateInfo{
                        .pVertexBindingDescriptions = vertexBindingDescriptions.data(),
@@ -2553,7 +2586,7 @@ namespace skyline::gpu::interconnect {
                    .pDepthStencilState = &depthState,
                    .pColorBlendState = &blendState,
                    .pDynamicState = nullptr,
-                    .layout = *storage->pipelineLayout,
+                    .layout = *drawStorage->pipelineLayout,
                    .renderPass = renderPass,
                    .subpass = subpassIndex,
                };
@@ -2564,6 +2597,7 @@ namespace skyline::gpu::interconnect {
                commandBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline.value);
 
+                auto &vertexBufferHandles{boundVertexBuffers->handles};
                for (u32 bindingIndex{}; bindingIndex != vertexBufferHandles.size(); bindingIndex++) {
                    // We need to bind all non-null vertex buffers while skipping any null ones
                    if (vertexBufferHandles[bindingIndex]) {
@@ -2572,24 +2606,26 @@ namespace skyline::gpu::interconnect {
                            bindingEndIndex++;
 
                        u32 bindingCount{bindingEndIndex - bindingIndex};
-                        commandBuffer.bindVertexBuffers(bindingIndex, span(vertexBufferHandles.data() + bindingIndex, bindingCount), span(vertexBufferOffsets.data() + bindingIndex, bindingCount));
+                        commandBuffer.bindVertexBuffers(bindingIndex, span(vertexBufferHandles.data() + bindingIndex, bindingCount), span(boundVertexBuffers->offsets.data() + bindingIndex, bindingCount));
                    }
                }
 
-                commandBuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *storage->pipelineLayout, 0, storage->descriptorSet, nullptr);
+                vkDevice.updateDescriptorSets(**drawStorage->descriptorSetWrites, nullptr);
+                commandBuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *drawStorage->pipelineLayout, 0, fenceStorage->descriptorSet, nullptr);
 
                if constexpr (IsIndexed) {
-                    commandBuffer.bindIndexBuffer(indexBufferHandle, indexBufferOffset, indexBufferType);
+                    commandBuffer.bindIndexBuffer(boundIndexBuffer->handle, boundIndexBuffer->offset, boundIndexBuffer->type);
                    commandBuffer.drawIndexed(count, 1, first, vertexOffset, 0);
                } else {
                    commandBuffer.draw(count, 1, first, 0);
                }
 
-                storage->pipeline = vk::raii::Pipeline(vkDevice, pipeline.value);
+                fenceStorage->drawStorage = drawStorage;
+                fenceStorage->pipeline = vk::raii::Pipeline(vkDevice, pipeline.value);
 
-                cycle->AttachObject(storage);
+                cycle->AttachObject(fenceStorage);
            }, vk::Rect2D{
-                .extent = activeColorRenderTargets[0]->texture->dimensions,
+                .extent = activeColorRenderTargets.front()->texture->dimensions,
            }, {}, activeColorRenderTargets, depthRenderTargetView);
        }
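The `BoundIndexBuffer`/`BoundVertexBuffers` state is heap-allocated behind a `shared_ptr` because the actual `VkBuffer` handle isn't final until command recording: the `RegisterUsage()` callback and the deferred `AddSubpass()` lambda must observe the same storage so a late repoint is still picked up. A minimal standalone sketch of that shared-state pattern, with illustrative names:

```cpp
#include <cstdint>
#include <functional>
#include <memory>

// Stand-in for BoundIndexBuffer: written late by a usage callback, read even
// later by the deferred command-recording lambda
struct BoundBuffer {
    std::uint64_t handle{};
};

std::function<std::uint64_t()> MakeDeferredBind() {
    auto bound{std::make_shared<BoundBuffer>()};

    // Equivalent of the RegisterUsage() callback: may run again with a new backing
    auto usage{[bound](std::uint64_t newHandle) { bound->handle = newHandle; }};
    usage(42); // Registration fires the callback once immediately

    // Equivalent of the AddSubpass() lambda: observes whatever was written last
    return [bound] { return bound->handle; };
}
```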