mirror of
https://github.com/skyline-emu/skyline.git
synced 2025-01-07 19:25:30 +03:00
Introduce Context
semantics to GPU resource locking
Resources on the GPU can be fairly convoluted and involve overlaps, which can lead to the same GPU resources being utilized with different views. We previously utilized fences to lock resources to prevent concurrent access, but this was overly harsh as it would block usage of resources until GPU completion of the commands associated with a resource. Fences have now been replaced with locks, but locks run into the issue of being per-view; therefore, to add a common object for tracking usage, the concept of "tags" was introduced to track a single context so that locks can be skipped if they're from the same context. This is important to prevent a deadlock when locking a resource which has already been locked from the current context with a different view.
This commit is contained in:
parent
d913f29662
commit
6b9269b88e
@ -34,12 +34,11 @@ namespace skyline::gpu {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
Buffer::Buffer(GPU &gpu, GuestBuffer guest) : gpu(gpu), backing(gpu.memory.AllocateBuffer(guest.size())), guest(guest) {
|
Buffer::Buffer(GPU &gpu, GuestBuffer guest) : gpu{gpu}, backing{gpu.memory.AllocateBuffer(guest.size())}, guest{guest} {
|
||||||
SetupGuestMappings();
|
SetupGuestMappings();
|
||||||
}
|
}
|
||||||
|
|
||||||
Buffer::Buffer(GPU &gpu, const std::shared_ptr<FenceCycle> &pCycle, GuestBuffer guest, span<std::shared_ptr<Buffer>> srcBuffers) : gpu(gpu), backing(gpu.memory.AllocateBuffer(guest.size())), guest(guest) {
|
Buffer::Buffer(GPU &gpu, GuestBuffer guest, ContextTag tag, span<std::shared_ptr<Buffer>> srcBuffers) : gpu{gpu}, backing{gpu.memory.AllocateBuffer(guest.size())}, guest{guest} {
|
||||||
std::scoped_lock bufLock{*this};
|
|
||||||
SetupGuestMappings();
|
SetupGuestMappings();
|
||||||
|
|
||||||
// Source buffers don't necessarily fully overlap with us so we have to perform a sync here to prevent any gaps
|
// Source buffers don't necessarily fully overlap with us so we have to perform a sync here to prevent any gaps
|
||||||
@ -60,7 +59,7 @@ namespace skyline::gpu {
|
|||||||
|
|
||||||
// Transfer data/state from source buffers
|
// Transfer data/state from source buffers
|
||||||
for (const auto &srcBuffer : srcBuffers) {
|
for (const auto &srcBuffer : srcBuffers) {
|
||||||
std::scoped_lock lock{*srcBuffer};
|
ContextLock lock{tag, *srcBuffer};
|
||||||
if (srcBuffer->guest) {
|
if (srcBuffer->guest) {
|
||||||
if (srcBuffer->hostImmutableCycle) {
|
if (srcBuffer->hostImmutableCycle) {
|
||||||
// Propagate any host immutability
|
// Propagate any host immutability
|
||||||
@ -75,8 +74,8 @@ namespace skyline::gpu {
|
|||||||
if (srcBuffer->dirtyState == Buffer::DirtyState::GpuDirty) {
|
if (srcBuffer->dirtyState == Buffer::DirtyState::GpuDirty) {
|
||||||
// If the source buffer is GPU dirty we cannot directly copy over its GPU backing contents
|
// If the source buffer is GPU dirty we cannot directly copy over its GPU backing contents
|
||||||
|
|
||||||
// Only sync back the buffer if it's not attached to the current fence cycle, otherwise propagate the GPU dirtiness
|
// Only sync back the buffer if it's not attached to the current context, otherwise propagate the GPU dirtiness
|
||||||
if (!srcBuffer->cycle.owner_before(pCycle)) {
|
if (lock.isFirst) {
|
||||||
// Perform a GPU -> CPU sync on the source then do a CPU -> GPU sync for the region occupied by the source
|
// Perform a GPU -> CPU sync on the source then do a CPU -> GPU sync for the region occupied by the source
|
||||||
// This is required because, if we were created from two buffers (one GPU dirty in the current cycle and one GPU dirty in the previous cycle) and we marked ourselves as CPU dirty here, the GPU dirtiness from the current cycle buffer would be ignored and cause writes to be missed
|
// This is required because, if we were created from two buffers (one GPU dirty in the current cycle and one GPU dirty in the previous cycle) and we marked ourselves as CPU dirty here, the GPU dirtiness from the current cycle buffer would be ignored and cause writes to be missed
|
||||||
srcBuffer->SynchronizeGuest(true);
|
srcBuffer->SynchronizeGuest(true);
|
||||||
@ -100,12 +99,12 @@ namespace skyline::gpu {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Buffer::~Buffer() {
|
Buffer::~Buffer() {
|
||||||
std::scoped_lock lock{*this};
|
|
||||||
if (trapHandle)
|
if (trapHandle)
|
||||||
gpu.state.nce->DeleteTrap(*trapHandle);
|
gpu.state.nce->DeleteTrap(*trapHandle);
|
||||||
SynchronizeGuest(true);
|
SynchronizeGuest(true);
|
||||||
if (alignedMirror.valid())
|
if (alignedMirror.valid())
|
||||||
munmap(alignedMirror.data(), alignedMirror.size());
|
munmap(alignedMirror.data(), alignedMirror.size());
|
||||||
|
WaitOnFence();
|
||||||
}
|
}
|
||||||
|
|
||||||
void Buffer::MarkGpuDirty() {
|
void Buffer::MarkGpuDirty() {
|
||||||
@ -289,6 +288,28 @@ namespace skyline::gpu {
|
|||||||
hostImmutableCycle = pCycle;
|
hostImmutableCycle = pCycle;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Buffer::lock() {
|
||||||
|
mutex.lock();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Buffer::LockWithTag(ContextTag pTag) {
|
||||||
|
if (pTag && pTag == tag)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
mutex.lock();
|
||||||
|
tag = pTag;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Buffer::unlock() {
|
||||||
|
tag = ContextTag{};
|
||||||
|
mutex.unlock();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Buffer::try_lock() {
|
||||||
|
return mutex.try_lock();
|
||||||
|
}
|
||||||
|
|
||||||
Buffer::BufferViewStorage::BufferViewStorage(vk::DeviceSize offset, vk::DeviceSize size, vk::Format format) : offset(offset), size(size), format(format) {}
|
Buffer::BufferViewStorage::BufferViewStorage(vk::DeviceSize offset, vk::DeviceSize size, vk::Format format) : offset(offset), size(size), format(format) {}
|
||||||
|
|
||||||
Buffer::BufferDelegate::BufferDelegate(std::shared_ptr<Buffer> pBuffer, const Buffer::BufferViewStorage *view) : buffer(std::move(pBuffer)), view(view) {
|
Buffer::BufferDelegate::BufferDelegate(std::shared_ptr<Buffer> pBuffer, const Buffer::BufferViewStorage *view) : buffer(std::move(pBuffer)), view(view) {
|
||||||
@ -296,7 +317,6 @@ namespace skyline::gpu {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Buffer::BufferDelegate::~BufferDelegate() {
|
Buffer::BufferDelegate::~BufferDelegate() {
|
||||||
std::scoped_lock lock(*this);
|
|
||||||
buffer->delegates.erase(iterator);
|
buffer->delegates.erase(iterator);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -314,6 +334,21 @@ namespace skyline::gpu {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool Buffer::BufferDelegate::LockWithTag(ContextTag pTag) {
|
||||||
|
auto lBuffer{std::atomic_load(&buffer)};
|
||||||
|
while (true) {
|
||||||
|
bool didLock{lBuffer->LockWithTag(pTag)};
|
||||||
|
|
||||||
|
auto latestBacking{std::atomic_load(&buffer)};
|
||||||
|
if (lBuffer == latestBacking)
|
||||||
|
return didLock;
|
||||||
|
|
||||||
|
if (didLock)
|
||||||
|
lBuffer->unlock();
|
||||||
|
lBuffer = latestBacking;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void Buffer::BufferDelegate::unlock() {
|
void Buffer::BufferDelegate::unlock() {
|
||||||
buffer->unlock();
|
buffer->unlock();
|
||||||
}
|
}
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
#include <boost/functional/hash.hpp>
|
#include <boost/functional/hash.hpp>
|
||||||
#include <nce.h>
|
#include <nce.h>
|
||||||
|
#include <gpu/tag_allocator.h>
|
||||||
#include "memory_manager.h"
|
#include "memory_manager.h"
|
||||||
|
|
||||||
namespace skyline::gpu {
|
namespace skyline::gpu {
|
||||||
@ -23,6 +24,7 @@ namespace skyline::gpu {
|
|||||||
private:
|
private:
|
||||||
GPU &gpu;
|
GPU &gpu;
|
||||||
std::mutex mutex; //!< Synchronizes any mutations to the buffer or its backing
|
std::mutex mutex; //!< Synchronizes any mutations to the buffer or its backing
|
||||||
|
std::atomic<ContextTag> tag{}; //!< The tag associated with the last lock call
|
||||||
memory::Buffer backing;
|
memory::Buffer backing;
|
||||||
std::optional<GuestBuffer> guest;
|
std::optional<GuestBuffer> guest;
|
||||||
|
|
||||||
@ -103,6 +105,8 @@ namespace skyline::gpu {
|
|||||||
|
|
||||||
void lock();
|
void lock();
|
||||||
|
|
||||||
|
bool LockWithTag(ContextTag tag);
|
||||||
|
|
||||||
void unlock();
|
void unlock();
|
||||||
|
|
||||||
bool try_lock();
|
bool try_lock();
|
||||||
@ -120,7 +124,7 @@ namespace skyline::gpu {
|
|||||||
void SetupGuestMappings();
|
void SetupGuestMappings();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
std::weak_ptr<FenceCycle> cycle; //!< A fence cycle for when any host operation mutating the buffer has completed, it must be waited on prior to any mutations to the backing
|
std::weak_ptr<FenceCycle> cycle{}; //!< A fence cycle for when any host operation mutating the buffer has completed, it must be waited on prior to any mutations to the backing
|
||||||
|
|
||||||
constexpr vk::Buffer GetBacking() {
|
constexpr vk::Buffer GetBacking() {
|
||||||
return backing.vkBuffer;
|
return backing.vkBuffer;
|
||||||
@ -140,10 +144,10 @@ namespace skyline::gpu {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Creates a Buffer that is pre-synchronised with the contents of the input buffers
|
* @brief Creates a Buffer that is pre-synchronised with the contents of the input buffers
|
||||||
* @param pCycle The FenceCycle associated with the current workload, utilised for synchronising GPU dirty buffers
|
* @param tag The tag to associate locking of srcBuffers with
|
||||||
* @param srcBuffers Span of overlapping source buffers
|
* @param srcBuffers Span of overlapping source buffers
|
||||||
*/
|
*/
|
||||||
Buffer(GPU &gpu, const std::shared_ptr<FenceCycle> &pCycle, GuestBuffer guest, span<std::shared_ptr<Buffer>> srcBuffers);
|
Buffer(GPU &gpu, GuestBuffer guest, ContextTag tag, span<std::shared_ptr<Buffer>> srcBuffers);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Creates a host-only Buffer which isn't backed by any guest buffer
|
* @brief Creates a host-only Buffer which isn't backed by any guest buffer
|
||||||
@ -157,25 +161,27 @@ namespace skyline::gpu {
|
|||||||
* @brief Acquires an exclusive lock on the buffer for the calling thread
|
* @brief Acquires an exclusive lock on the buffer for the calling thread
|
||||||
* @note Naming is in accordance to the BasicLockable named requirement
|
* @note Naming is in accordance to the BasicLockable named requirement
|
||||||
*/
|
*/
|
||||||
void lock() {
|
void lock();
|
||||||
mutex.lock();
|
|
||||||
}
|
/**
|
||||||
|
* @brief Acquires an exclusive lock on the buffer for the calling thread
|
||||||
|
* @param tag A tag to associate with the lock, future invocations with the same tag prior to the unlock will acquire the lock without waiting (0 is not a valid tag value and will disable tag behavior)
|
||||||
|
* @return If the lock was acquired by this call rather than having the same tag as the holder
|
||||||
|
* @note All locks using the same tag **must** be from the same thread as it'll only have one corresponding unlock() call
|
||||||
|
*/
|
||||||
|
bool LockWithTag(ContextTag tag);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Relinquishes an existing lock on the buffer by the calling thread
|
* @brief Relinquishes an existing lock on the buffer by the calling thread
|
||||||
* @note Naming is in accordance to the BasicLockable named requirement
|
* @note Naming is in accordance to the BasicLockable named requirement
|
||||||
*/
|
*/
|
||||||
void unlock() {
|
void unlock();
|
||||||
mutex.unlock();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Attempts to acquire an exclusive lock but returns immediately if it's captured by another thread
|
* @brief Attempts to acquire an exclusive lock but returns immediately if it's captured by another thread
|
||||||
* @note Naming is in accordance to the Lockable named requirement
|
* @note Naming is in accordance to the Lockable named requirement
|
||||||
*/
|
*/
|
||||||
bool try_lock() {
|
bool try_lock();
|
||||||
return mutex.try_lock();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Marks the buffer as dirty on the GPU, it will be synced on the next call to SynchronizeGuest
|
* @brief Marks the buffer as dirty on the GPU, it will be synced on the next call to SynchronizeGuest
|
||||||
@ -312,6 +318,16 @@ namespace skyline::gpu {
|
|||||||
bufferDelegate->lock();
|
bufferDelegate->lock();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Acquires an exclusive lock on the buffer for the calling thread
|
||||||
|
* @param tag A tag to associate with the lock, future invocations with the same tag prior to the unlock will acquire the lock without waiting (0 is not a valid tag value and will disable tag behavior)
|
||||||
|
* @return If the lock was acquired by this call rather than having the same tag as the holder
|
||||||
|
* @note All locks using the same tag **must** be from the same thread as it'll only have one corresponding unlock() call
|
||||||
|
*/
|
||||||
|
bool LockWithTag(ContextTag tag) const {
|
||||||
|
return bufferDelegate->LockWithTag(tag);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Relinquishes an existing lock on the buffer by the calling thread
|
* @brief Relinquishes an existing lock on the buffer by the calling thread
|
||||||
* @note Naming is in accordance to the BasicLockable named requirement
|
* @note Naming is in accordance to the BasicLockable named requirement
|
||||||
|
@ -12,7 +12,7 @@ namespace skyline::gpu {
|
|||||||
return it->guest->begin().base() < pointer;
|
return it->guest->begin().base() < pointer;
|
||||||
}
|
}
|
||||||
|
|
||||||
BufferView BufferManager::FindOrCreate(GuestBuffer guestMapping, const std::shared_ptr<FenceCycle> &cycle) {
|
BufferView BufferManager::FindOrCreate(GuestBuffer guestMapping, ContextTag tag) {
|
||||||
/*
|
/*
|
||||||
* We align the buffer to the page boundary to ensure that:
|
* We align the buffer to the page boundary to ensure that:
|
||||||
* 1) Any buffer view has the same alignment guarantees as on the guest, this is required for UBOs, SSBOs and Texel buffers
|
* 1) Any buffer view has the same alignment guarantees as on the guest, this is required for UBOs, SSBOs and Texel buffers
|
||||||
@ -34,7 +34,7 @@ namespace skyline::gpu {
|
|||||||
auto buffer{overlaps.front()};
|
auto buffer{overlaps.front()};
|
||||||
if (buffer->guest->begin() <= guestMapping.begin() && buffer->guest->end() >= guestMapping.end()) {
|
if (buffer->guest->begin() <= guestMapping.begin() && buffer->guest->end() >= guestMapping.end()) {
|
||||||
// If we find a buffer which can entirely fit the guest mapping, we can just return a view into it
|
// If we find a buffer which can entirely fit the guest mapping, we can just return a view into it
|
||||||
std::scoped_lock bufferLock{*buffer};
|
ContextLock bufferLock{tag, *buffer};
|
||||||
return buffer->GetView(static_cast<vk::DeviceSize>(guestMapping.begin() - buffer->guest->begin()) + offset, size);
|
return buffer->GetView(static_cast<vk::DeviceSize>(guestMapping.begin() - buffer->guest->begin()) + offset, size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -49,9 +49,9 @@ namespace skyline::gpu {
|
|||||||
highestAddress = mapping.end().base();
|
highestAddress = mapping.end().base();
|
||||||
}
|
}
|
||||||
|
|
||||||
auto newBuffer{std::make_shared<Buffer>(gpu, cycle, span<u8>(lowestAddress, highestAddress), overlaps)};
|
auto newBuffer{std::make_shared<Buffer>(gpu, span<u8>{lowestAddress, highestAddress}, tag, overlaps)};
|
||||||
for (auto &overlap : overlaps) {
|
for (auto &overlap : overlaps) {
|
||||||
std::scoped_lock overlapLock{*overlap};
|
ContextLock overlapLock{tag, *overlap};
|
||||||
|
|
||||||
buffers.erase(std::find(buffers.begin(), buffers.end(), overlap));
|
buffers.erase(std::find(buffers.begin(), buffers.end(), overlap));
|
||||||
|
|
||||||
@ -62,7 +62,7 @@ namespace skyline::gpu {
|
|||||||
// This is a slight hack as we really shouldn't be changing the underlying non-mutable set elements without a rehash but without writing our own set impl this is the best we can do
|
// This is a slight hack as we really shouldn't be changing the underlying non-mutable set elements without a rehash but without writing our own set impl this is the best we can do
|
||||||
const_cast<Buffer::BufferViewStorage *>(&*it)->offset += overlapOffset;
|
const_cast<Buffer::BufferViewStorage *>(&*it)->offset += overlapOffset;
|
||||||
|
|
||||||
// Reset the sequence number to the initial one, if the new buffer was created from any GPU dirty overlaps then the new buffer's sequence will be incremented past this thus forcing a reacquire if neccessary
|
// Reset the sequence number to the initial one, if the new buffer was created from any GPU dirty overlaps then the new buffer's sequence will be incremented past this thus forcing a reacquire if necessary
|
||||||
// This is fine to do in the set since the hash and operator== do not use this value
|
// This is fine to do in the set since the hash and operator== do not use this value
|
||||||
it->lastAcquiredSequence = Buffer::InitialSequenceNumber;
|
it->lastAcquiredSequence = Buffer::InitialSequenceNumber;
|
||||||
}
|
}
|
||||||
|
@ -42,16 +42,16 @@ namespace skyline::gpu {
|
|||||||
|
|
||||||
BufferManager(GPU &gpu);
|
BufferManager(GPU &gpu);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return A pre-existing or newly created Buffer object which covers the supplied mappings
|
||||||
|
*/
|
||||||
|
BufferView FindOrCreate(GuestBuffer guestMapping, ContextTag tag = {});
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return A dynamically allocated megabuffer which can be used to store buffer modifications allowing them to be replayed in-sequence on the GPU
|
* @return A dynamically allocated megabuffer which can be used to store buffer modifications allowing them to be replayed in-sequence on the GPU
|
||||||
* @note This object **must** be destroyed to be reclaimed by the manager and prevent a memory leak
|
* @note This object **must** be destroyed to be reclaimed by the manager and prevent a memory leak
|
||||||
*/
|
*/
|
||||||
MegaBuffer AcquireMegaBuffer(const std::shared_ptr<FenceCycle> &cycle);
|
MegaBuffer AcquireMegaBuffer(const std::shared_ptr<FenceCycle> &cycle);
|
||||||
|
|
||||||
/**
|
|
||||||
* @return A pre-existing or newly created Buffer object which covers the supplied mappings
|
|
||||||
*/
|
|
||||||
BufferView FindOrCreate(GuestBuffer guestMapping, const std::shared_ptr<FenceCycle> &cycle = nullptr);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -118,15 +118,11 @@ namespace skyline::gpu::interconnect {
|
|||||||
auto srcGuestTexture{GetGuestTexture(srcSurface)};
|
auto srcGuestTexture{GetGuestTexture(srcSurface)};
|
||||||
auto dstGuestTexture{GetGuestTexture(dstSurface)};
|
auto dstGuestTexture{GetGuestTexture(dstSurface)};
|
||||||
|
|
||||||
auto srcTextureView{gpu.texture.FindOrCreate(srcGuestTexture)};
|
auto srcTextureView{gpu.texture.FindOrCreate(srcGuestTexture, executor.tag)};
|
||||||
auto dstTextureView{gpu.texture.FindOrCreate(dstGuestTexture)};
|
|
||||||
|
|
||||||
{
|
|
||||||
std::scoped_lock lock{*srcTextureView, *dstTextureView};
|
|
||||||
|
|
||||||
executor.AttachTexture(&*srcTextureView);
|
executor.AttachTexture(&*srcTextureView);
|
||||||
|
|
||||||
|
auto dstTextureView{gpu.texture.FindOrCreate(dstGuestTexture, executor.tag)};
|
||||||
executor.AttachTexture(&*dstTextureView);
|
executor.AttachTexture(&*dstTextureView);
|
||||||
}
|
|
||||||
|
|
||||||
auto getSubresourceLayers{[](const vk::ImageSubresourceRange &range, vk::ImageAspectFlags aspect) {
|
auto getSubresourceLayers{[](const vk::ImageSubresourceRange &range, vk::ImageAspectFlags aspect) {
|
||||||
return vk::ImageSubresourceLayers{
|
return vk::ImageSubresourceLayers{
|
||||||
@ -145,7 +141,6 @@ namespace skyline::gpu::interconnect {
|
|||||||
};
|
};
|
||||||
|
|
||||||
executor.AddOutsideRpCommand([region, srcTextureView, dstTextureView, linearFilter](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle, GPU &) {
|
executor.AddOutsideRpCommand([region, srcTextureView, dstTextureView, linearFilter](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle, GPU &) {
|
||||||
std::scoped_lock lock{*srcTextureView, *dstTextureView};
|
|
||||||
auto blitSrcImage{srcTextureView->texture->GetBacking()};
|
auto blitSrcImage{srcTextureView->texture->GetBacking()};
|
||||||
auto blitDstImage{dstTextureView->texture->GetBacking()};
|
auto blitDstImage{dstTextureView->texture->GetBacking()};
|
||||||
|
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
#include "command_executor.h"
|
#include "command_executor.h"
|
||||||
|
|
||||||
namespace skyline::gpu::interconnect {
|
namespace skyline::gpu::interconnect {
|
||||||
CommandExecutor::CommandExecutor(const DeviceState &state) : gpu{*state.gpu}, activeCommandBuffer{gpu.scheduler.AllocateCommandBuffer()}, cycle{activeCommandBuffer.GetFenceCycle()}, megaBuffer{gpu.buffer.AcquireMegaBuffer(cycle)} {}
|
CommandExecutor::CommandExecutor(const DeviceState &state) : gpu{*state.gpu}, activeCommandBuffer{gpu.scheduler.AllocateCommandBuffer()}, cycle{activeCommandBuffer.GetFenceCycle()}, megaBuffer{gpu.buffer.AcquireMegaBuffer(cycle)}, tag{AllocateTag()} {}
|
||||||
|
|
||||||
CommandExecutor::~CommandExecutor() {
|
CommandExecutor::~CommandExecutor() {
|
||||||
cycle->Cancel();
|
cycle->Cancel();
|
||||||
@ -71,23 +71,48 @@ namespace skyline::gpu::interconnect {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void CommandExecutor::AttachTexture(TextureView *view) {
|
CommandExecutor::LockedTexture::LockedTexture(std::shared_ptr<Texture> texture) : texture{std::move(texture)} {}
|
||||||
auto texture{view->texture.get()};
|
|
||||||
if (!attachedTextures.contains(texture)) {
|
constexpr CommandExecutor::LockedTexture::LockedTexture(CommandExecutor::LockedTexture &&other) : texture{std::exchange(other.texture, nullptr)} {}
|
||||||
texture->WaitOnFence();
|
|
||||||
texture->cycle = cycle;
|
constexpr Texture *CommandExecutor::LockedTexture::operator->() const {
|
||||||
attachedTextures.emplace(texture);
|
return texture.get();
|
||||||
}
|
|
||||||
cycle->AttachObject(view->shared_from_this());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void CommandExecutor::AttachBuffer(BufferView &view) {
|
CommandExecutor::LockedTexture::~LockedTexture() {
|
||||||
view->buffer->SynchronizeHost();
|
if (texture)
|
||||||
|
texture->unlock();
|
||||||
if (!attachedBuffers.contains(view.bufferDelegate)) {
|
|
||||||
view.AttachCycle(cycle);
|
|
||||||
attachedBuffers.emplace(view.bufferDelegate);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool CommandExecutor::AttachTexture(TextureView *view) {
|
||||||
|
bool didLock{view->LockWithTag(tag)};
|
||||||
|
if (didLock)
|
||||||
|
attachedTextures.emplace_back(view->texture);
|
||||||
|
return didLock;
|
||||||
|
}
|
||||||
|
|
||||||
|
CommandExecutor::LockedBuffer::LockedBuffer(std::shared_ptr<Buffer> buffer) : buffer{std::move(buffer)} {}
|
||||||
|
|
||||||
|
constexpr CommandExecutor::LockedBuffer::LockedBuffer(CommandExecutor::LockedBuffer &&other) : buffer{std::exchange(other.buffer, nullptr)} {}
|
||||||
|
|
||||||
|
constexpr Buffer *CommandExecutor::LockedBuffer::operator->() const {
|
||||||
|
return buffer.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
CommandExecutor::LockedBuffer::~LockedBuffer() {
|
||||||
|
if (buffer)
|
||||||
|
buffer->unlock();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool CommandExecutor::AttachBuffer(BufferView &view) {
|
||||||
|
bool didLock{view->LockWithTag(tag)};
|
||||||
|
if (didLock)
|
||||||
|
attachedBuffers.emplace_back(view->buffer);
|
||||||
|
|
||||||
|
if (!attachedBufferDelegates.contains(view.bufferDelegate))
|
||||||
|
attachedBufferDelegates.emplace(view.bufferDelegate);
|
||||||
|
|
||||||
|
return didLock;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CommandExecutor::AttachDependency(const std::shared_ptr<FenceCycleDependency> &dependency) {
|
void CommandExecutor::AttachDependency(const std::shared_ptr<FenceCycleDependency> &dependency) {
|
||||||
@ -178,12 +203,12 @@ namespace skyline::gpu::interconnect {
|
|||||||
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit,
|
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit,
|
||||||
});
|
});
|
||||||
|
|
||||||
for (auto texture : attachedTextures) {
|
for (const auto &texture : attachedTextures) {
|
||||||
texture->SynchronizeHostWithBuffer(commandBuffer, cycle, true);
|
texture->SynchronizeHostWithBuffer(commandBuffer, cycle, true);
|
||||||
texture->MarkGpuDirty();
|
texture->MarkGpuDirty();
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const auto &delegate : attachedBuffers)
|
for (const auto &delegate : attachedBufferDelegates)
|
||||||
delegate->usageCallback = nullptr;
|
delegate->usageCallback = nullptr;
|
||||||
|
|
||||||
vk::RenderPass lRenderPass;
|
vk::RenderPass lRenderPass;
|
||||||
@ -213,14 +238,19 @@ namespace skyline::gpu::interconnect {
|
|||||||
}
|
}
|
||||||
|
|
||||||
commandBuffer.end();
|
commandBuffer.end();
|
||||||
|
|
||||||
|
for (const auto &attachedBuffer : attachedBuffers)
|
||||||
|
attachedBuffer->SynchronizeHost(); // Synchronize attached buffers from the CPU without using a staging buffer, this is done directly prior to submission to prevent stalls
|
||||||
|
|
||||||
gpu.scheduler.SubmitCommandBuffer(commandBuffer, activeCommandBuffer.GetFence());
|
gpu.scheduler.SubmitCommandBuffer(commandBuffer, activeCommandBuffer.GetFence());
|
||||||
|
|
||||||
for (const auto &delegate : attachedBuffers)
|
for (const auto &delegate : attachedBufferDelegates)
|
||||||
delegate->view->megabufferOffset = 0;
|
delegate->view->megabufferOffset = 0;
|
||||||
|
|
||||||
nodes.clear();
|
nodes.clear();
|
||||||
attachedTextures.clear();
|
attachedTextures.clear();
|
||||||
attachedBuffers.clear();
|
attachedBuffers.clear();
|
||||||
|
attachedBufferDelegates.clear();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -20,9 +20,46 @@ namespace skyline::gpu::interconnect {
|
|||||||
node::RenderPassNode *renderPass{};
|
node::RenderPassNode *renderPass{};
|
||||||
size_t subpassCount{}; //!< The number of subpasses in the current render pass
|
size_t subpassCount{}; //!< The number of subpasses in the current render pass
|
||||||
|
|
||||||
std::unordered_set<Texture *> attachedTextures; //!< All textures that need to be synced prior to and after execution
|
/**
|
||||||
|
* @brief A wrapper of a Texture object that has been locked beforehand and must be unlocked afterwards
|
||||||
|
*/
|
||||||
|
struct LockedTexture {
|
||||||
|
std::shared_ptr<Texture> texture;
|
||||||
|
|
||||||
|
explicit LockedTexture(std::shared_ptr<Texture> texture);
|
||||||
|
|
||||||
|
LockedTexture(const LockedTexture &) = delete;
|
||||||
|
|
||||||
|
constexpr LockedTexture(LockedTexture &&other);
|
||||||
|
|
||||||
|
constexpr Texture *operator->() const;
|
||||||
|
|
||||||
|
~LockedTexture();
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<LockedTexture> attachedTextures; //!< All textures that are attached to the current execution
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief A wrapper of a Buffer object that has been locked beforehand and must be unlocked afterwards
|
||||||
|
*/
|
||||||
|
struct LockedBuffer {
|
||||||
|
std::shared_ptr<Buffer> buffer;
|
||||||
|
|
||||||
|
LockedBuffer(std::shared_ptr<Buffer> buffer);
|
||||||
|
|
||||||
|
LockedBuffer(const LockedBuffer &) = delete;
|
||||||
|
|
||||||
|
constexpr LockedBuffer(LockedBuffer &&other);
|
||||||
|
|
||||||
|
constexpr Buffer *operator->() const;
|
||||||
|
|
||||||
|
~LockedBuffer();
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<LockedBuffer> attachedBuffers; //!< All buffers that are attached to the current execution
|
||||||
|
|
||||||
using SharedBufferDelegate = std::shared_ptr<Buffer::BufferDelegate>;
|
using SharedBufferDelegate = std::shared_ptr<Buffer::BufferDelegate>;
|
||||||
std::unordered_set<SharedBufferDelegate> attachedBuffers; //!< All buffers that are attached to the current execution
|
std::unordered_set<SharedBufferDelegate> attachedBufferDelegates; //!< All buffers that are attached to the current execution
|
||||||
|
|
||||||
std::vector<TextureView *> lastSubpassAttachments; //!< The storage backing for attachments used in the last subpass
|
std::vector<TextureView *> lastSubpassAttachments; //!< The storage backing for attachments used in the last subpass
|
||||||
span<TextureView *> lastSubpassInputAttachments; //!< The set of input attachments used in the last subpass
|
span<TextureView *> lastSubpassInputAttachments; //!< The set of input attachments used in the last subpass
|
||||||
@ -52,6 +89,7 @@ namespace skyline::gpu::interconnect {
|
|||||||
public:
|
public:
|
||||||
std::shared_ptr<FenceCycle> cycle; //!< The fence cycle that this command executor uses to wait for the GPU to finish executing commands
|
std::shared_ptr<FenceCycle> cycle; //!< The fence cycle that this command executor uses to wait for the GPU to finish executing commands
|
||||||
MegaBuffer megaBuffer; //!< The megabuffer used to temporarily store buffer modifications allowing them to be replayed in-sequence on the GPU
|
MegaBuffer megaBuffer; //!< The megabuffer used to temporarily store buffer modifications allowing them to be replayed in-sequence on the GPU
|
||||||
|
ContextTag tag; //!< The tag associated with this command executor, any tagged resource locking must utilize this tag
|
||||||
|
|
||||||
CommandExecutor(const DeviceState &state);
|
CommandExecutor(const DeviceState &state);
|
||||||
|
|
||||||
@ -59,17 +97,19 @@ namespace skyline::gpu::interconnect {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Attach the lifetime of the texture to the command buffer
|
* @brief Attach the lifetime of the texture to the command buffer
|
||||||
* @note The supplied texture **must** be locked by the calling thread
|
* @return If this is the first usage of the backing of this resource within this execution
|
||||||
|
* @note The supplied texture will be locked automatically until the command buffer is submitted and must **not** be locked by the caller
|
||||||
* @note This'll automatically handle syncing of the texture in the most optimal way possible
|
* @note This'll automatically handle syncing of the texture in the most optimal way possible
|
||||||
*/
|
*/
|
||||||
void AttachTexture(TextureView *view);
|
bool AttachTexture(TextureView *view);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Attach the lifetime of a buffer to the command buffer
|
* @brief Attach the lifetime of a buffer to the command buffer
|
||||||
* @note The supplied buffer **must** be locked by the calling thread
|
* @return If this is the first usage of the backing of this resource within this execution
|
||||||
|
* @note The supplied buffer will be locked automatically until the command buffer is submitted and must **not** be locked by the caller
|
||||||
* @note This'll automatically handle syncing of the buffer in the most optimal way possible
|
* @note This'll automatically handle syncing of the buffer in the most optimal way possible
|
||||||
*/
|
*/
|
||||||
void AttachBuffer(BufferView &view);
|
bool AttachBuffer(BufferView &view);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Attach the lifetime of the fence cycle dependency to the command buffer
|
* @brief Attach the lifetime of the fence cycle dependency to the command buffer
|
||||||
|
@ -400,7 +400,7 @@ namespace skyline::gpu::interconnect {
|
|||||||
return vk::ImageViewType::e2D;
|
return vk::ImageViewType::e2D;
|
||||||
}();
|
}();
|
||||||
|
|
||||||
renderTarget.view = gpu.texture.FindOrCreate(renderTarget.guest);
|
renderTarget.view = gpu.texture.FindOrCreate(renderTarget.guest, executor.tag);
|
||||||
return renderTarget.view.get();
|
return renderTarget.view.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -522,7 +522,6 @@ namespace skyline::gpu::interconnect {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void ClearColorRt(TextureView *renderTarget, vk::Rect2D scissor, u32 layerIndex) {
|
void ClearColorRt(TextureView *renderTarget, vk::Rect2D scissor, u32 layerIndex) {
|
||||||
std::scoped_lock lock{*renderTarget};
|
|
||||||
executor.AttachTexture(renderTarget);
|
executor.AttachTexture(renderTarget);
|
||||||
|
|
||||||
scissor.extent.width = static_cast<u32>(std::min(static_cast<i32>(renderTarget->texture->dimensions.width) - scissor.offset.x,
|
scissor.extent.width = static_cast<u32>(std::min(static_cast<i32>(renderTarget->texture->dimensions.width) - scissor.offset.x,
|
||||||
@ -554,7 +553,6 @@ namespace skyline::gpu::interconnect {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void ClearDepthStencilRt(TextureView *renderTarget, vk::ImageAspectFlags aspect, u32 layerIndex) {
|
void ClearDepthStencilRt(TextureView *renderTarget, vk::ImageAspectFlags aspect, u32 layerIndex) {
|
||||||
std::scoped_lock lock{*renderTarget};
|
|
||||||
executor.AttachTexture(renderTarget);
|
executor.AttachTexture(renderTarget);
|
||||||
|
|
||||||
if (renderTarget->range.layerCount == 1 && layerIndex == 0) {
|
if (renderTarget->range.layerCount == 1 && layerIndex == 0) {
|
||||||
@ -628,7 +626,7 @@ namespace skyline::gpu::interconnect {
|
|||||||
template<typename T>
|
template<typename T>
|
||||||
T Read(CommandExecutor &pExecutor, size_t dstOffset) const {
|
T Read(CommandExecutor &pExecutor, size_t dstOffset) const {
|
||||||
T object;
|
T object;
|
||||||
std::scoped_lock lock{view};
|
ContextLock lock{pExecutor.tag, view};
|
||||||
view.Read(pExecutor.cycle, []() {
|
view.Read(pExecutor.cycle, []() {
|
||||||
// TODO: here we should trigger a SubmitWithFlush, however that doesn't currently work due to Read being called mid-draw and attached objects not handling this case
|
// TODO: here we should trigger a SubmitWithFlush, however that doesn't currently work due to Read being called mid-draw and attached objects not handling this case
|
||||||
Logger::Warn("GPU dirty buffer reads for attached buffers are unimplemented");
|
Logger::Warn("GPU dirty buffer reads for attached buffers are unimplemented");
|
||||||
@ -644,7 +642,7 @@ namespace skyline::gpu::interconnect {
|
|||||||
void Write(CommandExecutor &pExecutor, MegaBuffer &megaBuffer, span<T> buf, size_t dstOffset) {
|
void Write(CommandExecutor &pExecutor, MegaBuffer &megaBuffer, span<T> buf, size_t dstOffset) {
|
||||||
auto srcCpuBuf{buf.template cast<u8>()};
|
auto srcCpuBuf{buf.template cast<u8>()};
|
||||||
|
|
||||||
std::scoped_lock lock{view};
|
ContextLock lock{pExecutor.tag, view};
|
||||||
view.Write(pExecutor.cycle, []() {
|
view.Write(pExecutor.cycle, []() {
|
||||||
// TODO: see Read()
|
// TODO: see Read()
|
||||||
Logger::Warn("GPU dirty buffer reads for attached buffers are unimplemented");
|
Logger::Warn("GPU dirty buffer reads for attached buffers are unimplemented");
|
||||||
@ -652,7 +650,6 @@ namespace skyline::gpu::interconnect {
|
|||||||
auto srcGpuOffset{megaBuffer.Push(srcCpuBuf)};
|
auto srcGpuOffset{megaBuffer.Push(srcCpuBuf)};
|
||||||
auto srcGpuBuf{megaBuffer.GetBacking()};
|
auto srcGpuBuf{megaBuffer.GetBacking()};
|
||||||
pExecutor.AddOutsideRpCommand([=](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &) {
|
pExecutor.AddOutsideRpCommand([=](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &) {
|
||||||
std::scoped_lock lock{view};
|
|
||||||
vk::BufferCopy copyRegion{
|
vk::BufferCopy copyRegion{
|
||||||
.size = srcCpuBuf.size_bytes(),
|
.size = srcCpuBuf.size_bytes(),
|
||||||
.srcOffset = srcGpuOffset,
|
.srcOffset = srcGpuOffset,
|
||||||
@ -728,7 +725,7 @@ namespace skyline::gpu::interconnect {
|
|||||||
auto view{constantBufferCache.Lookup(constantBufferSelector.size, constantBufferSelector.iova)};
|
auto view{constantBufferCache.Lookup(constantBufferSelector.size, constantBufferSelector.iova)};
|
||||||
if (!view) {
|
if (!view) {
|
||||||
auto mappings{channelCtx.asCtx->gmmu.TranslateRange(constantBufferSelector.iova, constantBufferSelector.size)};
|
auto mappings{channelCtx.asCtx->gmmu.TranslateRange(constantBufferSelector.iova, constantBufferSelector.size)};
|
||||||
view = gpu.buffer.FindOrCreate(mappings.front(), executor.cycle);
|
view = gpu.buffer.FindOrCreate(mappings.front(), executor.tag);
|
||||||
constantBufferCache.Insert(constantBufferSelector.size, constantBufferSelector.iova, *view);
|
constantBufferCache.Insert(constantBufferSelector.size, constantBufferSelector.iova, *view);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -919,7 +916,7 @@ namespace skyline::gpu::interconnect {
|
|||||||
if (mappings.size() != 1)
|
if (mappings.size() != 1)
|
||||||
Logger::Warn("Multiple buffer mappings ({}) are not supported", mappings.size());
|
Logger::Warn("Multiple buffer mappings ({}) are not supported", mappings.size());
|
||||||
|
|
||||||
return gpu.buffer.FindOrCreate(mappings.front(), executor.cycle);
|
return gpu.buffer.FindOrCreate(mappings.front(), executor.tag);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -1108,8 +1105,7 @@ namespace skyline::gpu::interconnect {
|
|||||||
});
|
});
|
||||||
|
|
||||||
auto view{pipelineStage.constantBuffers[constantBuffer.index].view};
|
auto view{pipelineStage.constantBuffers[constantBuffer.index].view};
|
||||||
|
executor.AttachBuffer(view);
|
||||||
std::scoped_lock lock(view);
|
|
||||||
if (auto megaBufferOffset{view.AcquireMegaBuffer(executor.megaBuffer)}) {
|
if (auto megaBufferOffset{view.AcquireMegaBuffer(executor.megaBuffer)}) {
|
||||||
// If the buffer is megabuffered then since we don't get out data from the underlying buffer, rather the megabuffer which stays consistent throughout a single execution, we can skip registering usage
|
// If the buffer is megabuffered then since we don't get out data from the underlying buffer, rather the megabuffer which stays consistent throughout a single execution, we can skip registering usage
|
||||||
bufferDescriptors[bufferIndex] = vk::DescriptorBufferInfo{
|
bufferDescriptors[bufferIndex] = vk::DescriptorBufferInfo{
|
||||||
@ -1127,7 +1123,6 @@ namespace skyline::gpu::interconnect {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
executor.AttachBuffer(view);
|
|
||||||
bufferIndex++;
|
bufferIndex++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1149,8 +1144,8 @@ namespace skyline::gpu::interconnect {
|
|||||||
});
|
});
|
||||||
|
|
||||||
auto view{GetSsboViewFromDescriptor(storageBuffer, pipelineStage.constantBuffers)};
|
auto view{GetSsboViewFromDescriptor(storageBuffer, pipelineStage.constantBuffers)};
|
||||||
|
executor.AttachBuffer(view);
|
||||||
|
|
||||||
std::scoped_lock lock{view};
|
|
||||||
if (storageBuffer.is_written)
|
if (storageBuffer.is_written)
|
||||||
view->buffer->MarkGpuDirty();
|
view->buffer->MarkGpuDirty();
|
||||||
|
|
||||||
@ -1161,7 +1156,6 @@ namespace skyline::gpu::interconnect {
|
|||||||
.range = view.size,
|
.range = view.size,
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
executor.AttachBuffer(view);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1204,16 +1198,16 @@ namespace skyline::gpu::interconnect {
|
|||||||
handle.samplerIndex = handle.textureIndex;
|
handle.samplerIndex = handle.textureIndex;
|
||||||
|
|
||||||
auto sampler{GetSampler(handle.samplerIndex)};
|
auto sampler{GetSampler(handle.samplerIndex)};
|
||||||
auto textureView{GetPoolTextureView(handle.textureIndex)};
|
executor.AttachDependency(sampler);
|
||||||
|
|
||||||
|
auto textureView{GetPoolTextureView(handle.textureIndex)};
|
||||||
|
executor.AttachTexture(textureView.get());
|
||||||
|
|
||||||
std::scoped_lock lock(*textureView);
|
|
||||||
imageDescriptors[imageIndex++] = vk::DescriptorImageInfo{
|
imageDescriptors[imageIndex++] = vk::DescriptorImageInfo{
|
||||||
.sampler = **sampler,
|
.sampler = **sampler,
|
||||||
.imageView = textureView->GetView(),
|
.imageView = textureView->GetView(),
|
||||||
.imageLayout = textureView->texture->layout,
|
.imageLayout = textureView->texture->layout,
|
||||||
};
|
};
|
||||||
executor.AttachTexture(textureView.get());
|
|
||||||
executor.AttachDependency(std::move(sampler));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1851,7 +1845,7 @@ namespace skyline::gpu::interconnect {
|
|||||||
if (mappings.size() != 1)
|
if (mappings.size() != 1)
|
||||||
Logger::Warn("Multiple buffer mappings ({}) are not supported", mappings.size());
|
Logger::Warn("Multiple buffer mappings ({}) are not supported", mappings.size());
|
||||||
|
|
||||||
vertexBuffer.view = gpu.buffer.FindOrCreate(mappings.front(), executor.cycle);
|
vertexBuffer.view = gpu.buffer.FindOrCreate(mappings.front(), executor.tag);
|
||||||
return &vertexBuffer;
|
return &vertexBuffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2335,7 +2329,7 @@ namespace skyline::gpu::interconnect {
|
|||||||
return textureView;
|
return textureView;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto textureView{gpu.texture.FindOrCreate(poolTexture.guest)};
|
auto textureView{gpu.texture.FindOrCreate(poolTexture.guest, executor.tag)};
|
||||||
poolTexture.view = textureView;
|
poolTexture.view = textureView;
|
||||||
return textureView;
|
return textureView;
|
||||||
}
|
}
|
||||||
@ -2606,7 +2600,7 @@ namespace skyline::gpu::interconnect {
|
|||||||
Logger::Warn("Multiple buffer mappings ({}) are not supported", mappings.size());
|
Logger::Warn("Multiple buffer mappings ({}) are not supported", mappings.size());
|
||||||
|
|
||||||
auto mapping{mappings.front()};
|
auto mapping{mappings.front()};
|
||||||
indexBuffer.view = gpu.buffer.FindOrCreate(span<u8>(mapping.data(), size), executor.cycle);
|
indexBuffer.view = gpu.buffer.FindOrCreate(span<u8>(mapping.data(), size), executor.tag);
|
||||||
return indexBuffer.view;
|
return indexBuffer.view;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2822,8 +2816,7 @@ namespace skyline::gpu::interconnect {
|
|||||||
throw exception("Indexed quad conversion is not supported");
|
throw exception("Indexed quad conversion is not supported");
|
||||||
|
|
||||||
auto indexBufferView{GetIndexBuffer(count)};
|
auto indexBufferView{GetIndexBuffer(count)};
|
||||||
|
executor.AttachBuffer(indexBufferView);
|
||||||
std::scoped_lock lock(indexBufferView);
|
|
||||||
|
|
||||||
boundIndexBuffer = std::make_shared<BoundIndexBuffer>();
|
boundIndexBuffer = std::make_shared<BoundIndexBuffer>();
|
||||||
boundIndexBuffer->type = indexBuffer.type;
|
boundIndexBuffer->type = indexBuffer.type;
|
||||||
@ -2837,18 +2830,16 @@ namespace skyline::gpu::interconnect {
|
|||||||
boundIndexBuffer->offset = view.offset;
|
boundIndexBuffer->offset = view.offset;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
executor.AttachBuffer(indexBufferView);
|
|
||||||
} else if (needsQuadConversion) {
|
} else if (needsQuadConversion) {
|
||||||
// Convert the guest-supplied quad list to an indexed triangle list
|
// Convert the guest-supplied quad list to an indexed triangle list
|
||||||
auto[bufferView, indexType, indexCount] = GetNonIndexedQuadConversionBuffer(count);
|
auto[bufferView, indexType, indexCount] = GetNonIndexedQuadConversionBuffer(count);
|
||||||
std::scoped_lock lock(bufferView);
|
executor.AttachBuffer(bufferView);
|
||||||
|
|
||||||
count = indexCount;
|
count = indexCount;
|
||||||
boundIndexBuffer = std::make_shared<BoundIndexBuffer>();
|
boundIndexBuffer = std::make_shared<BoundIndexBuffer>();
|
||||||
boundIndexBuffer->type = indexType;
|
boundIndexBuffer->type = indexType;
|
||||||
boundIndexBuffer->handle = bufferView->buffer->GetBacking();
|
boundIndexBuffer->handle = bufferView->buffer->GetBacking();
|
||||||
boundIndexBuffer->offset = bufferView->view->offset;
|
boundIndexBuffer->offset = bufferView->view->offset;
|
||||||
executor.AttachBuffer(bufferView);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Vertex Buffer Setup
|
// Vertex Buffer Setup
|
||||||
@ -2864,13 +2855,12 @@ namespace skyline::gpu::interconnect {
|
|||||||
for (u32 index{}; index < maxwell3d::VertexBufferCount; index++) {
|
for (u32 index{}; index < maxwell3d::VertexBufferCount; index++) {
|
||||||
auto vertexBuffer{GetVertexBuffer(index)};
|
auto vertexBuffer{GetVertexBuffer(index)};
|
||||||
if (vertexBuffer) {
|
if (vertexBuffer) {
|
||||||
auto &vertexBufferView{vertexBuffer->view};
|
|
||||||
vertexBindingDescriptions.push_back(vertexBuffer->bindingDescription);
|
vertexBindingDescriptions.push_back(vertexBuffer->bindingDescription);
|
||||||
if (vertexBuffer->bindingDescription.inputRate == vk::VertexInputRate::eInstance)
|
if (vertexBuffer->bindingDescription.inputRate == vk::VertexInputRate::eInstance)
|
||||||
vertexBindingDivisorsDescriptions.push_back(vertexBuffer->bindingDivisorDescription);
|
vertexBindingDivisorsDescriptions.push_back(vertexBuffer->bindingDivisorDescription);
|
||||||
|
|
||||||
std::scoped_lock vertexBufferLock(vertexBufferView);
|
auto &vertexBufferView{vertexBuffer->view};
|
||||||
|
executor.AttachBuffer(vertexBufferView);
|
||||||
if (auto megaBufferOffset{vertexBufferView.AcquireMegaBuffer(executor.megaBuffer)}) {
|
if (auto megaBufferOffset{vertexBufferView.AcquireMegaBuffer(executor.megaBuffer)}) {
|
||||||
// If the buffer is megabuffered then since we don't get out data from the underlying buffer, rather the megabuffer which stays consistent throughout a single execution, we can skip registering usage
|
// If the buffer is megabuffered then since we don't get out data from the underlying buffer, rather the megabuffer which stays consistent throughout a single execution, we can skip registering usage
|
||||||
boundVertexBuffers->handles[index] = executor.megaBuffer.GetBacking();
|
boundVertexBuffers->handles[index] = executor.megaBuffer.GetBacking();
|
||||||
@ -2881,7 +2871,6 @@ namespace skyline::gpu::interconnect {
|
|||||||
*offset = view.offset;
|
*offset = view.offset;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
executor.AttachBuffer(vertexBufferView);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2896,9 +2885,8 @@ namespace skyline::gpu::interconnect {
|
|||||||
for (u32 index{}; index < maxwell3d::RenderTargetCount; index++) {
|
for (u32 index{}; index < maxwell3d::RenderTargetCount; index++) {
|
||||||
auto renderTarget{GetColorRenderTarget(index)};
|
auto renderTarget{GetColorRenderTarget(index)};
|
||||||
if (renderTarget) {
|
if (renderTarget) {
|
||||||
std::scoped_lock lock(*renderTarget);
|
|
||||||
activeColorRenderTargets.push_back(renderTarget);
|
|
||||||
executor.AttachTexture(renderTarget);
|
executor.AttachTexture(renderTarget);
|
||||||
|
activeColorRenderTargets.push_back(renderTarget);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2906,10 +2894,8 @@ namespace skyline::gpu::interconnect {
|
|||||||
|
|
||||||
// Depth/Stencil Render Target Setup
|
// Depth/Stencil Render Target Setup
|
||||||
auto depthRenderTargetView{GetDepthRenderTarget()};
|
auto depthRenderTargetView{GetDepthRenderTarget()};
|
||||||
if (depthRenderTargetView) {
|
if (depthRenderTargetView)
|
||||||
std::scoped_lock lock(*depthRenderTargetView);
|
|
||||||
executor.AttachTexture(depthRenderTargetView);
|
executor.AttachTexture(depthRenderTargetView);
|
||||||
}
|
|
||||||
|
|
||||||
// Pipeline Creation
|
// Pipeline Creation
|
||||||
vk::StructureChain<vk::PipelineVertexInputStateCreateInfo, vk::PipelineVertexInputDivisorStateCreateInfoEXT> vertexState{
|
vk::StructureChain<vk::PipelineVertexInputStateCreateInfo, vk::PipelineVertexInputDivisorStateCreateInfoEXT> vertexState{
|
||||||
|
73
app/src/main/cpp/skyline/gpu/tag_allocator.h
Normal file
73
app/src/main/cpp/skyline/gpu/tag_allocator.h
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
// SPDX-License-Identifier: MPL-2.0
|
||||||
|
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <common/base.h>
|
||||||
|
|
||||||
|
namespace skyline::gpu {
    struct ContextTag;

    /**
     * @return A globally unique tag to utilize for any operations
     * @note This is forward-declared as `inline` rather than `static`, internal linkage would give every translation unit including this header its own copy of the function and its counter which would allow tags allocated from different source files to collide
     */
    inline ContextTag AllocateTag();

    /**
     * @brief A unique tag associated with a single "context", an abstraction which allows different parts of the same context to lock a resource concurrently
     * @note A default-constructed tag holds the key 0 and converts to false, the counter used by AllocateTag() starts at 1 so that it doesn't hand out this value
     */
    struct ContextTag {
      private:
        size_t key; //!< The value identifying the context, 0 denotes the absence of a context

        friend ContextTag AllocateTag();

        /**
         * @note This is private so that tags with a non-zero key can only be created through AllocateTag()
         */
        constexpr ContextTag(size_t key) : key{key} {}

      public:
        /**
         * @brief Creates a tag with the key 0 which compares as false
         */
        constexpr ContextTag() : key{} {}

        constexpr bool operator==(const ContextTag &other) const {
            return key == other.key;
        }

        constexpr bool operator!=(const ContextTag &other) const {
            return key != other.key;
        }

        /**
         * @return If the tag has a non-zero key
         */
        constexpr operator bool() const {
            return key != 0;
        }
    };

    inline ContextTag AllocateTag() {
        static std::atomic<size_t> key{1}; // Starts at 1 as a key of 0 is what a default-constructed tag holds
        return ContextTag{key++};
    }

    /**
     * @brief A scoped lock specially designed for classes with ContextTag-based locking
     * @note This will unlock the tag when the scope is exited, **if** it locked the tag in the first place
     * @note The resource must provide `bool LockWithTag(ContextTag)` and `void unlock()`
     */
    template<typename T>
    class ContextLock {
      private:
        T &resource;

      public:
        bool isFirst; //!< If this was the first lock for this context

        /**
         * @brief Locks the resource with the supplied tag and records if this call was the one that acquired the lock
         */
        ContextLock(ContextTag tag, T &resource) : resource{resource}, isFirst{resource.LockWithTag(tag)} {}

        ContextLock(const ContextLock &) = delete;

        /**
         * @note The responsibility of unlocking is transferred to the new object, the moved-from lock will not unlock the resource on destruction
         */
        ContextLock(ContextLock &&other) noexcept : resource{other.resource}, isFirst{other.isFirst} {
            other.isFirst = false;
        }

        ~ContextLock() {
            if (isFirst)
                resource.unlock();
        }
    };
}
|
@ -106,6 +106,21 @@ namespace skyline::gpu {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool TextureView::LockWithTag(ContextTag tag) {
    // The texture pointer is read atomically before and after locking, if it was replaced in the meantime then the lock that was taken is on the previous backing and the attempt is repeated on the current one
    for (auto current{std::atomic_load(&texture)};;) {
        bool acquired{current->LockWithTag(tag)};

        auto latest{std::atomic_load(&texture)};
        if (latest == current)
            return acquired;

        // Only release the previous backing if this call was the one that locked it
        if (acquired)
            current->unlock();
        current = latest;
    }
}
|
||||||
|
|
||||||
void TextureView::unlock() {
|
void TextureView::unlock() {
|
||||||
texture->unlock();
|
texture->unlock();
|
||||||
}
|
}
|
||||||
@ -564,12 +579,34 @@ namespace skyline::gpu {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Texture::~Texture() {
|
Texture::~Texture() {
|
||||||
std::scoped_lock lock{*this};
|
|
||||||
if (trapHandle)
|
if (trapHandle)
|
||||||
gpu.state.nce->DeleteTrap(*trapHandle);
|
gpu.state.nce->DeleteTrap(*trapHandle);
|
||||||
SynchronizeGuest(true);
|
SynchronizeGuest(true);
|
||||||
if (alignedMirror.valid())
|
if (alignedMirror.valid())
|
||||||
munmap(alignedMirror.data(), alignedMirror.size());
|
munmap(alignedMirror.data(), alignedMirror.size());
|
||||||
|
WaitOnFence();
|
||||||
|
}
|
||||||
|
|
||||||
|
void Texture::lock() {
    // Plain lock without any context tag, the stored tag is left untouched here
    mutex.lock();
}
|
||||||
|
|
||||||
|
bool Texture::LockWithTag(ContextTag pTag) {
    // A valid tag that matches the stored tag is treated as the lock already being held by this context, an invalid (zero) tag never matches
    bool alreadyHeld{pTag && pTag == tag};

    if (!alreadyHeld) {
        mutex.lock();
        tag = pTag; // Record the tag after the mutex has been acquired
    }

    return !alreadyHeld;
}
|
||||||
|
|
||||||
|
void Texture::unlock() {
    // The tag is reset to the default (invalid) tag prior to the mutex being released
    tag.store(ContextTag{});
    mutex.unlock();
}
|
||||||
|
|
||||||
|
bool Texture::try_lock() {
|
||||||
|
return mutex.try_lock();
|
||||||
}
|
}
|
||||||
|
|
||||||
void Texture::MarkGpuDirty() {
|
void Texture::MarkGpuDirty() {
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <nce.h>
|
#include <nce.h>
|
||||||
|
#include <gpu/tag_allocator.h>
|
||||||
#include <gpu/memory_manager.h>
|
#include <gpu/memory_manager.h>
|
||||||
|
|
||||||
namespace skyline::gpu {
|
namespace skyline::gpu {
|
||||||
@ -314,6 +315,14 @@ namespace skyline::gpu {
|
|||||||
*/
|
*/
|
||||||
void lock();
|
void lock();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Acquires an exclusive lock on the texture for the calling thread
|
||||||
|
* @param tag A tag to associate with the lock, future invocations with the same tag prior to the unlock will acquire the lock without waiting (0 is not a valid tag value and will disable tag behavior)
|
||||||
|
* @return If the lock was acquired by this call rather than having the same tag as the holder
|
||||||
|
* @note All locks using the same tag **must** be from the same thread as it'll only have one corresponding unlock() call
|
||||||
|
*/
|
||||||
|
bool LockWithTag(ContextTag tag);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Relinquishes an existing lock on the backing texture by the calling thread
|
* @brief Relinquishes an existing lock on the backing texture by the calling thread
|
||||||
* @note Naming is in accordance to the BasicLockable named requirement
|
* @note Naming is in accordance to the BasicLockable named requirement
|
||||||
@ -345,6 +354,7 @@ namespace skyline::gpu {
|
|||||||
private:
|
private:
|
||||||
GPU &gpu;
|
GPU &gpu;
|
||||||
std::mutex mutex; //!< Synchronizes any mutations to the texture or its backing
|
std::mutex mutex; //!< Synchronizes any mutations to the texture or its backing
|
||||||
|
std::atomic<ContextTag> tag{}; //!< The tag associated with the last lock call
|
||||||
std::condition_variable backingCondition; //!< Signalled when a valid backing has been swapped in
|
std::condition_variable backingCondition; //!< Signalled when a valid backing has been swapped in
|
||||||
using BackingType = std::variant<vk::Image, vk::raii::Image, memory::Image>;
|
using BackingType = std::variant<vk::Image, vk::raii::Image, memory::Image>;
|
||||||
BackingType backing; //!< The Vulkan image that backs this texture, it is nullable
|
BackingType backing; //!< The Vulkan image that backs this texture, it is nullable
|
||||||
@ -467,25 +477,27 @@ namespace skyline::gpu {
|
|||||||
* @brief Acquires an exclusive lock on the texture for the calling thread
|
* @brief Acquires an exclusive lock on the texture for the calling thread
|
||||||
* @note Naming is in accordance to the BasicLockable named requirement
|
* @note Naming is in accordance to the BasicLockable named requirement
|
||||||
*/
|
*/
|
||||||
void lock() {
|
void lock();
|
||||||
mutex.lock();
|
|
||||||
}
|
/**
|
||||||
|
* @brief Acquires an exclusive lock on the texture for the calling thread
|
||||||
|
* @param tag A tag to associate with the lock, future invocations with the same tag prior to the unlock will acquire the lock without waiting (0 is not a valid tag value and will disable tag behavior)
|
||||||
|
* @return If the lock was acquired by this call rather than having the same tag as the holder
|
||||||
|
* @note All locks using the same tag **must** be from the same thread as it'll only have one corresponding unlock() call
|
||||||
|
*/
|
||||||
|
bool LockWithTag(ContextTag tag);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Relinquishes an existing lock on the texture by the calling thread
|
* @brief Relinquishes an existing lock on the texture by the calling thread
|
||||||
* @note Naming is in accordance to the BasicLockable named requirement
|
* @note Naming is in accordance to the BasicLockable named requirement
|
||||||
*/
|
*/
|
||||||
void unlock() {
|
void unlock();
|
||||||
mutex.unlock();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Attempts to acquire an exclusive lock but returns immediately if it's captured by another thread
|
* @brief Attempts to acquire an exclusive lock but returns immediately if it's captured by another thread
|
||||||
* @note Naming is in accordance to the Lockable named requirement
|
* @note Naming is in accordance to the Lockable named requirement
|
||||||
*/
|
*/
|
||||||
bool try_lock() {
|
bool try_lock();
|
||||||
return mutex.try_lock();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Marks the texture as dirty on the GPU, it will be synced on the next call to SynchronizeGuest
|
* @brief Marks the texture as dirty on the GPU, it will be synced on the next call to SynchronizeGuest
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
namespace skyline::gpu {
|
namespace skyline::gpu {
|
||||||
TextureManager::TextureManager(GPU &gpu) : gpu(gpu) {}
|
TextureManager::TextureManager(GPU &gpu) : gpu(gpu) {}
|
||||||
|
|
||||||
std::shared_ptr<TextureView> TextureManager::FindOrCreate(const GuestTexture &guestTexture) {
|
std::shared_ptr<TextureView> TextureManager::FindOrCreate(const GuestTexture &guestTexture, ContextTag tag) {
|
||||||
auto guestMapping{guestTexture.mappings.front()};
|
auto guestMapping{guestTexture.mappings.front()};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -52,6 +52,7 @@ namespace skyline::gpu {
|
|||||||
|| matchGuestTexture.viewMipBase > 0)
|
|| matchGuestTexture.viewMipBase > 0)
|
||||||
&& matchGuestTexture.tileConfig == guestTexture.tileConfig) {
|
&& matchGuestTexture.tileConfig == guestTexture.tileConfig) {
|
||||||
auto &texture{hostMapping->texture};
|
auto &texture{hostMapping->texture};
|
||||||
|
ContextLock textureLock{tag, *texture};
|
||||||
return texture->GetView(guestTexture.viewType, vk::ImageSubresourceRange{
|
return texture->GetView(guestTexture.viewType, vk::ImageSubresourceRange{
|
||||||
.aspectMask = guestTexture.aspect,
|
.aspectMask = guestTexture.aspect,
|
||||||
.baseMipLevel = guestTexture.viewMipBase,
|
.baseMipLevel = guestTexture.viewMipBase,
|
||||||
|
@ -35,6 +35,6 @@ namespace skyline::gpu {
|
|||||||
/**
|
/**
|
||||||
* @return A pre-existing or newly created Texture object which matches the specified criteria
|
* @return A pre-existing or newly created Texture object which matches the specified criteria
|
||||||
*/
|
*/
|
||||||
std::shared_ptr<TextureView> FindOrCreate(const GuestTexture &guestTexture);
|
std::shared_ptr<TextureView> FindOrCreate(const GuestTexture &guestTexture, ContextTag tag = {});
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user