Rework Texture Synchronization API + Locking

Similar to `Buffer`s, `Texture`s suffered from unoptimal behavior due to using atomics for `DirtyState` and had certain bugs with replacement of the variable at times where it shouldn't be possible which have now been fixed by moving to using a mutex instead of atomics. This commit also updates the API to more closely match what is expected of it now and removes any functions that weren't utilized such as `SynchronizeGuestWithBuffer`.
This commit is contained in:
PixelyIon 2022-07-29 02:34:58 +05:30
parent 04bcd7e580
commit 8a62f8d37b
No known key found for this signature in database
GPG Key ID: 11BC6C3201BC2C05
3 changed files with 88 additions and 122 deletions

View File

@ -255,10 +255,8 @@ namespace skyline::gpu::interconnect {
}, {}, {} }, {}, {}
); );
for (const auto &texture : attachedTextures) { for (const auto &texture : attachedTextures)
texture->SynchronizeHostWithBuffer(commandBuffer, cycle, true); texture->SynchronizeHostInline(commandBuffer, cycle, true);
texture->MarkGpuDirty();
}
vk::RenderPass lRenderPass; vk::RenderPass lRenderPass;
u32 subpassIndex; u32 subpassIndex;

View File

@ -150,7 +150,13 @@ namespace skyline::gpu {
if (!texture) if (!texture)
return; return;
std::scoped_lock lock{*texture}; std::unique_lock stateLock{texture->stateMutex};
if (texture->dirtyState == DirtyState::GpuDirty) {
stateLock.unlock(); // If the lock isn't unlocked, a deadlock from threads waiting on the other lock can occur
// If this mutex would cause other callbacks to be blocked then we should block on this mutex in advance
std::scoped_lock lock{*texture};
}
}, [weakThis] { }, [weakThis] {
TRACE_EVENT("gpu", "Texture::ReadTrap"); TRACE_EVENT("gpu", "Texture::ReadTrap");
@ -158,11 +164,18 @@ namespace skyline::gpu {
if (!texture) if (!texture)
return true; return true;
std::unique_lock stateLock{texture->stateMutex, std::try_to_lock};
if (!stateLock)
return false;
if (texture->dirtyState != DirtyState::GpuDirty)
return true; // If state is already CPU dirty/Clean we don't need to do anything
std::unique_lock lock{*texture, std::try_to_lock}; std::unique_lock lock{*texture, std::try_to_lock};
if (!lock) if (!lock)
return false; return false;
texture->SynchronizeGuest(true); // We can skip trapping since the caller will do it texture->SynchronizeGuest(false, true); // We can skip trapping since the caller will do it
texture->WaitOnFence(); texture->WaitOnFence();
return true; return true;
}, [weakThis] { }, [weakThis] {
@ -172,9 +185,14 @@ namespace skyline::gpu {
if (!texture) if (!texture)
return true; return true;
DirtyState expectedState{DirtyState::Clean}; std::unique_lock stateLock{texture->stateMutex, std::try_to_lock};
if (texture->dirtyState.compare_exchange_strong(expectedState, DirtyState::CpuDirty, std::memory_order_relaxed) || expectedState == DirtyState::CpuDirty) if (!stateLock)
return true; // If we can transition the texture to CPU dirty (from Clean) or if it already is CPU dirty then we can just return, we only need to do the lock and corresponding sync if the texture is GPU dirty return false;
if (texture->dirtyState != DirtyState::GpuDirty) {
texture->dirtyState = DirtyState::CpuDirty;
return true; // If the texture is already CPU dirty or we can transition it to being CPU dirty then we don't need to do anything
}
std::unique_lock lock{*texture, std::try_to_lock}; std::unique_lock lock{*texture, std::try_to_lock};
if (!lock) if (!lock)
@ -603,19 +621,6 @@ namespace skyline::gpu {
return mutex.try_lock(); return mutex.try_lock();
} }
void Texture::MarkGpuDirty() {
if (!guest || format != guest->format)
return; // We need to skip GPU dirty if the host format and guest format differ as we don't support re-encoding compressed textures which is when this generally occurs
auto currentState{dirtyState.load(std::memory_order_relaxed)};
do {
if (currentState == DirtyState::GpuDirty)
return;
} while (!dirtyState.compare_exchange_strong(currentState, DirtyState::GpuDirty, std::memory_order_relaxed));
gpu.state.nce->RetrapRegions(*trapHandle, false);
}
bool Texture::WaitOnBacking() { bool Texture::WaitOnBacking() {
TRACE_EVENT("gpu", "Texture::WaitOnBacking"); TRACE_EVENT("gpu", "Texture::WaitOnBacking");
@ -675,17 +680,24 @@ namespace skyline::gpu {
} }
} }
void Texture::SynchronizeHost(bool rwTrap) { void Texture::SynchronizeHost(bool gpuDirty) {
if (!guest) if (!guest || format != guest->format)
return; return; // We need to skip GPU dirty if the host format and guest format differ as we don't support re-encoding compressed textures which is when this generally occurs
auto currentState{dirtyState.load(std::memory_order_relaxed)};
do {
if (currentState != DirtyState::CpuDirty)
return; // If the texture has not been modified on the CPU or has no mappings, there is no need to synchronize it
} while (!dirtyState.compare_exchange_strong(currentState, rwTrap ? DirtyState::GpuDirty : DirtyState::Clean, std::memory_order_relaxed));
TRACE_EVENT("gpu", "Texture::SynchronizeHost"); TRACE_EVENT("gpu", "Texture::SynchronizeHost");
{
std::scoped_lock lock{stateMutex};
if (gpuDirty && dirtyState == DirtyState::Clean) {
// If a texture is Clean then we can just transition it to being GPU dirty and retrap it
dirtyState = DirtyState::GpuDirty;
gpu.state.nce->RetrapRegions(*trapHandle, false);
return;
} else if (dirtyState != DirtyState::CpuDirty) {
return; // If the texture has not been modified on the CPU, there is no need to synchronize it
}
dirtyState = gpuDirty ? DirtyState::GpuDirty : DirtyState::Clean;
}
auto stagingBuffer{SynchronizeHostImpl()}; auto stagingBuffer{SynchronizeHostImpl()};
if (stagingBuffer) { if (stagingBuffer) {
@ -697,20 +709,27 @@ namespace skyline::gpu {
cycle = lCycle; cycle = lCycle;
} }
gpu.state.nce->RetrapRegions(*trapHandle, !rwTrap); // Trap any future CPU reads (optionally) + writes to this texture gpu.state.nce->RetrapRegions(*trapHandle, !gpuDirty); // Trap any future CPU reads (optionally) + writes to this texture
} }
void Texture::SynchronizeHostWithBuffer(const vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &pCycle, bool rwTrap) { void Texture::SynchronizeHostInline(const vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &pCycle, bool gpuDirty) {
if (!guest) if (!guest || format != guest->format)
return; return; // See SynchronizeHost(...)
auto currentState{dirtyState.load(std::memory_order_relaxed)}; TRACE_EVENT("gpu", "Texture::SynchronizeHostInline");
do {
if (currentState != DirtyState::CpuDirty) {
std::scoped_lock lock{stateMutex};
if (gpuDirty && dirtyState == DirtyState::Clean) {
dirtyState = DirtyState::GpuDirty;
gpu.state.nce->RetrapRegions(*trapHandle, false);
return; return;
} while (!dirtyState.compare_exchange_strong(currentState, rwTrap ? DirtyState::GpuDirty : DirtyState::Clean, std::memory_order_relaxed)); } else if (dirtyState != DirtyState::CpuDirty) {
return;
}
TRACE_EVENT("gpu", "Texture::SynchronizeHostWithBuffer"); dirtyState = gpuDirty ? DirtyState::GpuDirty : DirtyState::Clean;
}
auto stagingBuffer{SynchronizeHostImpl()}; auto stagingBuffer{SynchronizeHostImpl()};
if (stagingBuffer) { if (stagingBuffer) {
@ -720,26 +739,28 @@ namespace skyline::gpu {
cycle = pCycle; cycle = pCycle;
} }
gpu.state.nce->RetrapRegions(*trapHandle, !rwTrap); // Trap any future CPU reads (optionally) + writes to this texture gpu.state.nce->RetrapRegions(*trapHandle, !gpuDirty); // Trap any future CPU reads (optionally) + writes to this texture
} }
void Texture::SynchronizeGuest(bool skipTrap, bool setDirty) { void Texture::SynchronizeGuest(bool cpuDirty, bool skipTrap) {
if (!guest) if (!guest)
return; return;
auto currentState{dirtyState.load(std::memory_order_relaxed)};
do {
if (currentState == DirtyState::CpuDirty || (currentState == DirtyState::Clean && setDirty))
return; // If the texture is synchronized (Clean/CpuDirty), there is no need to synchronize it
} while (!dirtyState.compare_exchange_strong(currentState, setDirty ? DirtyState::CpuDirty : DirtyState::Clean, std::memory_order_relaxed));
TRACE_EVENT("gpu", "Texture::SynchronizeGuest"); TRACE_EVENT("gpu", "Texture::SynchronizeGuest");
if (!skipTrap) {
gpu.state.nce->RetrapRegions(*trapHandle, !setDirty); std::scoped_lock lock{stateMutex};
if (cpuDirty && dirtyState == DirtyState::Clean) {
dirtyState = DirtyState::CpuDirty;
if (!skipTrap)
gpu.state.nce->DeleteTrap(*trapHandle);
return;
} else if (dirtyState != DirtyState::GpuDirty) {
return;
}
if (setDirty && currentState == DirtyState::Clean) dirtyState = cpuDirty ? DirtyState::CpuDirty : DirtyState::Clean;
return; // If the texture was simply transitioned from Clean to CpuDirty, there is no need to synchronize it }
if (layout == vk::ImageLayout::eUndefined || format != guest->format) if (layout == vk::ImageLayout::eUndefined || format != guest->format)
// If the state of the host texture is undefined then so can the guest // If the state of the host texture is undefined then so can the guest
@ -764,43 +785,12 @@ namespace skyline::gpu {
} else { } else {
throw exception("Host -> Guest synchronization of images tiled as '{}' isn't implemented", vk::to_string(tiling)); throw exception("Host -> Guest synchronization of images tiled as '{}' isn't implemented", vk::to_string(tiling));
} }
}
void Texture::SynchronizeGuestWithBuffer(const vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &pCycle) { if (!skipTrap)
if (!guest) if (cpuDirty)
return; gpu.state.nce->DeleteTrap(*trapHandle);
else
auto currentState{dirtyState.load(std::memory_order_relaxed)}; gpu.state.nce->RetrapRegions(*trapHandle, true); // Trap any future CPU writes to this texture
do {
if (currentState != DirtyState::GpuDirty)
return; // If the texture has not been used on the GPU, there is no need to synchronize it
} while (!dirtyState.compare_exchange_strong(currentState, DirtyState::Clean, std::memory_order_relaxed));
TRACE_EVENT("gpu", "Texture::SynchronizeGuestWithBuffer");
gpu.state.nce->RetrapRegions(*trapHandle, true);
if (layout == vk::ImageLayout::eUndefined || format != guest->format)
// If the state of the host texture is undefined then so can the guest
// If the texture has differing formats on the guest and host, we don't support converting back in that case as it may involve recompression of a decompressed texture
return;
WaitOnBacking();
pCycle->ChainCycle(cycle);
if (tiling == vk::ImageTiling::eOptimal || !std::holds_alternative<memory::Image>(backing)) {
auto stagingBuffer{gpu.memory.AllocateStagingBuffer(surfaceSize)};
CopyIntoStagingBuffer(commandBuffer, stagingBuffer);
pCycle->AttachObject(std::make_shared<TextureBufferCopy>(shared_from_this(), stagingBuffer));
cycle = pCycle;
} else if (tiling == vk::ImageTiling::eLinear) {
CopyToGuest(std::get<memory::Image>(backing).data());
pCycle->AttachObject(std::make_shared<TextureBufferCopy>(shared_from_this()));
cycle = pCycle;
} else {
throw exception("Host -> Guest synchronization of images tiled as '{}' isn't implemented", vk::to_string(tiling));
}
} }
std::shared_ptr<TextureView> Texture::GetView(vk::ImageViewType type, vk::ImageSubresourceRange range, texture::Format pFormat, vk::ComponentMapping mapping) { std::shared_ptr<TextureView> Texture::GetView(vk::ImageViewType type, vk::ImageSubresourceRange range, texture::Format pFormat, vk::ComponentMapping mapping) {

View File

@ -367,9 +367,8 @@ namespace skyline::gpu {
Clean, //!< The CPU mappings are in sync with the GPU texture Clean, //!< The CPU mappings are in sync with the GPU texture
CpuDirty, //!< The CPU mappings have been modified but the GPU texture is not up to date CpuDirty, //!< The CPU mappings have been modified but the GPU texture is not up to date
GpuDirty, //!< The GPU texture has been modified but the CPU mappings have not been updated GpuDirty, //!< The GPU texture has been modified but the CPU mappings have not been updated
}; } dirtyState{DirtyState::CpuDirty}; //!< The state of the CPU mappings with respect to the GPU texture
std::atomic<DirtyState> dirtyState{DirtyState::CpuDirty}; //!< The state of the CPU mappings with respect to the GPU texture std::recursive_mutex stateMutex; //!< Synchronizes access to the dirty state
static_assert(std::atomic<DirtyState>::is_always_lock_free);
/** /**
* @brief Storage for all metadata about a specific view into the buffer, used to prevent redundant view creation and duplication of VkBufferView(s) * @brief Storage for all metadata about a specific view into the buffer, used to prevent redundant view creation and duplication of VkBufferView(s)
@ -381,7 +380,7 @@ namespace skyline::gpu {
vk::ImageSubresourceRange range; vk::ImageSubresourceRange range;
vk::raii::ImageView vkView; vk::raii::ImageView vkView;
TextureViewStorage(vk::ImageViewType type, texture::Format format, vk::ComponentMapping mapping, vk::ImageSubresourceRange range, vk::raii::ImageView&& vkView); TextureViewStorage(vk::ImageViewType type, texture::Format format, vk::ComponentMapping mapping, vk::ImageSubresourceRange range, vk::raii::ImageView &&vkView);
}; };
std::vector<TextureViewStorage> views; std::vector<TextureViewStorage> views;
@ -503,13 +502,6 @@ namespace skyline::gpu {
*/ */
bool try_lock(); bool try_lock();
/**
* @brief Marks the texture as dirty on the GPU, it will be synced on the next call to SynchronizeGuest
* @note This **must** be called after syncing the texture to the GPU not before
* @note The texture **must** be locked prior to calling this
*/
void MarkGpuDirty();
/** /**
* @brief Waits on the texture backing to be a valid non-null Vulkan image * @brief Waits on the texture backing to be a valid non-null Vulkan image
* @return If the mutex could be unlocked during the function * @return If the mutex could be unlocked during the function
@ -535,44 +527,30 @@ namespace skyline::gpu {
*/ */
void TransitionLayout(vk::ImageLayout layout); void TransitionLayout(vk::ImageLayout layout);
/**
* @brief Converts the texture to have the specified format
*/
void SetFormat(texture::Format format);
/** /**
* @brief Synchronizes the host texture with the guest after it has been modified * @brief Synchronizes the host texture with the guest after it has been modified
* @param rwTrap If true, the guest buffer will be read/write trapped rather than only being write trapped which is more efficient than calling MarkGpuDirty directly after * @param gpuDirty If true, the texture will be transitioned to being GpuDirty by this call
* @note This function is not blocking and the synchronization will not be complete until the associated fence is signalled, it can be waited on with WaitOnFence()
* @note The texture **must** be locked prior to calling this * @note The texture **must** be locked prior to calling this
* @note The guest texture backing should exist prior to calling this
*/ */
void SynchronizeHost(bool rwTrap = false); void SynchronizeHost(bool gpuDirty = false);
/** /**
* @brief Same as SynchronizeHost but this records any commands into the supplied command buffer rather than creating one as necessary * @brief Same as SynchronizeHost but this records any commands into the supplied command buffer rather than creating one as necessary
* @param rwTrap If true, the guest buffer will be read/write trapped rather than only being write trapped which is more efficient than calling MarkGpuDirty directly after * @param gpuDirty If true, the texture will be transitioned to being GpuDirty by this call
* @note It is more efficient to call SynchronizeHost than allocating a command buffer purely for this function as it may conditionally not record any commands * @note It is more efficient to call SynchronizeHost than allocating a command buffer purely for this function as it may conditionally not record any commands
* @note The texture **must** be locked prior to calling this * @note The texture **must** be locked prior to calling this
* @note The guest texture backing should exist prior to calling this
*/ */
void SynchronizeHostWithBuffer(const vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle, bool rwTrap = false); void SynchronizeHostInline(const vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle, bool gpuDirty = false);
/** /**
* @brief Synchronizes the guest texture with the host texture after it has been modified * @brief Synchronizes the guest texture with the host texture after it has been modified
* @param skipTrap If true, setting up a CPU trap will be skipped and the dirty state will be Clean/CpuDirty * @param cpuDirty If true, the texture will be transitioned to being CpuDirty by this call
* @param setDirty If true, the texture will be marked as CpuDirty rather than Clean * @param skipTrap If true, trapping/untrapping the guest mappings will be skipped and has to be handled by the caller
* @note This function is not blocking and the synchronization will not be complete until the associated fence is signalled, it can be waited on with WaitOnFence()
* @note The texture **must** be locked prior to calling this * @note The texture **must** be locked prior to calling this
* @note The guest texture should not be null prior to calling this
*/ */
void SynchronizeGuest(bool skipTrap = false, bool setDirty = false); void SynchronizeGuest(bool cpuDirty = false, bool skipTrap = false);
/**
* @brief Synchronizes the guest texture with the host texture after it has been modified
* @note It is more efficient to call SynchronizeHost than allocating a command buffer purely for this function as it may conditionally not record any commands
* @note The texture **must** be locked prior to calling this
* @note The guest texture should not be null prior to calling this
*/
void SynchronizeGuestWithBuffer(const vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle);
/** /**
* @return A cached or newly created view into this texture with the supplied attributes * @return A cached or newly created view into this texture with the supplied attributes