mirror of
https://github.com/skyline-emu/skyline.git
synced 2024-12-28 09:45:28 +03:00
Implement usage based implicit renderpass barrier generation
Full pipeline barriers between every RP can be extremely expensive on HW; by analysing the inputs and outputs of a draw, it's possible to construct a much more optimal barrier that only syncs what is necessary.
This commit is contained in:
parent
7a759326b3
commit
99bf7dbb36
@ -633,6 +633,13 @@ namespace skyline::gpu {
|
|||||||
return mirror;
|
return mirror;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Buffer::PopulateReadBarrier(vk::PipelineStageFlagBits dstStage, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask) {
|
||||||
|
if (currentExecutionGpuDirty) {
|
||||||
|
srcStageMask |= vk::PipelineStageFlagBits::eAllCommands;
|
||||||
|
dstStageMask |= dstStage;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void Buffer::lock() {
|
void Buffer::lock() {
|
||||||
mutex.lock();
|
mutex.lock();
|
||||||
accumulatedCpuLockCounter++;
|
accumulatedCpuLockCounter++;
|
||||||
|
@ -430,6 +430,11 @@ namespace skyline::gpu {
|
|||||||
* @note The buffer **must** be kept locked until the span is no longer in use
|
* @note The buffer **must** be kept locked until the span is no longer in use
|
||||||
*/
|
*/
|
||||||
span<u8> GetReadOnlyBackingSpan(bool isFirstUsage, const std::function<void()> &flushHostCallback);
|
span<u8> GetReadOnlyBackingSpan(bool isFirstUsage, const std::function<void()> &flushHostCallback);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Populates the input src and dst stage masks with appropriate read barrier parameters for the current buffer state
|
||||||
|
*/
|
||||||
|
void PopulateReadBarrier(vk::PipelineStageFlagBits dstStage, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask);
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -279,9 +279,9 @@ namespace skyline::gpu::interconnect {
|
|||||||
return (!a && !b) || (a && b && b->GetView() == a);
|
return (!a && !b) || (a && b && b->GetView() == a);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool CommandExecutor::CreateRenderPassWithSubpass(vk::Rect2D renderArea, span<TextureView *> sampledImages, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, bool noSubpassCreation) {
|
bool CommandExecutor::CreateRenderPassWithSubpass(vk::Rect2D renderArea, span<TextureView *> sampledImages, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, bool noSubpassCreation, vk::PipelineStageFlags srcStageMask, vk::PipelineStageFlags dstStageMask) {
|
||||||
auto addSubpass{[&] {
|
auto addSubpass{[&] {
|
||||||
renderPass->AddSubpass(inputAttachments, colorAttachments, depthStencilAttachment, gpu);
|
renderPass->AddSubpass(inputAttachments, colorAttachments, depthStencilAttachment, gpu, srcStageMask, dstStageMask);
|
||||||
lastSubpassColorAttachments.clear();
|
lastSubpassColorAttachments.clear();
|
||||||
lastSubpassInputAttachments.clear();
|
lastSubpassInputAttachments.clear();
|
||||||
|
|
||||||
@ -415,8 +415,8 @@ namespace skyline::gpu::interconnect {
|
|||||||
cycle->AttachObject(dependency);
|
cycle->AttachObject(dependency);
|
||||||
}
|
}
|
||||||
|
|
||||||
void CommandExecutor::AddSubpass(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32)> &&function, vk::Rect2D renderArea, span<TextureView *> sampledImages, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, bool noSubpassCreation) {
|
void CommandExecutor::AddSubpass(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32)> &&function, vk::Rect2D renderArea, span<TextureView *> sampledImages, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, bool noSubpassCreation, vk::PipelineStageFlags srcStageMask, vk::PipelineStageFlags dstStageMask) {
|
||||||
bool gotoNext{CreateRenderPassWithSubpass(renderArea, sampledImages, inputAttachments, colorAttachments, depthStencilAttachment ? &*depthStencilAttachment : nullptr, noSubpassCreation)};
|
bool gotoNext{CreateRenderPassWithSubpass(renderArea, sampledImages, inputAttachments, colorAttachments, depthStencilAttachment ? &*depthStencilAttachment : nullptr, noSubpassCreation, srcStageMask, dstStageMask)};
|
||||||
if (gotoNext)
|
if (gotoNext)
|
||||||
slot->nodes.emplace_back(std::in_place_type_t<node::NextSubpassFunctionNode>(), std::forward<decltype(function)>(function));
|
slot->nodes.emplace_back(std::in_place_type_t<node::NextSubpassFunctionNode>(), std::forward<decltype(function)>(function));
|
||||||
else
|
else
|
||||||
|
@ -190,7 +190,7 @@ namespace skyline::gpu::interconnect {
|
|||||||
* @note This also checks for subpass coalescing and will merge the new subpass with the previous one when possible
|
* @note This also checks for subpass coalescing and will merge the new subpass with the previous one when possible
|
||||||
* @return If the next subpass must be started prior to issuing any commands
|
* @return If the next subpass must be started prior to issuing any commands
|
||||||
*/
|
*/
|
||||||
bool CreateRenderPassWithSubpass(vk::Rect2D renderArea, span<TextureView *> sampledImages, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, bool noSubpassCreation = false);
|
bool CreateRenderPassWithSubpass(vk::Rect2D renderArea, span<TextureView *> sampledImages, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, bool noSubpassCreation = false, vk::PipelineStageFlags srcStageMask = {}, vk::PipelineStageFlags dstStageMask = {});
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Ends a render pass if one is currently active and resets all corresponding state
|
* @brief Ends a render pass if one is currently active and resets all corresponding state
|
||||||
@ -264,7 +264,7 @@ namespace skyline::gpu::interconnect {
|
|||||||
* @param exclusiveSubpass If this subpass should be the only subpass in a render pass
|
* @param exclusiveSubpass If this subpass should be the only subpass in a render pass
|
||||||
* @note Any supplied texture should be attached prior and not undergo any persistent layout transitions till execution
|
* @note Any supplied texture should be attached prior and not undergo any persistent layout transitions till execution
|
||||||
*/
|
*/
|
||||||
void AddSubpass(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32)> &&function, vk::Rect2D renderArea, span<TextureView *> sampledImages, span<TextureView *> inputAttachments = {}, span<TextureView *> colorAttachments = {}, TextureView *depthStencilAttachment = {}, bool noSubpassCreation = false);
|
void AddSubpass(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32)> &&function, vk::Rect2D renderArea, span<TextureView *> sampledImages, span<TextureView *> inputAttachments = {}, span<TextureView *> colorAttachments = {}, TextureView *depthStencilAttachment = {}, bool noSubpassCreation = false, vk::PipelineStageFlags srcStageMask = {}, vk::PipelineStageFlags dstStageMask = {});
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Adds a subpass that clears the entirety of the specified attachment with a color value, it may utilize VK_ATTACHMENT_LOAD_OP_CLEAR for a more efficient clear when possible
|
* @brief Adds a subpass that clears the entirety of the specified attachment with a color value, it may utilize VK_ATTACHMENT_LOAD_OP_CLEAR for a more efficient clear when possible
|
||||||
|
@ -2,21 +2,18 @@
|
|||||||
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||||
|
|
||||||
#include "command_nodes.h"
|
#include "command_nodes.h"
|
||||||
|
#include "gpu/texture/texture.h"
|
||||||
|
#include <vulkan/vulkan_enums.hpp>
|
||||||
|
|
||||||
namespace skyline::gpu::interconnect::node {
|
namespace skyline::gpu::interconnect::node {
|
||||||
RenderPassNode::RenderPassNode(vk::Rect2D renderArea) : subpassDependencies(
|
RenderPassNode::RenderPassNode(vk::Rect2D renderArea)
|
||||||
{
|
: externalDependency{vk::SubpassDependency{
|
||||||
// We assume all past commands have been executed when this RP starts
|
|
||||||
vk::SubpassDependency{
|
|
||||||
.srcSubpass = VK_SUBPASS_EXTERNAL,
|
.srcSubpass = VK_SUBPASS_EXTERNAL,
|
||||||
.dstSubpass = 0,
|
.dstSubpass = 0,
|
||||||
.srcStageMask = vk::PipelineStageFlagBits::eAllGraphics,
|
.srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
|
||||||
.dstStageMask = vk::PipelineStageFlagBits::eAllGraphics,
|
|
||||||
.srcAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
|
|
||||||
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
|
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
|
||||||
}
|
}},
|
||||||
}
|
renderArea{renderArea} {}
|
||||||
), renderArea(renderArea) {}
|
|
||||||
|
|
||||||
u32 RenderPassNode::AddAttachment(TextureView *view, GPU &gpu) {
|
u32 RenderPassNode::AddAttachment(TextureView *view, GPU &gpu) {
|
||||||
auto vkView{view->GetView()};
|
auto vkView{view->GetView()};
|
||||||
@ -42,6 +39,19 @@ namespace skyline::gpu::interconnect::node {
|
|||||||
.finalLayout = view->texture->layout,
|
.finalLayout = view->texture->layout,
|
||||||
.flags = vk::AttachmentDescriptionFlagBits::eMayAlias
|
.flags = vk::AttachmentDescriptionFlagBits::eMayAlias
|
||||||
});
|
});
|
||||||
|
|
||||||
|
if (auto usage{view->texture->GetLastRenderPassUsage()}; usage != texture::RenderPassUsage::None) {
|
||||||
|
if (view->format->vkAspect & vk::ImageAspectFlagBits::eColor)
|
||||||
|
externalDependency.dstStageMask |= vk::PipelineStageFlagBits::eColorAttachmentOutput;
|
||||||
|
else if (view->format->vkAspect & (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil))
|
||||||
|
externalDependency.dstStageMask |= vk::PipelineStageFlagBits::eEarlyFragmentTests | vk::PipelineStageFlagBits::eLateFragmentTests;
|
||||||
|
|
||||||
|
if (usage == texture::RenderPassUsage::RenderTarget)
|
||||||
|
externalDependency.srcStageMask |= externalDependency.dstStageMask;
|
||||||
|
else if (usage == texture::RenderPassUsage::Sampled)
|
||||||
|
externalDependency.srcStageMask |= vk::PipelineStageFlagBits::eAllGraphics;
|
||||||
|
}
|
||||||
|
|
||||||
return static_cast<u32>(attachments.size() - 1);
|
return static_cast<u32>(attachments.size() - 1);
|
||||||
} else {
|
} else {
|
||||||
// If we've got a match from a previous subpass, we need to preserve the attachment till the current subpass
|
// If we've got a match from a previous subpass, we need to preserve the attachment till the current subpass
|
||||||
@ -116,7 +126,10 @@ namespace skyline::gpu::interconnect::node {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RenderPassNode::AddSubpass(span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, GPU& gpu) {
|
void RenderPassNode::AddSubpass(span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, GPU &gpu, vk::PipelineStageFlags srcStageMask, vk::PipelineStageFlags dstStageMask) {
|
||||||
|
externalDependency.srcStageMask |= srcStageMask;
|
||||||
|
externalDependency.dstStageMask |= dstStageMask;
|
||||||
|
|
||||||
attachmentReferences.reserve(attachmentReferences.size() + inputAttachments.size() + colorAttachments.size() + (depthStencilAttachment ? 1 : 0));
|
attachmentReferences.reserve(attachmentReferences.size() + inputAttachments.size() + colorAttachments.size() + (depthStencilAttachment ? 1 : 0));
|
||||||
|
|
||||||
auto inputAttachmentsOffset{attachmentReferences.size() * sizeof(vk::AttachmentReference)};
|
auto inputAttachmentsOffset{attachmentReferences.size() * sizeof(vk::AttachmentReference)};
|
||||||
@ -225,6 +238,9 @@ namespace skyline::gpu::interconnect::node {
|
|||||||
preserveAttachmentIt++;
|
preserveAttachmentIt++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (externalDependency.srcStageMask && externalDependency.dstStageMask)
|
||||||
|
subpassDependencies.push_back(externalDependency);
|
||||||
|
|
||||||
auto renderPass{gpu.renderPassCache.GetRenderPass(vk::RenderPassCreateInfo{
|
auto renderPass{gpu.renderPassCache.GetRenderPass(vk::RenderPassCreateInfo{
|
||||||
.attachmentCount = static_cast<u32>(attachmentDescriptions.size()),
|
.attachmentCount = static_cast<u32>(attachmentDescriptions.size()),
|
||||||
.pAttachments = attachmentDescriptions.data(),
|
.pAttachments = attachmentDescriptions.data(),
|
||||||
|
@ -48,6 +48,7 @@ namespace skyline::gpu::interconnect::node {
|
|||||||
public:
|
public:
|
||||||
std::vector<vk::SubpassDescription> subpassDescriptions;
|
std::vector<vk::SubpassDescription> subpassDescriptions;
|
||||||
std::vector<vk::SubpassDependency> subpassDependencies;
|
std::vector<vk::SubpassDependency> subpassDependencies;
|
||||||
|
vk::SubpassDependency externalDependency;
|
||||||
|
|
||||||
vk::Rect2D renderArea;
|
vk::Rect2D renderArea;
|
||||||
std::vector<vk::ClearValue> clearValues;
|
std::vector<vk::ClearValue> clearValues;
|
||||||
@ -63,7 +64,7 @@ namespace skyline::gpu::interconnect::node {
|
|||||||
/**
|
/**
|
||||||
* @brief Creates a subpass with the attachments bound in the specified order
|
* @brief Creates a subpass with the attachments bound in the specified order
|
||||||
*/
|
*/
|
||||||
void AddSubpass(span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, GPU& gpu);
|
void AddSubpass(span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, GPU &gpu, vk::PipelineStageFlags srcStageMask, vk::PipelineStageFlags dstStageMask);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Clears a color attachment in the current subpass with VK_ATTACHMENT_LOAD_OP_CLEAR
|
* @brief Clears a color attachment in the current subpass with VK_ATTACHMENT_LOAD_OP_CLEAR
|
||||||
|
@ -44,7 +44,7 @@ namespace skyline::gpu::interconnect {
|
|||||||
u64 blockMappingEndAddr; //!< The end GPU address of `blockMapping`
|
u64 blockMappingEndAddr; //!< The end GPU address of `blockMapping`
|
||||||
|
|
||||||
public:
|
public:
|
||||||
BufferView view; //!< The buffer view created as a result of a call to `Update()`
|
BufferView view{}; //!< The buffer view created as a result of a call to `Update()`
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Updates `view` based on the supplied GPU mapping
|
* @brief Updates `view` based on the supplied GPU mapping
|
||||||
|
@ -17,11 +17,16 @@ namespace skyline::gpu::interconnect {
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
static DynamicBufferBinding GetConstantBufferBinding(InterconnectContext &ctx, span<const u32, Shader::Info::MAX_CBUFS> cbufSizes, BufferView view, size_t idx) {
|
static DynamicBufferBinding GetConstantBufferBinding(InterconnectContext &ctx,
|
||||||
|
span<const u32, Shader::Info::MAX_CBUFS> cbufSizes,
|
||||||
|
BufferView view, size_t idx,
|
||||||
|
vk::PipelineStageFlagBits dstStage,
|
||||||
|
vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask) {
|
||||||
if (!view) // Return a dummy buffer if the constant buffer isn't bound
|
if (!view) // Return a dummy buffer if the constant buffer isn't bound
|
||||||
return BufferBinding{ctx.gpu.megaBufferAllocator.Allocate(ctx.executor.cycle, PAGE_SIZE).buffer, 0, PAGE_SIZE};
|
return BufferBinding{ctx.gpu.megaBufferAllocator.Allocate(ctx.executor.cycle, PAGE_SIZE).buffer, 0, PAGE_SIZE};
|
||||||
|
|
||||||
ctx.executor.AttachBuffer(view);
|
ctx.executor.AttachBuffer(view);
|
||||||
|
view.GetBuffer()->PopulateReadBarrier(dstStage, srcStageMask, dstStageMask);
|
||||||
|
|
||||||
size_t sizeOverride{std::min<size_t>(cbufSizes[idx], view.size)};
|
size_t sizeOverride{std::min<size_t>(cbufSizes[idx], view.size)};
|
||||||
if (auto megaBufferBinding{view.TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionTag, sizeOverride)}) {
|
if (auto megaBufferBinding{view.TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionTag, sizeOverride)}) {
|
||||||
@ -32,7 +37,10 @@ namespace skyline::gpu::interconnect {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static DynamicBufferBinding GetStorageBufferBinding(InterconnectContext &ctx, const auto &desc, ConstantBuffer &cbuf, CachedMappedBufferView &cachedView) {
|
static DynamicBufferBinding GetStorageBufferBinding(InterconnectContext &ctx, const auto &desc,
|
||||||
|
ConstantBuffer &cbuf, CachedMappedBufferView &cachedView,
|
||||||
|
vk::PipelineStageFlagBits dstStage,
|
||||||
|
vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask) {
|
||||||
struct SsboDescriptor {
|
struct SsboDescriptor {
|
||||||
u64 address;
|
u64 address;
|
||||||
u32 size;
|
u32 size;
|
||||||
@ -46,8 +54,14 @@ namespace skyline::gpu::interconnect {
|
|||||||
|
|
||||||
auto view{cachedView.view};
|
auto view{cachedView.view};
|
||||||
ctx.executor.AttachBuffer(view);
|
ctx.executor.AttachBuffer(view);
|
||||||
|
view.GetBuffer()->PopulateReadBarrier(dstStage, srcStageMask, dstStageMask);
|
||||||
|
|
||||||
if (desc.is_written) {
|
if (desc.is_written) {
|
||||||
|
if (view.GetBuffer()->SequencedCpuBackingWritesBlocked()) {
|
||||||
|
srcStageMask |= vk::PipelineStageFlagBits::eAllCommands;
|
||||||
|
dstStageMask |= dstStage;
|
||||||
|
}
|
||||||
|
|
||||||
view.GetBuffer()->MarkGpuDirty();
|
view.GetBuffer()->MarkGpuDirty();
|
||||||
} else {
|
} else {
|
||||||
if (auto megaBufferBinding{view.TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionTag)})
|
if (auto megaBufferBinding{view.TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionTag)})
|
||||||
@ -77,11 +91,16 @@ namespace skyline::gpu::interconnect {
|
|||||||
return {.raw = primaryVal};
|
return {.raw = primaryVal};
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::pair<vk::DescriptorImageInfo, TextureView *> GetTextureBinding(InterconnectContext &ctx, const auto &desc, Samplers &samplers, Textures &textures, BindlessHandle handle) {
|
static std::pair<vk::DescriptorImageInfo, TextureView *> GetTextureBinding(InterconnectContext &ctx, const auto &desc,
|
||||||
|
Samplers &samplers, Textures &textures,
|
||||||
|
BindlessHandle handle,
|
||||||
|
vk::PipelineStageFlagBits dstStage,
|
||||||
|
vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask) {
|
||||||
auto sampler{samplers.GetSampler(ctx, handle.samplerIndex, handle.textureIndex)};
|
auto sampler{samplers.GetSampler(ctx, handle.samplerIndex, handle.textureIndex)};
|
||||||
auto texture{textures.GetTexture(ctx, handle.textureIndex, desc.type)};
|
auto texture{textures.GetTexture(ctx, handle.textureIndex, desc.type)};
|
||||||
ctx.executor.AttachTexture(texture);
|
ctx.executor.AttachTexture(texture);
|
||||||
auto view{texture->GetView()};
|
auto view{texture->GetView()};
|
||||||
|
texture->texture->PopulateReadBarrier(dstStage, srcStageMask, dstStageMask);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
vk::DescriptorImageInfo{
|
vk::DescriptorImageInfo{
|
||||||
|
@ -149,7 +149,9 @@ namespace skyline::gpu::interconnect {
|
|||||||
[=](auto &&executionCallback) {
|
[=](auto &&executionCallback) {
|
||||||
auto dst{dstTextureView.get()};
|
auto dst{dstTextureView.get()};
|
||||||
std::array<TextureView *, 1> sampledImages{srcTextureView.get()};
|
std::array<TextureView *, 1> sampledImages{srcTextureView.get()};
|
||||||
executor.AddSubpass(std::move(executionCallback), {{static_cast<i32>(dstRectX), static_cast<i32>(dstRectY)}, {dstRectWidth, dstRectHeight} }, sampledImages, {}, {dst});
|
executor.AddSubpass(std::move(executionCallback), {{static_cast<i32>(dstRectX), static_cast<i32>(dstRectY)}, {dstRectWidth, dstRectHeight} },
|
||||||
|
sampledImages, {}, {dst}, {}, false,
|
||||||
|
vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eAllCommands);
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -37,7 +37,8 @@ namespace skyline::gpu::interconnect::kepler_compute {
|
|||||||
samplers.Update(ctx, qmd.samplerIndex == soc::gm20b::engine::kepler_compute::QMD::SamplerIndex::ViaHeaderIndex);
|
samplers.Update(ctx, qmd.samplerIndex == soc::gm20b::engine::kepler_compute::QMD::SamplerIndex::ViaHeaderIndex);
|
||||||
auto *pipeline{pipelineState.Update(ctx, builder, textures, constantBuffers.boundConstantBuffers, qmd)};
|
auto *pipeline{pipelineState.Update(ctx, builder, textures, constantBuffers.boundConstantBuffers, qmd)};
|
||||||
|
|
||||||
auto *descUpdateInfo{pipeline->SyncDescriptors(ctx, constantBuffers.boundConstantBuffers, samplers, textures)};
|
vk::PipelineStageFlags srcStageMask{}, dstStageMask{};
|
||||||
|
auto *descUpdateInfo{pipeline->SyncDescriptors(ctx, constantBuffers.boundConstantBuffers, samplers, textures, srcStageMask, dstStageMask)};
|
||||||
builder.SetPipeline(*pipeline->compiledPipeline.pipeline, vk::PipelineBindPoint::eCompute);
|
builder.SetPipeline(*pipeline->compiledPipeline.pipeline, vk::PipelineBindPoint::eCompute);
|
||||||
|
|
||||||
if (ctx.gpu.traits.supportsPushDescriptors) {
|
if (ctx.gpu.traits.supportsPushDescriptors) {
|
||||||
@ -57,13 +58,20 @@ namespace skyline::gpu::interconnect::kepler_compute {
|
|||||||
struct DrawParams {
|
struct DrawParams {
|
||||||
StateUpdater stateUpdater;
|
StateUpdater stateUpdater;
|
||||||
std::array<u32, 3> dimensions;
|
std::array<u32, 3> dimensions;
|
||||||
|
vk::PipelineStageFlags srcStageMask, dstStageMask;
|
||||||
};
|
};
|
||||||
auto *drawParams{ctx.executor.allocator->EmplaceUntracked<DrawParams>(DrawParams{stateUpdater, {qmd.ctaRasterWidth, qmd.ctaRasterHeight, qmd.ctaRasterDepth}})};
|
auto *drawParams{ctx.executor.allocator->EmplaceUntracked<DrawParams>(DrawParams{stateUpdater, {qmd.ctaRasterWidth, qmd.ctaRasterHeight, qmd.ctaRasterDepth}, srcStageMask, dstStageMask})};
|
||||||
|
|
||||||
|
|
||||||
ctx.executor.AddOutsideRpCommand([drawParams](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &gpu) {
|
ctx.executor.AddOutsideRpCommand([drawParams](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &gpu) {
|
||||||
drawParams->stateUpdater.RecordAll(gpu, commandBuffer);
|
drawParams->stateUpdater.RecordAll(gpu, commandBuffer);
|
||||||
|
|
||||||
|
if (drawParams->srcStageMask && drawParams->dstStageMask)
|
||||||
|
commandBuffer.pipelineBarrier(drawParams->srcStageMask, drawParams->dstStageMask, {}, {vk::MemoryBarrier{
|
||||||
|
.srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
|
||||||
|
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite
|
||||||
|
}}, {}, {});
|
||||||
|
|
||||||
commandBuffer.dispatch(drawParams->dimensions[0], drawParams->dimensions[1], drawParams->dimensions[2]);
|
commandBuffer.dispatch(drawParams->dimensions[0], drawParams->dimensions[1], drawParams->dimensions[2]);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -120,7 +120,7 @@ namespace skyline::gpu::interconnect::kepler_compute {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
DescriptorUpdateInfo *Pipeline::SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures) {
|
DescriptorUpdateInfo *Pipeline::SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask) {
|
||||||
SyncCachedStorageBufferViews(ctx.executor.executionTag);
|
SyncCachedStorageBufferViews(ctx.executor.executionTag);
|
||||||
|
|
||||||
u32 writeIdx{};
|
u32 writeIdx{};
|
||||||
@ -175,12 +175,18 @@ namespace skyline::gpu::interconnect::kepler_compute {
|
|||||||
writeBufferDescs(vk::DescriptorType::eUniformBuffer, shaderStage.info.constant_buffer_descriptors,
|
writeBufferDescs(vk::DescriptorType::eUniformBuffer, shaderStage.info.constant_buffer_descriptors,
|
||||||
[&](const Shader::ConstantBufferDescriptor &desc, size_t arrayIdx) {
|
[&](const Shader::ConstantBufferDescriptor &desc, size_t arrayIdx) {
|
||||||
size_t cbufIdx{desc.index + arrayIdx};
|
size_t cbufIdx{desc.index + arrayIdx};
|
||||||
return GetConstantBufferBinding(ctx, shaderStage.info.constant_buffer_used_sizes, constantBuffers[cbufIdx].view, cbufIdx);
|
return GetConstantBufferBinding(ctx, shaderStage.info.constant_buffer_used_sizes,
|
||||||
|
constantBuffers[cbufIdx].view, cbufIdx,
|
||||||
|
vk::PipelineStageFlagBits::eComputeShader,
|
||||||
|
srcStageMask, dstStageMask);
|
||||||
});
|
});
|
||||||
|
|
||||||
writeBufferDescs(vk::DescriptorType::eStorageBuffer, shaderStage.info.storage_buffers_descriptors,
|
writeBufferDescs(vk::DescriptorType::eStorageBuffer, shaderStage.info.storage_buffers_descriptors,
|
||||||
[&](const Shader::StorageBufferDescriptor &desc, size_t arrayIdx) {
|
[&](const Shader::StorageBufferDescriptor &desc, size_t arrayIdx) {
|
||||||
auto binding{GetStorageBufferBinding(ctx, desc, constantBuffers[desc.cbuf_index], storageBufferViews[storageBufferIdx])};
|
auto binding{GetStorageBufferBinding(ctx, desc, constantBuffers[desc.cbuf_index],
|
||||||
|
storageBufferViews[storageBufferIdx],
|
||||||
|
vk::PipelineStageFlagBits::eComputeShader,
|
||||||
|
srcStageMask, dstStageMask)};
|
||||||
storageBufferIdx += arrayIdx ? 0 : 1;
|
storageBufferIdx += arrayIdx ? 0 : 1;
|
||||||
return binding;
|
return binding;
|
||||||
});
|
});
|
||||||
@ -188,7 +194,10 @@ namespace skyline::gpu::interconnect::kepler_compute {
|
|||||||
writeImageDescs(vk::DescriptorType::eCombinedImageSampler, shaderStage.info.texture_descriptors,
|
writeImageDescs(vk::DescriptorType::eCombinedImageSampler, shaderStage.info.texture_descriptors,
|
||||||
[&](const Shader::TextureDescriptor &desc, size_t arrayIdx) {
|
[&](const Shader::TextureDescriptor &desc, size_t arrayIdx) {
|
||||||
BindlessHandle handle{ReadBindlessHandle(ctx, constantBuffers, desc, arrayIdx)};
|
BindlessHandle handle{ReadBindlessHandle(ctx, constantBuffers, desc, arrayIdx)};
|
||||||
auto binding{GetTextureBinding(ctx, desc, samplers, textures, handle)};
|
auto binding{GetTextureBinding(ctx, desc,
|
||||||
|
samplers, textures, handle,
|
||||||
|
vk::PipelineStageFlagBits::eComputeShader,
|
||||||
|
srcStageMask, dstStageMask)};
|
||||||
return binding.first;
|
return binding.first;
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -55,7 +55,7 @@ namespace skyline::gpu::interconnect::kepler_compute {
|
|||||||
/**
|
/**
|
||||||
* @brief Creates a descriptor set update from the current GPU state
|
* @brief Creates a descriptor set update from the current GPU state
|
||||||
*/
|
*/
|
||||||
DescriptorUpdateInfo *SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures);
|
DescriptorUpdateInfo *SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask);
|
||||||
};
|
};
|
||||||
|
|
||||||
class PipelineManager {
|
class PipelineManager {
|
||||||
|
@ -21,13 +21,14 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
|
|
||||||
VertexBufferState::VertexBufferState(dirty::Handle dirtyHandle, DirtyManager &manager, const EngineRegisters &engine, u32 index) : engine{manager, dirtyHandle, engine}, index{index} {}
|
VertexBufferState::VertexBufferState(dirty::Handle dirtyHandle, DirtyManager &manager, const EngineRegisters &engine, u32 index) : engine{manager, dirtyHandle, engine}, index{index} {}
|
||||||
|
|
||||||
void VertexBufferState::Flush(InterconnectContext &ctx, StateUpdateBuilder &builder) {
|
void VertexBufferState::Flush(InterconnectContext &ctx, StateUpdateBuilder &builder, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask) {
|
||||||
size_t size{engine->vertexStreamLimit - engine->vertexStream.location + 1};
|
size_t size{engine->vertexStreamLimit - engine->vertexStream.location + 1};
|
||||||
|
|
||||||
if (engine->vertexStream.format.enable && engine->vertexStream.location != 0 && size) {
|
if (engine->vertexStream.format.enable && engine->vertexStream.location != 0 && size) {
|
||||||
view.Update(ctx, engine->vertexStream.location, size);
|
view.Update(ctx, engine->vertexStream.location, size);
|
||||||
if (*view) {
|
if (*view) {
|
||||||
ctx.executor.AttachBuffer(*view);
|
ctx.executor.AttachBuffer(*view);
|
||||||
|
view->GetBuffer()->PopulateReadBarrier(vk::PipelineStageFlagBits::eVertexInput, srcStageMask, dstStageMask);
|
||||||
|
|
||||||
if (megaBufferBinding = view->TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionTag);
|
if (megaBufferBinding = view->TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionTag);
|
||||||
megaBufferBinding)
|
megaBufferBinding)
|
||||||
@ -48,7 +49,10 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
builder.SetVertexBuffer(index, {ctx.gpu.megaBufferAllocator.Allocate(ctx.executor.cycle, 0).buffer}, ctx.gpu.traits.supportsExtendedDynamicState, engine->vertexStream.format.stride);
|
builder.SetVertexBuffer(index, {ctx.gpu.megaBufferAllocator.Allocate(ctx.executor.cycle, 0).buffer}, ctx.gpu.traits.supportsExtendedDynamicState, engine->vertexStream.format.stride);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool VertexBufferState::Refresh(InterconnectContext &ctx, StateUpdateBuilder &builder) {
|
bool VertexBufferState::Refresh(InterconnectContext &ctx, StateUpdateBuilder &builder, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask) {
|
||||||
|
if (*view)
|
||||||
|
view->GetBuffer()->PopulateReadBarrier(vk::PipelineStageFlagBits::eVertexInput, srcStageMask, dstStageMask);
|
||||||
|
|
||||||
if (megaBufferBinding) {
|
if (megaBufferBinding) {
|
||||||
if (auto newMegaBufferBinding{view->TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionTag)};
|
if (auto newMegaBufferBinding{view->TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionTag)};
|
||||||
newMegaBufferBinding != megaBufferBinding) {
|
newMegaBufferBinding != megaBufferBinding) {
|
||||||
@ -117,7 +121,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
|
|
||||||
IndexBufferState::IndexBufferState(dirty::Handle dirtyHandle, DirtyManager &manager, const EngineRegisters &engine) : engine{manager, dirtyHandle, engine} {}
|
IndexBufferState::IndexBufferState(dirty::Handle dirtyHandle, DirtyManager &manager, const EngineRegisters &engine) : engine{manager, dirtyHandle, engine} {}
|
||||||
|
|
||||||
void IndexBufferState::Flush(InterconnectContext &ctx, StateUpdateBuilder &builder, bool quadConversion, u32 firstIndex, u32 elementCount) {
|
void IndexBufferState::Flush(InterconnectContext &ctx, StateUpdateBuilder &builder, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask, bool quadConversion, u32 firstIndex, u32 elementCount) {
|
||||||
usedElementCount = elementCount;
|
usedElementCount = elementCount;
|
||||||
usedFirstIndex = firstIndex;
|
usedFirstIndex = firstIndex;
|
||||||
usedQuadConversion = quadConversion;
|
usedQuadConversion = quadConversion;
|
||||||
@ -130,6 +134,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
}
|
}
|
||||||
|
|
||||||
ctx.executor.AttachBuffer(*view);
|
ctx.executor.AttachBuffer(*view);
|
||||||
|
view->GetBuffer()->PopulateReadBarrier(vk::PipelineStageFlagBits::eVertexInput, srcStageMask, dstStageMask);
|
||||||
|
|
||||||
indexType = ConvertIndexType(engine->indexBuffer.indexSize);
|
indexType = ConvertIndexType(engine->indexBuffer.indexSize);
|
||||||
|
|
||||||
@ -144,7 +149,10 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
builder.SetIndexBuffer(*view, indexType);
|
builder.SetIndexBuffer(*view, indexType);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IndexBufferState::Refresh(InterconnectContext &ctx, StateUpdateBuilder &builder, bool quadConversion, u32 firstIndex, u32 elementCount) {
|
bool IndexBufferState::Refresh(InterconnectContext &ctx, StateUpdateBuilder &builder, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask, bool quadConversion, u32 firstIndex, u32 elementCount) {
|
||||||
|
if (*view)
|
||||||
|
view->GetBuffer()->PopulateReadBarrier(vk::PipelineStageFlagBits::eVertexInput, srcStageMask, dstStageMask);
|
||||||
|
|
||||||
if (elementCount > usedElementCount)
|
if (elementCount > usedElementCount)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
@ -185,7 +193,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
|
|
||||||
TransformFeedbackBufferState::TransformFeedbackBufferState(dirty::Handle dirtyHandle, DirtyManager &manager, const EngineRegisters &engine, u32 index) : engine{manager, dirtyHandle, engine}, index{index} {}
|
TransformFeedbackBufferState::TransformFeedbackBufferState(dirty::Handle dirtyHandle, DirtyManager &manager, const EngineRegisters &engine, u32 index) : engine{manager, dirtyHandle, engine}, index{index} {}
|
||||||
|
|
||||||
void TransformFeedbackBufferState::Flush(InterconnectContext &ctx, StateUpdateBuilder &builder) {
|
void TransformFeedbackBufferState::Flush(InterconnectContext &ctx, StateUpdateBuilder &builder, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask) {
|
||||||
if (engine->streamOutEnable) {
|
if (engine->streamOutEnable) {
|
||||||
if (engine->streamOutBuffer.size) {
|
if (engine->streamOutBuffer.size) {
|
||||||
view.Update(ctx, engine->streamOutBuffer.address + engine->streamOutBuffer.loadWritePointerStartOffset, engine->streamOutBuffer.size);
|
view.Update(ctx, engine->streamOutBuffer.address + engine->streamOutBuffer.loadWritePointerStartOffset, engine->streamOutBuffer.size);
|
||||||
@ -193,6 +201,11 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
if (*view) {
|
if (*view) {
|
||||||
ctx.executor.AttachBuffer(*view);
|
ctx.executor.AttachBuffer(*view);
|
||||||
|
|
||||||
|
if (view->GetBuffer()->SequencedCpuBackingWritesBlocked()) {
|
||||||
|
srcStageMask |= vk::PipelineStageFlagBits::eAllCommands;
|
||||||
|
dstStageMask |= vk::PipelineStageFlagBits::eTransformFeedbackEXT;
|
||||||
|
}
|
||||||
|
|
||||||
view->GetBuffer()->MarkGpuDirty();
|
view->GetBuffer()->MarkGpuDirty();
|
||||||
builder.SetTransformFeedbackBuffer(index, *view);
|
builder.SetTransformFeedbackBuffer(index, *view);
|
||||||
return;
|
return;
|
||||||
@ -206,6 +219,15 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool TransformFeedbackBufferState::Refresh(InterconnectContext &ctx, StateUpdateBuilder &builder, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask) {
|
||||||
|
if (*view && view->GetBuffer()->SequencedCpuBackingWritesBlocked()) {
|
||||||
|
srcStageMask |= vk::PipelineStageFlagBits::eAllCommands;
|
||||||
|
dstStageMask |= vk::PipelineStageFlagBits::eTransformFeedbackEXT;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
void TransformFeedbackBufferState::PurgeCaches() {
|
void TransformFeedbackBufferState::PurgeCaches() {
|
||||||
view.PurgeCaches();
|
view.PurgeCaches();
|
||||||
}
|
}
|
||||||
@ -408,18 +430,22 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
dirtyFunc(stencilValues);
|
dirtyFunc(stencilValues);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ActiveState::Update(InterconnectContext &ctx, Textures &textures, ConstantBufferSet &constantBuffers, StateUpdateBuilder &builder, bool indexed, engine::DrawTopology topology, u32 drawFirstIndex, u32 drawElementCount) {
|
void ActiveState::Update(InterconnectContext &ctx, Textures &textures, ConstantBufferSet &constantBuffers, StateUpdateBuilder &builder,
|
||||||
|
bool indexed, engine::DrawTopology topology, u32 drawFirstIndex, u32 drawElementCount,
|
||||||
|
vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask) {
|
||||||
if (topology != directState.inputAssembly.GetPrimitiveTopology()) {
|
if (topology != directState.inputAssembly.GetPrimitiveTopology()) {
|
||||||
directState.inputAssembly.SetPrimitiveTopology(topology);
|
directState.inputAssembly.SetPrimitiveTopology(topology);
|
||||||
pipeline.MarkDirty(false);
|
pipeline.MarkDirty(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto updateFunc{[&](auto &stateElem, auto &&... args) { stateElem.Update(ctx, builder, args...); }};
|
auto updateFunc{[&](auto &stateElem, auto &&... args) { stateElem.Update(ctx, builder, args...); }};
|
||||||
|
auto updateFuncBuffer{[&](auto &stateElem, auto &&... args) { stateElem.Update(ctx, builder, srcStageMask, dstStageMask, args...); }};
|
||||||
|
|
||||||
pipeline.Update(ctx, textures, constantBuffers, builder);
|
pipeline.Update(ctx, textures, constantBuffers, builder);
|
||||||
ranges::for_each(vertexBuffers, updateFunc);
|
ranges::for_each(vertexBuffers, updateFuncBuffer);
|
||||||
if (indexed)
|
if (indexed)
|
||||||
updateFunc(indexBuffer, directState.inputAssembly.NeedsQuadConversion(), drawFirstIndex, drawElementCount);
|
updateFuncBuffer(indexBuffer, directState.inputAssembly.NeedsQuadConversion(), drawFirstIndex, drawElementCount);
|
||||||
ranges::for_each(transformFeedbackBuffers, updateFunc);
|
ranges::for_each(transformFeedbackBuffers, updateFuncBuffer);
|
||||||
ranges::for_each(viewports, updateFunc);
|
ranges::for_each(viewports, updateFunc);
|
||||||
ranges::for_each(scissors, updateFunc);
|
ranges::for_each(scissors, updateFunc);
|
||||||
updateFunc(lineWidth);
|
updateFunc(lineWidth);
|
||||||
|
@ -27,9 +27,9 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
public:
|
public:
|
||||||
VertexBufferState(dirty::Handle dirtyHandle, DirtyManager &manager, const EngineRegisters &engine, u32 index);
|
VertexBufferState(dirty::Handle dirtyHandle, DirtyManager &manager, const EngineRegisters &engine, u32 index);
|
||||||
|
|
||||||
void Flush(InterconnectContext &ctx, StateUpdateBuilder &builder);
|
void Flush(InterconnectContext &ctx, StateUpdateBuilder &builder, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask);
|
||||||
|
|
||||||
bool Refresh(InterconnectContext &ctx, StateUpdateBuilder &builder);
|
bool Refresh(InterconnectContext &ctx, StateUpdateBuilder &builder, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask);
|
||||||
|
|
||||||
void PurgeCaches();
|
void PurgeCaches();
|
||||||
};
|
};
|
||||||
@ -54,14 +54,14 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
public:
|
public:
|
||||||
IndexBufferState(dirty::Handle dirtyHandle, DirtyManager &manager, const EngineRegisters &engine);
|
IndexBufferState(dirty::Handle dirtyHandle, DirtyManager &manager, const EngineRegisters &engine);
|
||||||
|
|
||||||
void Flush(InterconnectContext &ctx, StateUpdateBuilder &builder, bool quadConversion, u32 firstIndex, u32 elementCount);
|
void Flush(InterconnectContext &ctx, StateUpdateBuilder &builder, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask, bool quadConversion, u32 firstIndex, u32 elementCount);
|
||||||
|
|
||||||
bool Refresh(InterconnectContext &ctx, StateUpdateBuilder &builder, bool quadConversion, u32 firstIndex, u32 elementCount);
|
bool Refresh(InterconnectContext &ctx, StateUpdateBuilder &builder, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask, bool quadConversion, u32 firstIndex, u32 elementCount);
|
||||||
|
|
||||||
void PurgeCaches();
|
void PurgeCaches();
|
||||||
};
|
};
|
||||||
|
|
||||||
class TransformFeedbackBufferState : dirty::CachedManualDirty {
|
class TransformFeedbackBufferState : dirty::CachedManualDirty, dirty::RefreshableManualDirty {
|
||||||
public:
|
public:
|
||||||
struct EngineRegisters {
|
struct EngineRegisters {
|
||||||
const engine::StreamOutBuffer &streamOutBuffer;
|
const engine::StreamOutBuffer &streamOutBuffer;
|
||||||
@ -78,7 +78,9 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
public:
|
public:
|
||||||
TransformFeedbackBufferState(dirty::Handle dirtyHandle, DirtyManager &manager, const EngineRegisters &engine, u32 index);
|
TransformFeedbackBufferState(dirty::Handle dirtyHandle, DirtyManager &manager, const EngineRegisters &engine, u32 index);
|
||||||
|
|
||||||
void Flush(InterconnectContext &ctx, StateUpdateBuilder &builder);
|
void Flush(InterconnectContext &ctx, StateUpdateBuilder &builder, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask);
|
||||||
|
|
||||||
|
bool Refresh(InterconnectContext &ctx, StateUpdateBuilder &builder, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask);
|
||||||
|
|
||||||
void PurgeCaches();
|
void PurgeCaches();
|
||||||
};
|
};
|
||||||
@ -258,7 +260,9 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
/**
|
/**
|
||||||
* @brief Updates the active state for a given draw operation, removing the dirtiness of all member states
|
* @brief Updates the active state for a given draw operation, removing the dirtiness of all member states
|
||||||
*/
|
*/
|
||||||
void Update(InterconnectContext &ctx, Textures &textures, ConstantBufferSet &constantBuffers, StateUpdateBuilder &builder, bool indexed, engine::DrawTopology topology, u32 drawFirstIndex, u32 drawElementCount);
|
void Update(InterconnectContext &ctx, Textures &textures, ConstantBufferSet &constantBuffers, StateUpdateBuilder &builder,
|
||||||
|
bool indexed, engine::DrawTopology topology, u32 drawFirstIndex, u32 drawElementCount,
|
||||||
|
vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask);
|
||||||
|
|
||||||
Pipeline *GetPipeline();
|
Pipeline *GetPipeline();
|
||||||
|
|
||||||
|
@ -212,10 +212,11 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
|
|
||||||
void Maxwell3D::Draw(engine::DrawTopology topology, bool transformFeedbackEnable, bool indexed, u32 count, u32 first, u32 instanceCount, u32 vertexOffset, u32 firstInstance) {
|
void Maxwell3D::Draw(engine::DrawTopology topology, bool transformFeedbackEnable, bool indexed, u32 count, u32 first, u32 instanceCount, u32 vertexOffset, u32 firstInstance) {
|
||||||
StateUpdateBuilder builder{*ctx.executor.allocator};
|
StateUpdateBuilder builder{*ctx.executor.allocator};
|
||||||
|
vk::PipelineStageFlags srcStageMask{}, dstStageMask{};
|
||||||
|
|
||||||
Pipeline *oldPipeline{activeState.GetPipeline()};
|
Pipeline *oldPipeline{activeState.GetPipeline()};
|
||||||
samplers.Update(ctx, samplerBinding.value == engine::SamplerBinding::Value::ViaHeaderBinding);
|
samplers.Update(ctx, samplerBinding.value == engine::SamplerBinding::Value::ViaHeaderBinding);
|
||||||
activeState.Update(ctx, textures, constantBuffers.boundConstantBuffers, builder, indexed, topology, first, count);
|
activeState.Update(ctx, textures, constantBuffers.boundConstantBuffers, builder, indexed, topology, first, count, srcStageMask, dstStageMask);
|
||||||
if (directState.inputAssembly.NeedsQuadConversion()) {
|
if (directState.inputAssembly.NeedsQuadConversion()) {
|
||||||
count = conversion::quads::GetIndexCount(count);
|
count = conversion::quads::GetIndexCount(count);
|
||||||
first = 0;
|
first = 0;
|
||||||
@ -231,17 +232,18 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
Pipeline *pipeline{activeState.GetPipeline()};
|
Pipeline *pipeline{activeState.GetPipeline()};
|
||||||
activeDescriptorSetSampledImages.resize(pipeline->GetTotalSampledImageCount());
|
activeDescriptorSetSampledImages.resize(pipeline->GetTotalSampledImageCount());
|
||||||
|
|
||||||
|
|
||||||
auto *descUpdateInfo{[&]() -> DescriptorUpdateInfo * {
|
auto *descUpdateInfo{[&]() -> DescriptorUpdateInfo * {
|
||||||
if (((oldPipeline == pipeline) || (oldPipeline && oldPipeline->CheckBindingMatch(pipeline))) && constantBuffers.quickBindEnabled) {
|
if (((oldPipeline == pipeline) || (oldPipeline && oldPipeline->CheckBindingMatch(pipeline))) && constantBuffers.quickBindEnabled) {
|
||||||
// If bindings between the old and new pipelines are the same we can reuse the descriptor sets given that quick bind is enabled (meaning that no buffer updates or calls to non-graphics engines have occurred that could invalidate them)
|
// If bindings between the old and new pipelines are the same we can reuse the descriptor sets given that quick bind is enabled (meaning that no buffer updates or calls to non-graphics engines have occurred that could invalidate them)
|
||||||
if (constantBuffers.quickBind)
|
if (constantBuffers.quickBind)
|
||||||
// If only a single constant buffer has been rebound between draws we can perform a partial descriptor update
|
// If only a single constant buffer has been rebound between draws we can perform a partial descriptor update
|
||||||
return pipeline->SyncDescriptorsQuickBind(ctx, constantBuffers.boundConstantBuffers, samplers, textures, *constantBuffers.quickBind, activeDescriptorSetSampledImages);
|
return pipeline->SyncDescriptorsQuickBind(ctx, constantBuffers.boundConstantBuffers, samplers, textures, *constantBuffers.quickBind, activeDescriptorSetSampledImages, srcStageMask, dstStageMask);
|
||||||
else
|
else
|
||||||
return nullptr;
|
return nullptr;
|
||||||
} else {
|
} else {
|
||||||
// If bindings have changed or quick bind is disabled, perform a full descriptor update
|
// If bindings have changed or quick bind is disabled, perform a full descriptor update
|
||||||
return pipeline->SyncDescriptors(ctx, constantBuffers.boundConstantBuffers, samplers, textures, activeDescriptorSetSampledImages);
|
return pipeline->SyncDescriptors(ctx, constantBuffers.boundConstantBuffers, samplers, textures, activeDescriptorSetSampledImages, srcStageMask, dstStageMask);
|
||||||
}
|
}
|
||||||
}()};
|
}()};
|
||||||
|
|
||||||
@ -319,7 +321,6 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
|
|
||||||
if (drawParams->transformFeedbackEnable)
|
if (drawParams->transformFeedbackEnable)
|
||||||
commandBuffer.endTransformFeedbackEXT(0, {}, {});
|
commandBuffer.endTransformFeedbackEXT(0, {}, {});
|
||||||
}, scissor, activeDescriptorSetSampledImages, {}, activeState.GetColorAttachments(), activeState.GetDepthAttachment(), !ctx.gpu.traits.quirks.relaxedRenderPassCompatibility);
|
}, scissor, activeDescriptorSetSampledImages, {}, activeState.GetColorAttachments(), activeState.GetDepthAttachment(), !ctx.gpu.traits.quirks.relaxedRenderPassCompatibility, srcStageMask, dstStageMask);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -258,6 +258,22 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
return shaderStages;
|
return shaderStages;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static vk::PipelineStageFlagBits ConvertShaderToPipelineStage(vk::ShaderStageFlagBits stage) {
|
||||||
|
switch (stage) {
|
||||||
|
case vk::ShaderStageFlagBits::eVertex:
|
||||||
|
return vk::PipelineStageFlagBits::eVertexShader;
|
||||||
|
case vk::ShaderStageFlagBits::eTessellationControl:
|
||||||
|
return vk::PipelineStageFlagBits::eTessellationControlShader;
|
||||||
|
case vk::ShaderStageFlagBits::eTessellationEvaluation:
|
||||||
|
return vk::PipelineStageFlagBits::eTessellationEvaluationShader;
|
||||||
|
case vk::ShaderStageFlagBits::eGeometry:
|
||||||
|
return vk::PipelineStageFlagBits::eGeometryShader;
|
||||||
|
case vk::ShaderStageFlagBits::eFragment:
|
||||||
|
return vk::PipelineStageFlagBits::eFragmentShader;
|
||||||
|
default:
|
||||||
|
throw exception("Invalid shader stage");
|
||||||
|
}
|
||||||
|
}
|
||||||
static Pipeline::DescriptorInfo MakePipelineDescriptorInfo(const std::array<ShaderStage, engine::ShaderStageCount> &shaderStages, bool needsIndividualTextureBindingWrites) {
|
static Pipeline::DescriptorInfo MakePipelineDescriptorInfo(const std::array<ShaderStage, engine::ShaderStageCount> &shaderStages, bool needsIndividualTextureBindingWrites) {
|
||||||
Pipeline::DescriptorInfo descriptorInfo{};
|
Pipeline::DescriptorInfo descriptorInfo{};
|
||||||
u16 bindingIndex{};
|
u16 bindingIndex{};
|
||||||
@ -268,6 +284,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
continue;
|
continue;
|
||||||
|
|
||||||
auto &stageDescInfo{descriptorInfo.stages[i]};
|
auto &stageDescInfo{descriptorInfo.stages[i]};
|
||||||
|
stageDescInfo.stage = ConvertShaderToPipelineStage(stage.stage);
|
||||||
|
|
||||||
auto pushBindings{[&](vk::DescriptorType type, const auto &descs, u16 &count, auto &outputDescs, auto &&descCb, bool individualDescWrites = false) {
|
auto pushBindings{[&](vk::DescriptorType type, const auto &descs, u16 &count, auto &outputDescs, auto &&descCb, bool individualDescWrites = false) {
|
||||||
descriptorInfo.totalWriteDescCount += individualDescWrites ? descs.size() : ((descs.size() > 0) ? 1 : 0);
|
descriptorInfo.totalWriteDescCount += individualDescWrites ? descs.size() : ((descs.size() > 0) ? 1 : 0);
|
||||||
@ -712,7 +729,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
return descriptorInfo.totalCombinedImageSamplerCount;
|
return descriptorInfo.totalCombinedImageSamplerCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
DescriptorUpdateInfo *Pipeline::SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, span<TextureView *> sampledImages) {
|
DescriptorUpdateInfo *Pipeline::SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, span<TextureView *> sampledImages, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask) {
|
||||||
SyncCachedStorageBufferViews(ctx.executor.executionTag);
|
SyncCachedStorageBufferViews(ctx.executor.executionTag);
|
||||||
|
|
||||||
u32 writeIdx{};
|
u32 writeIdx{};
|
||||||
@ -788,12 +805,18 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
writeBufferDescs(vk::DescriptorType::eUniformBuffer, stage.uniformBufferDescs, stage.uniformBufferDescTotalCount,
|
writeBufferDescs(vk::DescriptorType::eUniformBuffer, stage.uniformBufferDescs, stage.uniformBufferDescTotalCount,
|
||||||
[&](const DescriptorInfo::StageDescriptorInfo::UniformBufferDesc &desc, size_t arrayIdx) {
|
[&](const DescriptorInfo::StageDescriptorInfo::UniformBufferDesc &desc, size_t arrayIdx) {
|
||||||
size_t cbufIdx{desc.index + arrayIdx};
|
size_t cbufIdx{desc.index + arrayIdx};
|
||||||
return GetConstantBufferBinding(ctx, {stage.constantBufferUsedSizes}, constantBuffers[i][cbufIdx].view, cbufIdx);
|
return GetConstantBufferBinding(ctx, {stage.constantBufferUsedSizes},
|
||||||
|
constantBuffers[i][cbufIdx].view, cbufIdx,
|
||||||
|
stage.stage,
|
||||||
|
srcStageMask, dstStageMask);
|
||||||
});
|
});
|
||||||
|
|
||||||
writeBufferDescs(vk::DescriptorType::eStorageBuffer, stage.storageBufferDescs, stage.storageBufferDescTotalCount,
|
writeBufferDescs(vk::DescriptorType::eStorageBuffer, stage.storageBufferDescs, stage.storageBufferDescTotalCount,
|
||||||
[&](const DescriptorInfo::StageDescriptorInfo::StorageBufferDesc &desc, size_t arrayIdx) {
|
[&](const DescriptorInfo::StageDescriptorInfo::StorageBufferDesc &desc, size_t arrayIdx) {
|
||||||
return GetStorageBufferBinding(ctx, desc, constantBuffers[i][desc.cbuf_index], storageBufferViews[storageBufferIdx++]);
|
return GetStorageBufferBinding(ctx, desc, constantBuffers[i][desc.cbuf_index],
|
||||||
|
storageBufferViews[storageBufferIdx++],
|
||||||
|
stage.stage,
|
||||||
|
srcStageMask, dstStageMask);
|
||||||
});
|
});
|
||||||
|
|
||||||
bindingIdx += stage.uniformTexelBufferDescs.size();
|
bindingIdx += stage.uniformTexelBufferDescs.size();
|
||||||
@ -802,7 +825,10 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
writeImageDescs(vk::DescriptorType::eCombinedImageSampler, stage.combinedImageSamplerDescs, stage.combinedImageSamplerDescTotalCount,
|
writeImageDescs(vk::DescriptorType::eCombinedImageSampler, stage.combinedImageSamplerDescs, stage.combinedImageSamplerDescTotalCount,
|
||||||
[&](const DescriptorInfo::StageDescriptorInfo::CombinedImageSamplerDesc &desc, size_t arrayIdx) {
|
[&](const DescriptorInfo::StageDescriptorInfo::CombinedImageSamplerDesc &desc, size_t arrayIdx) {
|
||||||
BindlessHandle handle{ReadBindlessHandle(ctx, constantBuffers[i], desc, arrayIdx)};
|
BindlessHandle handle{ReadBindlessHandle(ctx, constantBuffers[i], desc, arrayIdx)};
|
||||||
auto binding{GetTextureBinding(ctx, desc, samplers, textures, handle)};
|
auto binding{GetTextureBinding(ctx, desc,
|
||||||
|
samplers, textures, handle,
|
||||||
|
stage.stage,
|
||||||
|
srcStageMask, dstStageMask)};
|
||||||
sampledImages[combinedImageSamplerIdx++] = binding.second;
|
sampledImages[combinedImageSamplerIdx++] = binding.second;
|
||||||
return binding.first;
|
return binding.first;
|
||||||
}, ctx.gpu.traits.quirks.needsIndividualTextureBindingWrites);
|
}, ctx.gpu.traits.quirks.needsIndividualTextureBindingWrites);
|
||||||
@ -825,7 +851,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
DescriptorUpdateInfo *Pipeline::SyncDescriptorsQuickBind(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, ConstantBuffers::QuickBind quickBind, span<TextureView *> sampledImages) {
|
DescriptorUpdateInfo *Pipeline::SyncDescriptorsQuickBind(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, ConstantBuffers::QuickBind quickBind, span<TextureView *> sampledImages, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask) {
|
||||||
SyncCachedStorageBufferViews(ctx.executor.executionTag);
|
SyncCachedStorageBufferViews(ctx.executor.executionTag);
|
||||||
|
|
||||||
size_t stageIndex{static_cast<size_t>(quickBind.stage)};
|
size_t stageIndex{static_cast<size_t>(quickBind.stage)};
|
||||||
@ -879,18 +905,27 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
writeDescs.operator()<false, true>(vk::DescriptorType::eUniformBuffer, cbufUsageInfo.uniformBuffers, stageDescInfo.uniformBufferDescs,
|
writeDescs.operator()<false, true>(vk::DescriptorType::eUniformBuffer, cbufUsageInfo.uniformBuffers, stageDescInfo.uniformBufferDescs,
|
||||||
[&](auto usage, const DescriptorInfo::StageDescriptorInfo::UniformBufferDesc &desc, size_t arrayIdx) -> DynamicBufferBinding {
|
[&](auto usage, const DescriptorInfo::StageDescriptorInfo::UniformBufferDesc &desc, size_t arrayIdx) -> DynamicBufferBinding {
|
||||||
size_t cbufIdx{desc.index + arrayIdx};
|
size_t cbufIdx{desc.index + arrayIdx};
|
||||||
return GetConstantBufferBinding(ctx, {stageDescInfo.constantBufferUsedSizes}, stageConstantBuffers[cbufIdx].view, cbufIdx);
|
return GetConstantBufferBinding(ctx, {stageDescInfo.constantBufferUsedSizes},
|
||||||
|
stageConstantBuffers[cbufIdx].view, cbufIdx,
|
||||||
|
stageDescInfo.stage,
|
||||||
|
srcStageMask, dstStageMask);
|
||||||
});
|
});
|
||||||
|
|
||||||
writeDescs.operator()<false, true>(vk::DescriptorType::eStorageBuffer, cbufUsageInfo.storageBuffers, stageDescInfo.storageBufferDescs,
|
writeDescs.operator()<false, true>(vk::DescriptorType::eStorageBuffer, cbufUsageInfo.storageBuffers, stageDescInfo.storageBufferDescs,
|
||||||
[&](auto usage, const DescriptorInfo::StageDescriptorInfo::StorageBufferDesc &desc, size_t arrayIdx) {
|
[&](auto usage, const DescriptorInfo::StageDescriptorInfo::StorageBufferDesc &desc, size_t arrayIdx) {
|
||||||
return GetStorageBufferBinding(ctx, desc, stageConstantBuffers[desc.cbuf_index], storageBufferViews[usage.entirePipelineIdx + arrayIdx]);
|
return GetStorageBufferBinding(ctx, desc, stageConstantBuffers[desc.cbuf_index],
|
||||||
|
storageBufferViews[usage.entirePipelineIdx + arrayIdx],
|
||||||
|
stageDescInfo.stage,
|
||||||
|
srcStageMask, dstStageMask);
|
||||||
});
|
});
|
||||||
|
|
||||||
writeDescs.operator()<true, false>(vk::DescriptorType::eCombinedImageSampler, cbufUsageInfo.combinedImageSamplers, stageDescInfo.combinedImageSamplerDescs,
|
writeDescs.operator()<true, false>(vk::DescriptorType::eCombinedImageSampler, cbufUsageInfo.combinedImageSamplers, stageDescInfo.combinedImageSamplerDescs,
|
||||||
[&](auto usage, const DescriptorInfo::StageDescriptorInfo::CombinedImageSamplerDesc &desc, size_t arrayIdx) {
|
[&](auto usage, const DescriptorInfo::StageDescriptorInfo::CombinedImageSamplerDesc &desc, size_t arrayIdx) {
|
||||||
BindlessHandle handle{ReadBindlessHandle(ctx, stageConstantBuffers, desc, arrayIdx)};
|
BindlessHandle handle{ReadBindlessHandle(ctx, stageConstantBuffers, desc, arrayIdx)};
|
||||||
auto binding{GetTextureBinding(ctx, desc, samplers, textures, handle)};
|
auto binding{GetTextureBinding(ctx, desc,
|
||||||
|
samplers, textures, handle,
|
||||||
|
stageDescInfo.stage,
|
||||||
|
srcStageMask, dstStageMask)};
|
||||||
sampledImages[usage.entirePipelineIdx + arrayIdx] = binding.second;
|
sampledImages[usage.entirePipelineIdx + arrayIdx] = binding.second;
|
||||||
return binding.first;
|
return binding.first;
|
||||||
});
|
});
|
||||||
|
@ -29,6 +29,8 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
std::vector<vk::DescriptorSetLayoutBinding> descriptorSetLayoutBindings;
|
std::vector<vk::DescriptorSetLayoutBinding> descriptorSetLayoutBindings;
|
||||||
|
|
||||||
struct StageDescriptorInfo {
|
struct StageDescriptorInfo {
|
||||||
|
vk::PipelineStageFlagBits stage;
|
||||||
|
|
||||||
// Unwrapped counts (counting each array element as a separate descriptor) for the below desc structs
|
// Unwrapped counts (counting each array element as a separate descriptor) for the below desc structs
|
||||||
u16 uniformBufferDescTotalCount;
|
u16 uniformBufferDescTotalCount;
|
||||||
u16 storageBufferDescTotalCount;
|
u16 storageBufferDescTotalCount;
|
||||||
@ -248,13 +250,13 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
* @brief Creates a descriptor set update from the current GPU state
|
* @brief Creates a descriptor set update from the current GPU state
|
||||||
* @param sampledImages A span of size `GetTotalSampledImageCount()` in which texture view pointers for each sampled image will be written
|
* @param sampledImages A span of size `GetTotalSampledImageCount()` in which texture view pointers for each sampled image will be written
|
||||||
*/
|
*/
|
||||||
DescriptorUpdateInfo *SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, span<TextureView *> sampledImages);
|
DescriptorUpdateInfo *SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, span<TextureView *> sampledImages, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Creates a partial descriptor set update from the current GPU state for only the subset of descriptors changed by the quick bind constant buffer
|
* @brief Creates a partial descriptor set update from the current GPU state for only the subset of descriptors changed by the quick bind constant buffer
|
||||||
* @param sampledImages A span of size `GetTotalSampledImageCount()` in which texture view pointers for each sampled image will be written
|
* @param sampledImages A span of size `GetTotalSampledImageCount()` in which texture view pointers for each sampled image will be written
|
||||||
*/
|
*/
|
||||||
DescriptorUpdateInfo *SyncDescriptorsQuickBind(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, ConstantBuffers::QuickBind quickBind, span<TextureView *> sampledImages);
|
DescriptorUpdateInfo *SyncDescriptorsQuickBind(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, ConstantBuffers::QuickBind quickBind, span<TextureView *> sampledImages, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask);
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1006,5 +1006,32 @@ namespace skyline::gpu {
|
|||||||
void Texture::UpdateRenderPassUsage(u32 renderPassIndex, texture::RenderPassUsage renderPassUsage) {
|
void Texture::UpdateRenderPassUsage(u32 renderPassIndex, texture::RenderPassUsage renderPassUsage) {
|
||||||
lastRenderPassUsage = renderPassUsage;
|
lastRenderPassUsage = renderPassUsage;
|
||||||
lastRenderPassIndex = renderPassIndex;
|
lastRenderPassIndex = renderPassIndex;
|
||||||
|
|
||||||
|
if (renderPassUsage == texture::RenderPassUsage::RenderTarget)
|
||||||
|
pendingStageMask = vk::PipelineStageFlagBits::eVertexShader |
|
||||||
|
vk::PipelineStageFlagBits::eTessellationControlShader |
|
||||||
|
vk::PipelineStageFlagBits::eTessellationEvaluationShader |
|
||||||
|
vk::PipelineStageFlagBits::eGeometryShader |
|
||||||
|
vk::PipelineStageFlagBits::eFragmentShader |
|
||||||
|
vk::PipelineStageFlagBits::eComputeShader;
|
||||||
|
else if (renderPassUsage == texture::RenderPassUsage::None)
|
||||||
|
pendingStageMask = {};
|
||||||
|
}
|
||||||
|
|
||||||
|
texture::RenderPassUsage Texture::GetLastRenderPassUsage() {
|
||||||
|
return lastRenderPassUsage;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Texture::PopulateReadBarrier(vk::PipelineStageFlagBits dstStage, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask) {
|
||||||
|
if (!(pendingStageMask & dstStage))
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (format->vkAspect & (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil))
|
||||||
|
srcStageMask |= vk::PipelineStageFlagBits::eEarlyFragmentTests | vk::PipelineStageFlagBits::eLateFragmentTests;
|
||||||
|
else if (format->vkAspect & vk::ImageAspectFlagBits::eColor)
|
||||||
|
srcStageMask |= vk::PipelineStageFlagBits::eColorAttachmentOutput;
|
||||||
|
|
||||||
|
pendingStageMask &= ~dstStage;
|
||||||
|
dstStageMask |= dstStage;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -410,6 +410,7 @@ namespace skyline::gpu {
|
|||||||
|
|
||||||
u32 lastRenderPassIndex{}; //!< The index of the last render pass that used this texture
|
u32 lastRenderPassIndex{}; //!< The index of the last render pass that used this texture
|
||||||
texture::RenderPassUsage lastRenderPassUsage{texture::RenderPassUsage::None}; //!< The type of usage in the last render pass
|
texture::RenderPassUsage lastRenderPassUsage{texture::RenderPassUsage::None}; //!< The type of usage in the last render pass
|
||||||
|
vk::PipelineStageFlags pendingStageMask{}; //!< List of pipeline stages that are yet to be flushed for reads since the last time this texture was used an an RT
|
||||||
|
|
||||||
friend TextureManager;
|
friend TextureManager;
|
||||||
friend TextureView;
|
friend TextureView;
|
||||||
@ -606,5 +607,15 @@ namespace skyline::gpu {
|
|||||||
* @brief Updates renderpass usage tracking information
|
* @brief Updates renderpass usage tracking information
|
||||||
*/
|
*/
|
||||||
void UpdateRenderPassUsage(u32 renderPassIndex, texture::RenderPassUsage renderPassUsage);
|
void UpdateRenderPassUsage(u32 renderPassIndex, texture::RenderPassUsage renderPassUsage);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return The last usage of the texture
|
||||||
|
*/
|
||||||
|
texture::RenderPassUsage GetLastRenderPassUsage();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Populates the input src and dst stage masks with appropriate read barrier parameters for the current texture state
|
||||||
|
*/
|
||||||
|
void PopulateReadBarrier(vk::PipelineStageFlagBits dstStage, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user