mirror of
https://github.com/skyline-emu/skyline.git
synced 2025-01-14 03:37:54 +03:00
Implement usage based implicit renderpass barrier generation
Full pipeline barriers between every RP can be extremely expensive on HW; by analysing the inputs and outputs of a draw, it's possible to construct a much more optimal barrier that only syncs what is necessary.
This commit is contained in:
parent
7a759326b3
commit
99bf7dbb36
@ -633,6 +633,13 @@ namespace skyline::gpu {
|
||||
return mirror;
|
||||
}
|
||||
|
||||
/**
 * @brief Extends the supplied stage masks with the barrier parameters for a read of this buffer
 * @param dstStage Stage that is OR'd into `dstStageMask` when a barrier is needed
 * @param srcStageMask Source stage mask to accumulate into
 * @param dstStageMask Destination stage mask to accumulate into
 * @note Both masks are left untouched unless `currentExecutionGpuDirty` is set
 */
void Buffer::PopulateReadBarrier(vk::PipelineStageFlagBits dstStage, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask) {
    if (!currentExecutionGpuDirty)
        return; // Nothing to add to either mask in this state

    dstStageMask |= dstStage;
    srcStageMask |= vk::PipelineStageFlagBits::eAllCommands;
}
|
||||
|
||||
void Buffer::lock() {
|
||||
mutex.lock();
|
||||
accumulatedCpuLockCounter++;
|
||||
|
@ -430,6 +430,11 @@ namespace skyline::gpu {
|
||||
* @note The buffer **must** be kept locked until the span is no longer in use
|
||||
*/
|
||||
span<u8> GetReadOnlyBackingSpan(bool isFirstUsage, const std::function<void()> &flushHostCallback);
|
||||
|
||||
/**
|
||||
* @brief Populates the input src and dst stage masks with appropriate read barrier parameters for the current buffer state
|
||||
*/
|
||||
void PopulateReadBarrier(vk::PipelineStageFlagBits dstStage, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask);
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -279,9 +279,9 @@ namespace skyline::gpu::interconnect {
|
||||
return (!a && !b) || (a && b && b->GetView() == a);
|
||||
}
|
||||
|
||||
bool CommandExecutor::CreateRenderPassWithSubpass(vk::Rect2D renderArea, span<TextureView *> sampledImages, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, bool noSubpassCreation) {
|
||||
bool CommandExecutor::CreateRenderPassWithSubpass(vk::Rect2D renderArea, span<TextureView *> sampledImages, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, bool noSubpassCreation, vk::PipelineStageFlags srcStageMask, vk::PipelineStageFlags dstStageMask) {
|
||||
auto addSubpass{[&] {
|
||||
renderPass->AddSubpass(inputAttachments, colorAttachments, depthStencilAttachment, gpu);
|
||||
renderPass->AddSubpass(inputAttachments, colorAttachments, depthStencilAttachment, gpu, srcStageMask, dstStageMask);
|
||||
lastSubpassColorAttachments.clear();
|
||||
lastSubpassInputAttachments.clear();
|
||||
|
||||
@ -415,8 +415,8 @@ namespace skyline::gpu::interconnect {
|
||||
cycle->AttachObject(dependency);
|
||||
}
|
||||
|
||||
void CommandExecutor::AddSubpass(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32)> &&function, vk::Rect2D renderArea, span<TextureView *> sampledImages, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, bool noSubpassCreation) {
|
||||
bool gotoNext{CreateRenderPassWithSubpass(renderArea, sampledImages, inputAttachments, colorAttachments, depthStencilAttachment ? &*depthStencilAttachment : nullptr, noSubpassCreation)};
|
||||
void CommandExecutor::AddSubpass(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32)> &&function, vk::Rect2D renderArea, span<TextureView *> sampledImages, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, bool noSubpassCreation, vk::PipelineStageFlags srcStageMask, vk::PipelineStageFlags dstStageMask) {
|
||||
bool gotoNext{CreateRenderPassWithSubpass(renderArea, sampledImages, inputAttachments, colorAttachments, depthStencilAttachment ? &*depthStencilAttachment : nullptr, noSubpassCreation, srcStageMask, dstStageMask)};
|
||||
if (gotoNext)
|
||||
slot->nodes.emplace_back(std::in_place_type_t<node::NextSubpassFunctionNode>(), std::forward<decltype(function)>(function));
|
||||
else
|
||||
|
@ -190,7 +190,7 @@ namespace skyline::gpu::interconnect {
|
||||
* @note This also checks for subpass coalescing and will merge the new subpass with the previous one when possible
|
||||
* @return If the next subpass must be started prior to issuing any commands
|
||||
*/
|
||||
bool CreateRenderPassWithSubpass(vk::Rect2D renderArea, span<TextureView *> sampledImages, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, bool noSubpassCreation = false);
|
||||
bool CreateRenderPassWithSubpass(vk::Rect2D renderArea, span<TextureView *> sampledImages, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, bool noSubpassCreation = false, vk::PipelineStageFlags srcStageMask = {}, vk::PipelineStageFlags dstStageMask = {});
|
||||
|
||||
/**
|
||||
* @brief Ends a render pass if one is currently active and resets all corresponding state
|
||||
@ -264,7 +264,7 @@ namespace skyline::gpu::interconnect {
|
||||
* @param exclusiveSubpass If this subpass should be the only subpass in a render pass
|
||||
* @note Any supplied texture should be attached prior and not undergo any persistent layout transitions till execution
|
||||
*/
|
||||
void AddSubpass(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32)> &&function, vk::Rect2D renderArea, span<TextureView *> sampledImages, span<TextureView *> inputAttachments = {}, span<TextureView *> colorAttachments = {}, TextureView *depthStencilAttachment = {}, bool noSubpassCreation = false);
|
||||
void AddSubpass(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32)> &&function, vk::Rect2D renderArea, span<TextureView *> sampledImages, span<TextureView *> inputAttachments = {}, span<TextureView *> colorAttachments = {}, TextureView *depthStencilAttachment = {}, bool noSubpassCreation = false, vk::PipelineStageFlags srcStageMask = {}, vk::PipelineStageFlags dstStageMask = {});
|
||||
|
||||
/**
|
||||
* @brief Adds a subpass that clears the entirety of the specified attachment with a color value, it may utilize VK_ATTACHMENT_LOAD_OP_CLEAR for a more efficient clear when possible
|
||||
|
@ -2,21 +2,18 @@
|
||||
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#include "command_nodes.h"
|
||||
#include "gpu/texture/texture.h"
|
||||
#include <vulkan/vulkan_enums.hpp>
|
||||
|
||||
namespace skyline::gpu::interconnect::node {
|
||||
RenderPassNode::RenderPassNode(vk::Rect2D renderArea) : subpassDependencies(
|
||||
{
|
||||
// We assume all past commands have been executed when this RP starts
|
||||
vk::SubpassDependency{
|
||||
RenderPassNode::RenderPassNode(vk::Rect2D renderArea)
|
||||
: externalDependency{vk::SubpassDependency{
|
||||
.srcSubpass = VK_SUBPASS_EXTERNAL,
|
||||
.dstSubpass = 0,
|
||||
.srcStageMask = vk::PipelineStageFlagBits::eAllGraphics,
|
||||
.dstStageMask = vk::PipelineStageFlagBits::eAllGraphics,
|
||||
.srcAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
|
||||
.srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
|
||||
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
|
||||
}
|
||||
}
|
||||
), renderArea(renderArea) {}
|
||||
}},
|
||||
renderArea{renderArea} {}
|
||||
|
||||
u32 RenderPassNode::AddAttachment(TextureView *view, GPU &gpu) {
|
||||
auto vkView{view->GetView()};
|
||||
@ -42,6 +39,19 @@ namespace skyline::gpu::interconnect::node {
|
||||
.finalLayout = view->texture->layout,
|
||||
.flags = vk::AttachmentDescriptionFlagBits::eMayAlias
|
||||
});
|
||||
|
||||
if (auto usage{view->texture->GetLastRenderPassUsage()}; usage != texture::RenderPassUsage::None) {
|
||||
if (view->format->vkAspect & vk::ImageAspectFlagBits::eColor)
|
||||
externalDependency.dstStageMask |= vk::PipelineStageFlagBits::eColorAttachmentOutput;
|
||||
else if (view->format->vkAspect & (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil))
|
||||
externalDependency.dstStageMask |= vk::PipelineStageFlagBits::eEarlyFragmentTests | vk::PipelineStageFlagBits::eLateFragmentTests;
|
||||
|
||||
if (usage == texture::RenderPassUsage::RenderTarget)
|
||||
externalDependency.srcStageMask |= externalDependency.dstStageMask;
|
||||
else if (usage == texture::RenderPassUsage::Sampled)
|
||||
externalDependency.srcStageMask |= vk::PipelineStageFlagBits::eAllGraphics;
|
||||
}
|
||||
|
||||
return static_cast<u32>(attachments.size() - 1);
|
||||
} else {
|
||||
// If we've got a match from a previous subpass, we need to preserve the attachment till the current subpass
|
||||
@ -116,7 +126,10 @@ namespace skyline::gpu::interconnect::node {
|
||||
}
|
||||
}
|
||||
|
||||
void RenderPassNode::AddSubpass(span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, GPU& gpu) {
|
||||
void RenderPassNode::AddSubpass(span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, GPU &gpu, vk::PipelineStageFlags srcStageMask, vk::PipelineStageFlags dstStageMask) {
|
||||
externalDependency.srcStageMask |= srcStageMask;
|
||||
externalDependency.dstStageMask |= dstStageMask;
|
||||
|
||||
attachmentReferences.reserve(attachmentReferences.size() + inputAttachments.size() + colorAttachments.size() + (depthStencilAttachment ? 1 : 0));
|
||||
|
||||
auto inputAttachmentsOffset{attachmentReferences.size() * sizeof(vk::AttachmentReference)};
|
||||
@ -225,6 +238,9 @@ namespace skyline::gpu::interconnect::node {
|
||||
preserveAttachmentIt++;
|
||||
}
|
||||
|
||||
if (externalDependency.srcStageMask && externalDependency.dstStageMask)
|
||||
subpassDependencies.push_back(externalDependency);
|
||||
|
||||
auto renderPass{gpu.renderPassCache.GetRenderPass(vk::RenderPassCreateInfo{
|
||||
.attachmentCount = static_cast<u32>(attachmentDescriptions.size()),
|
||||
.pAttachments = attachmentDescriptions.data(),
|
||||
|
@ -48,6 +48,7 @@ namespace skyline::gpu::interconnect::node {
|
||||
public:
|
||||
std::vector<vk::SubpassDescription> subpassDescriptions;
|
||||
std::vector<vk::SubpassDependency> subpassDependencies;
|
||||
vk::SubpassDependency externalDependency;
|
||||
|
||||
vk::Rect2D renderArea;
|
||||
std::vector<vk::ClearValue> clearValues;
|
||||
@ -63,7 +64,7 @@ namespace skyline::gpu::interconnect::node {
|
||||
/**
|
||||
* @brief Creates a subpass with the attachments bound in the specified order
|
||||
*/
|
||||
void AddSubpass(span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, GPU& gpu);
|
||||
void AddSubpass(span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, GPU &gpu, vk::PipelineStageFlags srcStageMask, vk::PipelineStageFlags dstStageMask);
|
||||
|
||||
/**
|
||||
* @brief Clears a color attachment in the current subpass with VK_ATTACHMENT_LOAD_OP_CLEAR
|
||||
|
@ -44,7 +44,7 @@ namespace skyline::gpu::interconnect {
|
||||
u64 blockMappingEndAddr; //!< The end GPU address of `blockMapping`
|
||||
|
||||
public:
|
||||
BufferView view; //!< The buffer view created as a result of a call to `Update()`
|
||||
BufferView view{}; //!< The buffer view created as a result of a call to `Update()`
|
||||
|
||||
/**
|
||||
* @brief Updates `view` based on the supplied GPU mapping
|
||||
|
@ -17,11 +17,16 @@ namespace skyline::gpu::interconnect {
|
||||
};
|
||||
};
|
||||
|
||||
static DynamicBufferBinding GetConstantBufferBinding(InterconnectContext &ctx, span<const u32, Shader::Info::MAX_CBUFS> cbufSizes, BufferView view, size_t idx) {
|
||||
static DynamicBufferBinding GetConstantBufferBinding(InterconnectContext &ctx,
|
||||
span<const u32, Shader::Info::MAX_CBUFS> cbufSizes,
|
||||
BufferView view, size_t idx,
|
||||
vk::PipelineStageFlagBits dstStage,
|
||||
vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask) {
|
||||
if (!view) // Return a dummy buffer if the constant buffer isn't bound
|
||||
return BufferBinding{ctx.gpu.megaBufferAllocator.Allocate(ctx.executor.cycle, PAGE_SIZE).buffer, 0, PAGE_SIZE};
|
||||
|
||||
ctx.executor.AttachBuffer(view);
|
||||
view.GetBuffer()->PopulateReadBarrier(dstStage, srcStageMask, dstStageMask);
|
||||
|
||||
size_t sizeOverride{std::min<size_t>(cbufSizes[idx], view.size)};
|
||||
if (auto megaBufferBinding{view.TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionTag, sizeOverride)}) {
|
||||
@ -32,7 +37,10 @@ namespace skyline::gpu::interconnect {
|
||||
}
|
||||
}
|
||||
|
||||
static DynamicBufferBinding GetStorageBufferBinding(InterconnectContext &ctx, const auto &desc, ConstantBuffer &cbuf, CachedMappedBufferView &cachedView) {
|
||||
static DynamicBufferBinding GetStorageBufferBinding(InterconnectContext &ctx, const auto &desc,
|
||||
ConstantBuffer &cbuf, CachedMappedBufferView &cachedView,
|
||||
vk::PipelineStageFlagBits dstStage,
|
||||
vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask) {
|
||||
struct SsboDescriptor {
|
||||
u64 address;
|
||||
u32 size;
|
||||
@ -46,8 +54,14 @@ namespace skyline::gpu::interconnect {
|
||||
|
||||
auto view{cachedView.view};
|
||||
ctx.executor.AttachBuffer(view);
|
||||
view.GetBuffer()->PopulateReadBarrier(dstStage, srcStageMask, dstStageMask);
|
||||
|
||||
if (desc.is_written) {
|
||||
if (view.GetBuffer()->SequencedCpuBackingWritesBlocked()) {
|
||||
srcStageMask |= vk::PipelineStageFlagBits::eAllCommands;
|
||||
dstStageMask |= dstStage;
|
||||
}
|
||||
|
||||
view.GetBuffer()->MarkGpuDirty();
|
||||
} else {
|
||||
if (auto megaBufferBinding{view.TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionTag)})
|
||||
@ -77,11 +91,16 @@ namespace skyline::gpu::interconnect {
|
||||
return {.raw = primaryVal};
|
||||
}
|
||||
|
||||
static std::pair<vk::DescriptorImageInfo, TextureView *> GetTextureBinding(InterconnectContext &ctx, const auto &desc, Samplers &samplers, Textures &textures, BindlessHandle handle) {
|
||||
static std::pair<vk::DescriptorImageInfo, TextureView *> GetTextureBinding(InterconnectContext &ctx, const auto &desc,
|
||||
Samplers &samplers, Textures &textures,
|
||||
BindlessHandle handle,
|
||||
vk::PipelineStageFlagBits dstStage,
|
||||
vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask) {
|
||||
auto sampler{samplers.GetSampler(ctx, handle.samplerIndex, handle.textureIndex)};
|
||||
auto texture{textures.GetTexture(ctx, handle.textureIndex, desc.type)};
|
||||
ctx.executor.AttachTexture(texture);
|
||||
auto view{texture->GetView()};
|
||||
texture->texture->PopulateReadBarrier(dstStage, srcStageMask, dstStageMask);
|
||||
|
||||
return {
|
||||
vk::DescriptorImageInfo{
|
||||
|
@ -149,7 +149,9 @@ namespace skyline::gpu::interconnect {
|
||||
[=](auto &&executionCallback) {
|
||||
auto dst{dstTextureView.get()};
|
||||
std::array<TextureView *, 1> sampledImages{srcTextureView.get()};
|
||||
executor.AddSubpass(std::move(executionCallback), {{static_cast<i32>(dstRectX), static_cast<i32>(dstRectY)}, {dstRectWidth, dstRectHeight} }, sampledImages, {}, {dst});
|
||||
executor.AddSubpass(std::move(executionCallback), {{static_cast<i32>(dstRectX), static_cast<i32>(dstRectY)}, {dstRectWidth, dstRectHeight} },
|
||||
sampledImages, {}, {dst}, {}, false,
|
||||
vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eAllCommands);
|
||||
}
|
||||
);
|
||||
|
||||
|
@ -37,7 +37,8 @@ namespace skyline::gpu::interconnect::kepler_compute {
|
||||
samplers.Update(ctx, qmd.samplerIndex == soc::gm20b::engine::kepler_compute::QMD::SamplerIndex::ViaHeaderIndex);
|
||||
auto *pipeline{pipelineState.Update(ctx, builder, textures, constantBuffers.boundConstantBuffers, qmd)};
|
||||
|
||||
auto *descUpdateInfo{pipeline->SyncDescriptors(ctx, constantBuffers.boundConstantBuffers, samplers, textures)};
|
||||
vk::PipelineStageFlags srcStageMask{}, dstStageMask{};
|
||||
auto *descUpdateInfo{pipeline->SyncDescriptors(ctx, constantBuffers.boundConstantBuffers, samplers, textures, srcStageMask, dstStageMask)};
|
||||
builder.SetPipeline(*pipeline->compiledPipeline.pipeline, vk::PipelineBindPoint::eCompute);
|
||||
|
||||
if (ctx.gpu.traits.supportsPushDescriptors) {
|
||||
@ -57,13 +58,20 @@ namespace skyline::gpu::interconnect::kepler_compute {
|
||||
struct DrawParams {
|
||||
StateUpdater stateUpdater;
|
||||
std::array<u32, 3> dimensions;
|
||||
vk::PipelineStageFlags srcStageMask, dstStageMask;
|
||||
};
|
||||
auto *drawParams{ctx.executor.allocator->EmplaceUntracked<DrawParams>(DrawParams{stateUpdater, {qmd.ctaRasterWidth, qmd.ctaRasterHeight, qmd.ctaRasterDepth}})};
|
||||
auto *drawParams{ctx.executor.allocator->EmplaceUntracked<DrawParams>(DrawParams{stateUpdater, {qmd.ctaRasterWidth, qmd.ctaRasterHeight, qmd.ctaRasterDepth}, srcStageMask, dstStageMask})};
|
||||
|
||||
|
||||
ctx.executor.AddOutsideRpCommand([drawParams](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &gpu) {
|
||||
drawParams->stateUpdater.RecordAll(gpu, commandBuffer);
|
||||
|
||||
if (drawParams->srcStageMask && drawParams->dstStageMask)
|
||||
commandBuffer.pipelineBarrier(drawParams->srcStageMask, drawParams->dstStageMask, {}, {vk::MemoryBarrier{
|
||||
.srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
|
||||
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite
|
||||
}}, {}, {});
|
||||
|
||||
commandBuffer.dispatch(drawParams->dimensions[0], drawParams->dimensions[1], drawParams->dimensions[2]);
|
||||
});
|
||||
}
|
||||
|
@ -120,7 +120,7 @@ namespace skyline::gpu::interconnect::kepler_compute {
|
||||
}
|
||||
}
|
||||
|
||||
DescriptorUpdateInfo *Pipeline::SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures) {
|
||||
DescriptorUpdateInfo *Pipeline::SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask) {
|
||||
SyncCachedStorageBufferViews(ctx.executor.executionTag);
|
||||
|
||||
u32 writeIdx{};
|
||||
@ -175,12 +175,18 @@ namespace skyline::gpu::interconnect::kepler_compute {
|
||||
writeBufferDescs(vk::DescriptorType::eUniformBuffer, shaderStage.info.constant_buffer_descriptors,
|
||||
[&](const Shader::ConstantBufferDescriptor &desc, size_t arrayIdx) {
|
||||
size_t cbufIdx{desc.index + arrayIdx};
|
||||
return GetConstantBufferBinding(ctx, shaderStage.info.constant_buffer_used_sizes, constantBuffers[cbufIdx].view, cbufIdx);
|
||||
return GetConstantBufferBinding(ctx, shaderStage.info.constant_buffer_used_sizes,
|
||||
constantBuffers[cbufIdx].view, cbufIdx,
|
||||
vk::PipelineStageFlagBits::eComputeShader,
|
||||
srcStageMask, dstStageMask);
|
||||
});
|
||||
|
||||
writeBufferDescs(vk::DescriptorType::eStorageBuffer, shaderStage.info.storage_buffers_descriptors,
|
||||
[&](const Shader::StorageBufferDescriptor &desc, size_t arrayIdx) {
|
||||
auto binding{GetStorageBufferBinding(ctx, desc, constantBuffers[desc.cbuf_index], storageBufferViews[storageBufferIdx])};
|
||||
auto binding{GetStorageBufferBinding(ctx, desc, constantBuffers[desc.cbuf_index],
|
||||
storageBufferViews[storageBufferIdx],
|
||||
vk::PipelineStageFlagBits::eComputeShader,
|
||||
srcStageMask, dstStageMask)};
|
||||
storageBufferIdx += arrayIdx ? 0 : 1;
|
||||
return binding;
|
||||
});
|
||||
@ -188,7 +194,10 @@ namespace skyline::gpu::interconnect::kepler_compute {
|
||||
writeImageDescs(vk::DescriptorType::eCombinedImageSampler, shaderStage.info.texture_descriptors,
|
||||
[&](const Shader::TextureDescriptor &desc, size_t arrayIdx) {
|
||||
BindlessHandle handle{ReadBindlessHandle(ctx, constantBuffers, desc, arrayIdx)};
|
||||
auto binding{GetTextureBinding(ctx, desc, samplers, textures, handle)};
|
||||
auto binding{GetTextureBinding(ctx, desc,
|
||||
samplers, textures, handle,
|
||||
vk::PipelineStageFlagBits::eComputeShader,
|
||||
srcStageMask, dstStageMask)};
|
||||
return binding.first;
|
||||
});
|
||||
|
||||
|
@ -55,7 +55,7 @@ namespace skyline::gpu::interconnect::kepler_compute {
|
||||
/**
|
||||
* @brief Creates a descriptor set update from the current GPU state
|
||||
*/
|
||||
DescriptorUpdateInfo *SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures);
|
||||
DescriptorUpdateInfo *SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask);
|
||||
};
|
||||
|
||||
class PipelineManager {
|
||||
|
@ -21,13 +21,14 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
|
||||
VertexBufferState::VertexBufferState(dirty::Handle dirtyHandle, DirtyManager &manager, const EngineRegisters &engine, u32 index) : engine{manager, dirtyHandle, engine}, index{index} {}
|
||||
|
||||
void VertexBufferState::Flush(InterconnectContext &ctx, StateUpdateBuilder &builder) {
|
||||
void VertexBufferState::Flush(InterconnectContext &ctx, StateUpdateBuilder &builder, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask) {
|
||||
size_t size{engine->vertexStreamLimit - engine->vertexStream.location + 1};
|
||||
|
||||
if (engine->vertexStream.format.enable && engine->vertexStream.location != 0 && size) {
|
||||
view.Update(ctx, engine->vertexStream.location, size);
|
||||
if (*view) {
|
||||
ctx.executor.AttachBuffer(*view);
|
||||
view->GetBuffer()->PopulateReadBarrier(vk::PipelineStageFlagBits::eVertexInput, srcStageMask, dstStageMask);
|
||||
|
||||
if (megaBufferBinding = view->TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionTag);
|
||||
megaBufferBinding)
|
||||
@ -48,7 +49,10 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
builder.SetVertexBuffer(index, {ctx.gpu.megaBufferAllocator.Allocate(ctx.executor.cycle, 0).buffer}, ctx.gpu.traits.supportsExtendedDynamicState, engine->vertexStream.format.stride);
|
||||
}
|
||||
|
||||
bool VertexBufferState::Refresh(InterconnectContext &ctx, StateUpdateBuilder &builder) {
|
||||
bool VertexBufferState::Refresh(InterconnectContext &ctx, StateUpdateBuilder &builder, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask) {
|
||||
if (*view)
|
||||
view->GetBuffer()->PopulateReadBarrier(vk::PipelineStageFlagBits::eVertexInput, srcStageMask, dstStageMask);
|
||||
|
||||
if (megaBufferBinding) {
|
||||
if (auto newMegaBufferBinding{view->TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionTag)};
|
||||
newMegaBufferBinding != megaBufferBinding) {
|
||||
@ -117,7 +121,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
|
||||
IndexBufferState::IndexBufferState(dirty::Handle dirtyHandle, DirtyManager &manager, const EngineRegisters &engine) : engine{manager, dirtyHandle, engine} {}
|
||||
|
||||
void IndexBufferState::Flush(InterconnectContext &ctx, StateUpdateBuilder &builder, bool quadConversion, u32 firstIndex, u32 elementCount) {
|
||||
void IndexBufferState::Flush(InterconnectContext &ctx, StateUpdateBuilder &builder, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask, bool quadConversion, u32 firstIndex, u32 elementCount) {
|
||||
usedElementCount = elementCount;
|
||||
usedFirstIndex = firstIndex;
|
||||
usedQuadConversion = quadConversion;
|
||||
@ -130,6 +134,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
}
|
||||
|
||||
ctx.executor.AttachBuffer(*view);
|
||||
view->GetBuffer()->PopulateReadBarrier(vk::PipelineStageFlagBits::eVertexInput, srcStageMask, dstStageMask);
|
||||
|
||||
indexType = ConvertIndexType(engine->indexBuffer.indexSize);
|
||||
|
||||
@ -144,7 +149,10 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
builder.SetIndexBuffer(*view, indexType);
|
||||
}
|
||||
|
||||
bool IndexBufferState::Refresh(InterconnectContext &ctx, StateUpdateBuilder &builder, bool quadConversion, u32 firstIndex, u32 elementCount) {
|
||||
bool IndexBufferState::Refresh(InterconnectContext &ctx, StateUpdateBuilder &builder, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask, bool quadConversion, u32 firstIndex, u32 elementCount) {
|
||||
if (*view)
|
||||
view->GetBuffer()->PopulateReadBarrier(vk::PipelineStageFlagBits::eVertexInput, srcStageMask, dstStageMask);
|
||||
|
||||
if (elementCount > usedElementCount)
|
||||
return true;
|
||||
|
||||
@ -185,7 +193,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
|
||||
TransformFeedbackBufferState::TransformFeedbackBufferState(dirty::Handle dirtyHandle, DirtyManager &manager, const EngineRegisters &engine, u32 index) : engine{manager, dirtyHandle, engine}, index{index} {}
|
||||
|
||||
void TransformFeedbackBufferState::Flush(InterconnectContext &ctx, StateUpdateBuilder &builder) {
|
||||
void TransformFeedbackBufferState::Flush(InterconnectContext &ctx, StateUpdateBuilder &builder, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask) {
|
||||
if (engine->streamOutEnable) {
|
||||
if (engine->streamOutBuffer.size) {
|
||||
view.Update(ctx, engine->streamOutBuffer.address + engine->streamOutBuffer.loadWritePointerStartOffset, engine->streamOutBuffer.size);
|
||||
@ -193,6 +201,11 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
if (*view) {
|
||||
ctx.executor.AttachBuffer(*view);
|
||||
|
||||
if (view->GetBuffer()->SequencedCpuBackingWritesBlocked()) {
|
||||
srcStageMask |= vk::PipelineStageFlagBits::eAllCommands;
|
||||
dstStageMask |= vk::PipelineStageFlagBits::eTransformFeedbackEXT;
|
||||
}
|
||||
|
||||
view->GetBuffer()->MarkGpuDirty();
|
||||
builder.SetTransformFeedbackBuffer(index, *view);
|
||||
return;
|
||||
@ -206,6 +219,15 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * @brief Re-accumulates the transform feedback barrier stages for the currently held view
 * @param srcStageMask Source stage mask to accumulate into
 * @param dstStageMask Destination stage mask to accumulate into
 * @return Always false
 * @note The masks are only extended when a view is held and its buffer reports SequencedCpuBackingWritesBlocked()
 */
bool TransformFeedbackBufferState::Refresh(InterconnectContext &ctx, StateUpdateBuilder &builder, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask) {
    if (!*view)
        return false; // No view, so there is no buffer to query

    if (!view->GetBuffer()->SequencedCpuBackingWritesBlocked())
        return false;

    dstStageMask |= vk::PipelineStageFlagBits::eTransformFeedbackEXT;
    srcStageMask |= vk::PipelineStageFlagBits::eAllCommands;
    return false;
}
|
||||
|
||||
/**
 * @brief Forwards the purge request to the held `view` by calling its `PurgeCaches()`
 */
void TransformFeedbackBufferState::PurgeCaches() {
|
||||
view.PurgeCaches();
|
||||
}
|
||||
@ -408,18 +430,22 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
dirtyFunc(stencilValues);
|
||||
}
|
||||
|
||||
void ActiveState::Update(InterconnectContext &ctx, Textures &textures, ConstantBufferSet &constantBuffers, StateUpdateBuilder &builder, bool indexed, engine::DrawTopology topology, u32 drawFirstIndex, u32 drawElementCount) {
|
||||
void ActiveState::Update(InterconnectContext &ctx, Textures &textures, ConstantBufferSet &constantBuffers, StateUpdateBuilder &builder,
|
||||
bool indexed, engine::DrawTopology topology, u32 drawFirstIndex, u32 drawElementCount,
|
||||
vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask) {
|
||||
if (topology != directState.inputAssembly.GetPrimitiveTopology()) {
|
||||
directState.inputAssembly.SetPrimitiveTopology(topology);
|
||||
pipeline.MarkDirty(false);
|
||||
}
|
||||
|
||||
auto updateFunc{[&](auto &stateElem, auto &&... args) { stateElem.Update(ctx, builder, args...); }};
|
||||
auto updateFuncBuffer{[&](auto &stateElem, auto &&... args) { stateElem.Update(ctx, builder, srcStageMask, dstStageMask, args...); }};
|
||||
|
||||
pipeline.Update(ctx, textures, constantBuffers, builder);
|
||||
ranges::for_each(vertexBuffers, updateFunc);
|
||||
ranges::for_each(vertexBuffers, updateFuncBuffer);
|
||||
if (indexed)
|
||||
updateFunc(indexBuffer, directState.inputAssembly.NeedsQuadConversion(), drawFirstIndex, drawElementCount);
|
||||
ranges::for_each(transformFeedbackBuffers, updateFunc);
|
||||
updateFuncBuffer(indexBuffer, directState.inputAssembly.NeedsQuadConversion(), drawFirstIndex, drawElementCount);
|
||||
ranges::for_each(transformFeedbackBuffers, updateFuncBuffer);
|
||||
ranges::for_each(viewports, updateFunc);
|
||||
ranges::for_each(scissors, updateFunc);
|
||||
updateFunc(lineWidth);
|
||||
|
@ -27,9 +27,9 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
public:
|
||||
VertexBufferState(dirty::Handle dirtyHandle, DirtyManager &manager, const EngineRegisters &engine, u32 index);
|
||||
|
||||
void Flush(InterconnectContext &ctx, StateUpdateBuilder &builder);
|
||||
void Flush(InterconnectContext &ctx, StateUpdateBuilder &builder, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask);
|
||||
|
||||
bool Refresh(InterconnectContext &ctx, StateUpdateBuilder &builder);
|
||||
bool Refresh(InterconnectContext &ctx, StateUpdateBuilder &builder, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask);
|
||||
|
||||
void PurgeCaches();
|
||||
};
|
||||
@ -54,14 +54,14 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
public:
|
||||
IndexBufferState(dirty::Handle dirtyHandle, DirtyManager &manager, const EngineRegisters &engine);
|
||||
|
||||
void Flush(InterconnectContext &ctx, StateUpdateBuilder &builder, bool quadConversion, u32 firstIndex, u32 elementCount);
|
||||
void Flush(InterconnectContext &ctx, StateUpdateBuilder &builder, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask, bool quadConversion, u32 firstIndex, u32 elementCount);
|
||||
|
||||
bool Refresh(InterconnectContext &ctx, StateUpdateBuilder &builder, bool quadConversion, u32 firstIndex, u32 elementCount);
|
||||
bool Refresh(InterconnectContext &ctx, StateUpdateBuilder &builder, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask, bool quadConversion, u32 firstIndex, u32 elementCount);
|
||||
|
||||
void PurgeCaches();
|
||||
};
|
||||
|
||||
class TransformFeedbackBufferState : dirty::CachedManualDirty {
|
||||
class TransformFeedbackBufferState : dirty::CachedManualDirty, dirty::RefreshableManualDirty {
|
||||
public:
|
||||
struct EngineRegisters {
|
||||
const engine::StreamOutBuffer &streamOutBuffer;
|
||||
@ -78,7 +78,9 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
public:
|
||||
TransformFeedbackBufferState(dirty::Handle dirtyHandle, DirtyManager &manager, const EngineRegisters &engine, u32 index);
|
||||
|
||||
void Flush(InterconnectContext &ctx, StateUpdateBuilder &builder);
|
||||
void Flush(InterconnectContext &ctx, StateUpdateBuilder &builder, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask);
|
||||
|
||||
bool Refresh(InterconnectContext &ctx, StateUpdateBuilder &builder, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask);
|
||||
|
||||
void PurgeCaches();
|
||||
};
|
||||
@ -258,7 +260,9 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
/**
|
||||
* @brief Updates the active state for a given draw operation, removing the dirtiness of all member states
|
||||
*/
|
||||
void Update(InterconnectContext &ctx, Textures &textures, ConstantBufferSet &constantBuffers, StateUpdateBuilder &builder, bool indexed, engine::DrawTopology topology, u32 drawFirstIndex, u32 drawElementCount);
|
||||
void Update(InterconnectContext &ctx, Textures &textures, ConstantBufferSet &constantBuffers, StateUpdateBuilder &builder,
|
||||
bool indexed, engine::DrawTopology topology, u32 drawFirstIndex, u32 drawElementCount,
|
||||
vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask);
|
||||
|
||||
Pipeline *GetPipeline();
|
||||
|
||||
|
@ -212,10 +212,11 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
|
||||
void Maxwell3D::Draw(engine::DrawTopology topology, bool transformFeedbackEnable, bool indexed, u32 count, u32 first, u32 instanceCount, u32 vertexOffset, u32 firstInstance) {
|
||||
StateUpdateBuilder builder{*ctx.executor.allocator};
|
||||
vk::PipelineStageFlags srcStageMask{}, dstStageMask{};
|
||||
|
||||
Pipeline *oldPipeline{activeState.GetPipeline()};
|
||||
samplers.Update(ctx, samplerBinding.value == engine::SamplerBinding::Value::ViaHeaderBinding);
|
||||
activeState.Update(ctx, textures, constantBuffers.boundConstantBuffers, builder, indexed, topology, first, count);
|
||||
activeState.Update(ctx, textures, constantBuffers.boundConstantBuffers, builder, indexed, topology, first, count, srcStageMask, dstStageMask);
|
||||
if (directState.inputAssembly.NeedsQuadConversion()) {
|
||||
count = conversion::quads::GetIndexCount(count);
|
||||
first = 0;
|
||||
@ -231,17 +232,18 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
Pipeline *pipeline{activeState.GetPipeline()};
|
||||
activeDescriptorSetSampledImages.resize(pipeline->GetTotalSampledImageCount());
|
||||
|
||||
|
||||
auto *descUpdateInfo{[&]() -> DescriptorUpdateInfo * {
|
||||
if (((oldPipeline == pipeline) || (oldPipeline && oldPipeline->CheckBindingMatch(pipeline))) && constantBuffers.quickBindEnabled) {
|
||||
// If bindings between the old and new pipelines are the same we can reuse the descriptor sets given that quick bind is enabled (meaning that no buffer updates or calls to non-graphics engines have occurred that could invalidate them)
|
||||
if (constantBuffers.quickBind)
|
||||
// If only a single constant buffer has been rebound between draws we can perform a partial descriptor update
|
||||
return pipeline->SyncDescriptorsQuickBind(ctx, constantBuffers.boundConstantBuffers, samplers, textures, *constantBuffers.quickBind, activeDescriptorSetSampledImages);
|
||||
return pipeline->SyncDescriptorsQuickBind(ctx, constantBuffers.boundConstantBuffers, samplers, textures, *constantBuffers.quickBind, activeDescriptorSetSampledImages, srcStageMask, dstStageMask);
|
||||
else
|
||||
return nullptr;
|
||||
} else {
|
||||
// If bindings have changed or quick bind is disabled, perform a full descriptor update
|
||||
return pipeline->SyncDescriptors(ctx, constantBuffers.boundConstantBuffers, samplers, textures, activeDescriptorSetSampledImages);
|
||||
return pipeline->SyncDescriptors(ctx, constantBuffers.boundConstantBuffers, samplers, textures, activeDescriptorSetSampledImages, srcStageMask, dstStageMask);
|
||||
}
|
||||
}()};
|
||||
|
||||
@ -319,7 +321,6 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
|
||||
if (drawParams->transformFeedbackEnable)
|
||||
commandBuffer.endTransformFeedbackEXT(0, {}, {});
|
||||
}, scissor, activeDescriptorSetSampledImages, {}, activeState.GetColorAttachments(), activeState.GetDepthAttachment(), !ctx.gpu.traits.quirks.relaxedRenderPassCompatibility);
|
||||
|
||||
}, scissor, activeDescriptorSetSampledImages, {}, activeState.GetColorAttachments(), activeState.GetDepthAttachment(), !ctx.gpu.traits.quirks.relaxedRenderPassCompatibility, srcStageMask, dstStageMask);
|
||||
}
|
||||
}
|
@ -258,6 +258,22 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
return shaderStages;
|
||||
}
|
||||
|
||||
/**
 * @brief Translates a single graphics shader stage bit into the pipeline stage bit for that same stage
 * @param stage The shader stage to translate, only vertex, tessellation control, tessellation evaluation, geometry and fragment are handled
 * @return The pipeline stage bit corresponding to the supplied shader stage
 * @note An exception is thrown for any shader stage outside of the handled set
 */
static vk::PipelineStageFlagBits ConvertShaderToPipelineStage(vk::ShaderStageFlagBits stage) {
    if (stage == vk::ShaderStageFlagBits::eVertex)
        return vk::PipelineStageFlagBits::eVertexShader;

    if (stage == vk::ShaderStageFlagBits::eTessellationControl)
        return vk::PipelineStageFlagBits::eTessellationControlShader;

    if (stage == vk::ShaderStageFlagBits::eTessellationEvaluation)
        return vk::PipelineStageFlagBits::eTessellationEvaluationShader;

    if (stage == vk::ShaderStageFlagBits::eGeometry)
        return vk::PipelineStageFlagBits::eGeometryShader;

    if (stage == vk::ShaderStageFlagBits::eFragment)
        return vk::PipelineStageFlagBits::eFragmentShader;

    // Nothing matched, this stage has no mapping here
    throw exception("Invalid shader stage");
}
|
||||
static Pipeline::DescriptorInfo MakePipelineDescriptorInfo(const std::array<ShaderStage, engine::ShaderStageCount> &shaderStages, bool needsIndividualTextureBindingWrites) {
|
||||
Pipeline::DescriptorInfo descriptorInfo{};
|
||||
u16 bindingIndex{};
|
||||
@ -268,6 +284,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
continue;
|
||||
|
||||
auto &stageDescInfo{descriptorInfo.stages[i]};
|
||||
stageDescInfo.stage = ConvertShaderToPipelineStage(stage.stage);
|
||||
|
||||
auto pushBindings{[&](vk::DescriptorType type, const auto &descs, u16 &count, auto &outputDescs, auto &&descCb, bool individualDescWrites = false) {
|
||||
descriptorInfo.totalWriteDescCount += individualDescWrites ? descs.size() : ((descs.size() > 0) ? 1 : 0);
|
||||
@ -712,7 +729,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
return descriptorInfo.totalCombinedImageSamplerCount;
|
||||
}
|
||||
|
||||
DescriptorUpdateInfo *Pipeline::SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, span<TextureView *> sampledImages) {
|
||||
DescriptorUpdateInfo *Pipeline::SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, span<TextureView *> sampledImages, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask) {
|
||||
SyncCachedStorageBufferViews(ctx.executor.executionTag);
|
||||
|
||||
u32 writeIdx{};
|
||||
@ -788,12 +805,18 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
writeBufferDescs(vk::DescriptorType::eUniformBuffer, stage.uniformBufferDescs, stage.uniformBufferDescTotalCount,
|
||||
[&](const DescriptorInfo::StageDescriptorInfo::UniformBufferDesc &desc, size_t arrayIdx) {
|
||||
size_t cbufIdx{desc.index + arrayIdx};
|
||||
return GetConstantBufferBinding(ctx, {stage.constantBufferUsedSizes}, constantBuffers[i][cbufIdx].view, cbufIdx);
|
||||
return GetConstantBufferBinding(ctx, {stage.constantBufferUsedSizes},
|
||||
constantBuffers[i][cbufIdx].view, cbufIdx,
|
||||
stage.stage,
|
||||
srcStageMask, dstStageMask);
|
||||
});
|
||||
|
||||
writeBufferDescs(vk::DescriptorType::eStorageBuffer, stage.storageBufferDescs, stage.storageBufferDescTotalCount,
|
||||
[&](const DescriptorInfo::StageDescriptorInfo::StorageBufferDesc &desc, size_t arrayIdx) {
|
||||
return GetStorageBufferBinding(ctx, desc, constantBuffers[i][desc.cbuf_index], storageBufferViews[storageBufferIdx++]);
|
||||
return GetStorageBufferBinding(ctx, desc, constantBuffers[i][desc.cbuf_index],
|
||||
storageBufferViews[storageBufferIdx++],
|
||||
stage.stage,
|
||||
srcStageMask, dstStageMask);
|
||||
});
|
||||
|
||||
bindingIdx += stage.uniformTexelBufferDescs.size();
|
||||
@ -802,7 +825,10 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
writeImageDescs(vk::DescriptorType::eCombinedImageSampler, stage.combinedImageSamplerDescs, stage.combinedImageSamplerDescTotalCount,
|
||||
[&](const DescriptorInfo::StageDescriptorInfo::CombinedImageSamplerDesc &desc, size_t arrayIdx) {
|
||||
BindlessHandle handle{ReadBindlessHandle(ctx, constantBuffers[i], desc, arrayIdx)};
|
||||
auto binding{GetTextureBinding(ctx, desc, samplers, textures, handle)};
|
||||
auto binding{GetTextureBinding(ctx, desc,
|
||||
samplers, textures, handle,
|
||||
stage.stage,
|
||||
srcStageMask, dstStageMask)};
|
||||
sampledImages[combinedImageSamplerIdx++] = binding.second;
|
||||
return binding.first;
|
||||
}, ctx.gpu.traits.quirks.needsIndividualTextureBindingWrites);
|
||||
@ -825,7 +851,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
});
|
||||
}
|
||||
|
||||
DescriptorUpdateInfo *Pipeline::SyncDescriptorsQuickBind(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, ConstantBuffers::QuickBind quickBind, span<TextureView *> sampledImages) {
|
||||
DescriptorUpdateInfo *Pipeline::SyncDescriptorsQuickBind(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, ConstantBuffers::QuickBind quickBind, span<TextureView *> sampledImages, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask) {
|
||||
SyncCachedStorageBufferViews(ctx.executor.executionTag);
|
||||
|
||||
size_t stageIndex{static_cast<size_t>(quickBind.stage)};
|
||||
@ -879,18 +905,27 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
writeDescs.operator()<false, true>(vk::DescriptorType::eUniformBuffer, cbufUsageInfo.uniformBuffers, stageDescInfo.uniformBufferDescs,
|
||||
[&](auto usage, const DescriptorInfo::StageDescriptorInfo::UniformBufferDesc &desc, size_t arrayIdx) -> DynamicBufferBinding {
|
||||
size_t cbufIdx{desc.index + arrayIdx};
|
||||
return GetConstantBufferBinding(ctx, {stageDescInfo.constantBufferUsedSizes}, stageConstantBuffers[cbufIdx].view, cbufIdx);
|
||||
return GetConstantBufferBinding(ctx, {stageDescInfo.constantBufferUsedSizes},
|
||||
stageConstantBuffers[cbufIdx].view, cbufIdx,
|
||||
stageDescInfo.stage,
|
||||
srcStageMask, dstStageMask);
|
||||
});
|
||||
|
||||
writeDescs.operator()<false, true>(vk::DescriptorType::eStorageBuffer, cbufUsageInfo.storageBuffers, stageDescInfo.storageBufferDescs,
|
||||
[&](auto usage, const DescriptorInfo::StageDescriptorInfo::StorageBufferDesc &desc, size_t arrayIdx) {
|
||||
return GetStorageBufferBinding(ctx, desc, stageConstantBuffers[desc.cbuf_index], storageBufferViews[usage.entirePipelineIdx + arrayIdx]);
|
||||
return GetStorageBufferBinding(ctx, desc, stageConstantBuffers[desc.cbuf_index],
|
||||
storageBufferViews[usage.entirePipelineIdx + arrayIdx],
|
||||
stageDescInfo.stage,
|
||||
srcStageMask, dstStageMask);
|
||||
});
|
||||
|
||||
writeDescs.operator()<true, false>(vk::DescriptorType::eCombinedImageSampler, cbufUsageInfo.combinedImageSamplers, stageDescInfo.combinedImageSamplerDescs,
|
||||
[&](auto usage, const DescriptorInfo::StageDescriptorInfo::CombinedImageSamplerDesc &desc, size_t arrayIdx) {
|
||||
BindlessHandle handle{ReadBindlessHandle(ctx, stageConstantBuffers, desc, arrayIdx)};
|
||||
auto binding{GetTextureBinding(ctx, desc, samplers, textures, handle)};
|
||||
auto binding{GetTextureBinding(ctx, desc,
|
||||
samplers, textures, handle,
|
||||
stageDescInfo.stage,
|
||||
srcStageMask, dstStageMask)};
|
||||
sampledImages[usage.entirePipelineIdx + arrayIdx] = binding.second;
|
||||
return binding.first;
|
||||
});
|
||||
|
@ -29,6 +29,8 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
std::vector<vk::DescriptorSetLayoutBinding> descriptorSetLayoutBindings;
|
||||
|
||||
struct StageDescriptorInfo {
|
||||
vk::PipelineStageFlagBits stage;
|
||||
|
||||
// Unwrapped counts (counting each array element as a separate descriptor) for the below desc structs
|
||||
u16 uniformBufferDescTotalCount;
|
||||
u16 storageBufferDescTotalCount;
|
||||
@ -248,13 +250,13 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
* @brief Creates a descriptor set update from the current GPU state
|
||||
* @param sampledImages A span of size `GetTotalSampledImageCount()` in which texture view pointers for each sampled image will be written
|
||||
*/
|
||||
DescriptorUpdateInfo *SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, span<TextureView *> sampledImages);
|
||||
DescriptorUpdateInfo *SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, span<TextureView *> sampledImages, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask);
|
||||
|
||||
/**
|
||||
* @brief Creates a partial descriptor set update from the current GPU state for only the subset of descriptors changed by the quick bind constant buffer
|
||||
* @param sampledImages A span of size `GetTotalSampledImageCount()` in which texture view pointers for each sampled image will be written
|
||||
*/
|
||||
DescriptorUpdateInfo *SyncDescriptorsQuickBind(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, ConstantBuffers::QuickBind quickBind, span<TextureView *> sampledImages);
|
||||
DescriptorUpdateInfo *SyncDescriptorsQuickBind(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, ConstantBuffers::QuickBind quickBind, span<TextureView *> sampledImages, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask);
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -1006,5 +1006,32 @@ namespace skyline::gpu {
|
||||
/**
 * @brief Records how and in which render pass this texture was last used and adjusts the set of pipeline stages that still need a read barrier
 * @param renderPassIndex The index of the render pass using the texture, stored as the last render pass index
 * @param renderPassUsage The kind of usage in that render pass, stored as the last render pass usage
 * @note Usage as a render target marks every shader stage as pending, a usage of None clears all pending stages and any other usage leaves the pending stages untouched
 */
void Texture::UpdateRenderPassUsage(u32 renderPassIndex, texture::RenderPassUsage renderPassUsage) {
    lastRenderPassUsage = renderPassUsage;
    lastRenderPassIndex = renderPassIndex;

    if (renderPassUsage == texture::RenderPassUsage::None) {
        // No usage at all, nothing is left waiting on this texture
        pendingStageMask = {};
    } else if (renderPassUsage == texture::RenderPassUsage::RenderTarget) {
        // The texture is being written as an RT, so a read from any shader stage afterwards needs a barrier first
        pendingStageMask = vk::PipelineStageFlagBits::eVertexShader |
                           vk::PipelineStageFlagBits::eTessellationControlShader |
                           vk::PipelineStageFlagBits::eTessellationEvaluationShader |
                           vk::PipelineStageFlagBits::eGeometryShader |
                           vk::PipelineStageFlagBits::eFragmentShader |
                           vk::PipelineStageFlagBits::eComputeShader;
    }
}
|
||||
|
||||
/**
 * @return The value currently held in lastRenderPassUsage
 */
texture::RenderPassUsage Texture::GetLastRenderPassUsage() {
    return this->lastRenderPassUsage;
}
|
||||
|
||||
/**
 * @brief Adds the stages required for a read of this texture at the given stage to the supplied barrier masks
 * @param dstStage The pipeline stage that is about to read the texture
 * @param srcStageMask Source stage mask to extend with the attachment write stages matching the texture's format aspect
 * @param dstStageMask Destination stage mask to extend with dstStage
 * @note Nothing is modified when dstStage is not in the pending stage mask, otherwise dstStage is removed from the pending mask
 */
void Texture::PopulateReadBarrier(vk::PipelineStageFlagBits dstStage, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask) {
    if (pendingStageMask & dstStage) {
        bool isDepthStencil{static_cast<bool>(format->vkAspect & (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil))};

        if (isDepthStencil) {
            // Depth/stencil attachments are accessed during the fragment test stages
            srcStageMask |= vk::PipelineStageFlagBits::eEarlyFragmentTests | vk::PipelineStageFlagBits::eLateFragmentTests;
        } else if (format->vkAspect & vk::ImageAspectFlagBits::eColor) {
            srcStageMask |= vk::PipelineStageFlagBits::eColorAttachmentOutput;
        }

        // This stage is now covered, so it no longer counts as pending
        pendingStageMask &= ~dstStage;
        dstStageMask |= dstStage;
    }
}
|
||||
}
|
||||
|
@ -410,6 +410,7 @@ namespace skyline::gpu {
|
||||
|
||||
u32 lastRenderPassIndex{}; //!< The index of the last render pass that used this texture
|
||||
texture::RenderPassUsage lastRenderPassUsage{texture::RenderPassUsage::None}; //!< The type of usage in the last render pass
|
||||
vk::PipelineStageFlags pendingStageMask{}; //!< List of pipeline stages that are yet to be flushed for reads since the last time this texture was used as an RT
|
||||
|
||||
friend TextureManager;
|
||||
friend TextureView;
|
||||
@ -606,5 +607,15 @@ namespace skyline::gpu {
|
||||
* @brief Updates renderpass usage tracking information
|
||||
*/
|
||||
void UpdateRenderPassUsage(u32 renderPassIndex, texture::RenderPassUsage renderPassUsage);
|
||||
|
||||
/**
|
||||
* @return The last usage of the texture
|
||||
*/
|
||||
texture::RenderPassUsage GetLastRenderPassUsage();
|
||||
|
||||
/**
|
||||
* @brief Populates the input src and dst stage masks with appropriate read barrier parameters for the current texture state
|
||||
*/
|
||||
void PopulateReadBarrier(vk::PipelineStageFlagBits dstStage, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask);
|
||||
};
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user