mirror of
https://github.com/skyline-emu/skyline.git
synced 2024-12-29 11:45:29 +03:00
Introduce texture usage system to ensure RPs are split when necessary
Vulkan doesn't allow sampling a texture and using it as a render target (RT) in the same render pass (RP). By tracking each texture's usage status and splitting render passes when this occurs, we can avoid such potential synchronization errors.
This commit is contained in:
parent
2dd4698441
commit
1088ed514c
@ -172,7 +172,7 @@ namespace skyline::gpu::interconnect {
|
||||
allocator = &slot->allocator;
|
||||
}
|
||||
|
||||
bool CommandExecutor::CreateRenderPassWithSubpass(vk::Rect2D renderArea, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, bool noSubpassCreation) {
|
||||
bool CommandExecutor::CreateRenderPassWithSubpass(vk::Rect2D renderArea, span<TextureView *> sampledImages, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, bool noSubpassCreation) {
|
||||
auto addSubpass{[&] {
|
||||
renderPass->AddSubpass(inputAttachments, colorAttachments, depthStencilAttachment, gpu);
|
||||
|
||||
@ -195,35 +195,48 @@ namespace skyline::gpu::interconnect {
|
||||
lastSubpassDepthStencilAttachment = depthStencilAttachment;
|
||||
}};
|
||||
|
||||
span<TextureView *> depthStencilAttachmentSpan{depthStencilAttachment ? span<TextureView *>(depthStencilAttachment) : span<TextureView *>()};
|
||||
auto outputAttachmentViews{ranges::views::concat(colorAttachments, depthStencilAttachmentSpan)};
|
||||
bool attachmentsMatch{ranges::equal(lastSubpassInputAttachments, inputAttachments) &&
|
||||
ranges::equal(lastSubpassColorAttachments, colorAttachments) &&
|
||||
lastSubpassDepthStencilAttachment == depthStencilAttachment};
|
||||
|
||||
if (renderPass == nullptr || renderPass->renderArea != renderArea ||
|
||||
((noSubpassCreation || subpassCount >= gpu.traits.quirks.maxSubpassCount) && !attachmentsMatch)) {
|
||||
bool splitRenderPass{renderPass == nullptr || renderPass->renderArea != renderArea ||
|
||||
((noSubpassCreation || subpassCount >= gpu.traits.quirks.maxSubpassCount) && !attachmentsMatch) ||
|
||||
!ranges::all_of(outputAttachmentViews, [this] (auto view) { return !view || view->texture->ValidateRenderPassUsage(renderPassIndex, texture::RenderPassUsage::RenderTarget); }) ||
|
||||
!ranges::all_of(sampledImages, [this] (auto view) { return view->texture->ValidateRenderPassUsage(renderPassIndex, texture::RenderPassUsage::Sampled); })};
|
||||
|
||||
bool gotoNext{};
|
||||
if (splitRenderPass) {
|
||||
// We need to create a render pass if one doesn't already exist or the current one isn't compatible
|
||||
if (renderPass != nullptr)
|
||||
if (renderPass != nullptr) {
|
||||
slot->nodes.emplace_back(std::in_place_type_t<node::RenderPassEndNode>());
|
||||
renderPassIndex++;
|
||||
}
|
||||
renderPass = &std::get<node::RenderPassNode>(slot->nodes.emplace_back(std::in_place_type_t<node::RenderPassNode>(), renderArea));
|
||||
addSubpass();
|
||||
subpassCount = 1;
|
||||
return false;
|
||||
} else {
|
||||
if (attachmentsMatch) {
|
||||
// The last subpass had the same attachments, so we can reuse them
|
||||
return false;
|
||||
} else {
|
||||
} else if (!attachmentsMatch) {
|
||||
// The last subpass had different attachments, so we need to create a new one
|
||||
addSubpass();
|
||||
subpassCount++;
|
||||
return true;
|
||||
}
|
||||
gotoNext = true;
|
||||
}
|
||||
|
||||
for (auto view : outputAttachmentViews)
|
||||
if (view)
|
||||
view->texture->UpdateRenderPassUsage(renderPassIndex, texture::RenderPassUsage::RenderTarget);
|
||||
|
||||
for (auto view : sampledImages)
|
||||
view->texture->UpdateRenderPassUsage(renderPassIndex, texture::RenderPassUsage::Sampled);
|
||||
|
||||
return gotoNext;
|
||||
}
|
||||
|
||||
void CommandExecutor::FinishRenderPass() {
|
||||
if (renderPass) {
|
||||
slot->nodes.emplace_back(std::in_place_type_t<node::RenderPassEndNode>());
|
||||
renderPassIndex++;
|
||||
|
||||
renderPass = nullptr;
|
||||
subpassCount = 0;
|
||||
@ -309,8 +322,8 @@ namespace skyline::gpu::interconnect {
|
||||
cycle->AttachObject(dependency);
|
||||
}
|
||||
|
||||
void CommandExecutor::AddSubpass(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32)> &&function, vk::Rect2D renderArea, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, bool noSubpassCreation) {
|
||||
bool gotoNext{CreateRenderPassWithSubpass(renderArea, inputAttachments, colorAttachments, depthStencilAttachment ? &*depthStencilAttachment : nullptr, noSubpassCreation)};
|
||||
void CommandExecutor::AddSubpass(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32)> &&function, vk::Rect2D renderArea, span<TextureView *> sampledImages, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, bool noSubpassCreation) {
|
||||
bool gotoNext{CreateRenderPassWithSubpass(renderArea, sampledImages, inputAttachments, colorAttachments, depthStencilAttachment ? &*depthStencilAttachment : nullptr, noSubpassCreation)};
|
||||
if (gotoNext)
|
||||
slot->nodes.emplace_back(std::in_place_type_t<node::NextSubpassFunctionNode>(), std::forward<decltype(function)>(function));
|
||||
else
|
||||
@ -325,7 +338,7 @@ namespace skyline::gpu::interconnect {
|
||||
}
|
||||
|
||||
void CommandExecutor::AddClearColorSubpass(TextureView *attachment, const vk::ClearColorValue &value) {
|
||||
bool gotoNext{CreateRenderPassWithSubpass(vk::Rect2D{.extent = attachment->texture->dimensions}, {}, attachment, nullptr)};
|
||||
bool gotoNext{CreateRenderPassWithSubpass(vk::Rect2D{.extent = attachment->texture->dimensions}, {}, {}, attachment, nullptr)};
|
||||
if (renderPass->ClearColorAttachment(0, value, gpu)) {
|
||||
if (gotoNext)
|
||||
slot->nodes.emplace_back(std::in_place_type_t<node::NextSubpassNode>());
|
||||
@ -350,7 +363,7 @@ namespace skyline::gpu::interconnect {
|
||||
}
|
||||
|
||||
void CommandExecutor::AddClearDepthStencilSubpass(TextureView *attachment, const vk::ClearDepthStencilValue &value) {
|
||||
bool gotoNext{CreateRenderPassWithSubpass(vk::Rect2D{.extent = attachment->texture->dimensions}, {}, {}, attachment)};
|
||||
bool gotoNext{CreateRenderPassWithSubpass(vk::Rect2D{.extent = attachment->texture->dimensions}, {}, {}, {}, attachment)};
|
||||
if (renderPass->ClearDepthStencilAttachment(value, gpu)) {
|
||||
if (gotoNext)
|
||||
slot->nodes.emplace_back(std::in_place_type_t<node::NextSubpassNode>());
|
||||
@ -413,6 +426,7 @@ namespace skyline::gpu::interconnect {
|
||||
}
|
||||
|
||||
texture->cycle = cycle;
|
||||
texture->UpdateRenderPassUsage(0, texture::RenderPassUsage::None);
|
||||
}
|
||||
}
|
||||
|
||||
@ -432,6 +446,7 @@ namespace skyline::gpu::interconnect {
|
||||
attachedTextures.clear();
|
||||
attachedBuffers.clear();
|
||||
allocator->Reset();
|
||||
renderPassIndex = 0;
|
||||
|
||||
// Periodically clear preserve attachments just in case there are new waiters which would otherwise end up waiting forever
|
||||
if ((submissionNumber % (*state.settings->executorSlotCount * 2)) == 0) {
|
||||
|
@ -77,6 +77,7 @@ namespace skyline::gpu::interconnect {
|
||||
CommandRecordThread::Slot *slot{};
|
||||
node::RenderPassNode *renderPass{};
|
||||
size_t subpassCount{}; //!< The number of subpasses in the current render pass
|
||||
u32 renderPassIndex{};
|
||||
bool preserveLocked{};
|
||||
|
||||
/**
|
||||
@ -136,7 +137,7 @@ namespace skyline::gpu::interconnect {
|
||||
* @note This also checks for subpass coalescing and will merge the new subpass with the previous one when possible
|
||||
* @return If the next subpass must be started prior to issuing any commands
|
||||
*/
|
||||
bool CreateRenderPassWithSubpass(vk::Rect2D renderArea, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, bool noSubpassCreation = false);
|
||||
bool CreateRenderPassWithSubpass(vk::Rect2D renderArea, span<TextureView *> sampledImages, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, bool noSubpassCreation = false);
|
||||
|
||||
/**
|
||||
* @brief Ends a render pass if one is currently active and resets all corresponding state
|
||||
@ -210,7 +211,7 @@ namespace skyline::gpu::interconnect {
|
||||
* @param exclusiveSubpass If this subpass should be the only subpass in a render pass
|
||||
* @note Any supplied texture should be attached prior and not undergo any persistent layout transitions till execution
|
||||
*/
|
||||
void AddSubpass(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32)> &&function, vk::Rect2D renderArea, span<TextureView *> inputAttachments = {}, span<TextureView *> colorAttachments = {}, TextureView *depthStencilAttachment = {}, bool noSubpassCreation = false);
|
||||
void AddSubpass(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32)> &&function, vk::Rect2D renderArea, span<TextureView *> sampledImages, span<TextureView *> inputAttachments = {}, span<TextureView *> colorAttachments = {}, TextureView *depthStencilAttachment = {}, bool noSubpassCreation = false);
|
||||
|
||||
/**
|
||||
* @brief Adds a subpass that clears the entirety of the specified attachment with a color value, it may utilize VK_ATTACHMENT_LOAD_OP_CLEAR for a more efficient clear when possible
|
||||
|
@ -148,7 +148,8 @@ namespace skyline::gpu::interconnect {
|
||||
srcTextureView.get(), dstTextureView.get(),
|
||||
[=](auto &&executionCallback) {
|
||||
auto dst{dstTextureView.get()};
|
||||
executor.AddSubpass(std::move(executionCallback), {{static_cast<i32>(dstRectX), static_cast<i32>(dstRectY)}, {dstRectWidth, dstRectHeight} }, {}, {dst});
|
||||
std::array<TextureView *, 1> sampledImages{srcTextureView.get()};
|
||||
executor.AddSubpass(std::move(executionCallback), {{static_cast<i32>(dstRectX), static_cast<i32>(dstRectY)}, {dstRectWidth, dstRectHeight} }, sampledImages, {}, {dst});
|
||||
}
|
||||
);
|
||||
|
||||
|
@ -197,7 +197,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
std::array<TextureView *, 1> colorAttachments{colorView ? &*colorView : nullptr};
|
||||
ctx.executor.AddSubpass([clearAttachments, clearRects](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32) {
|
||||
commandBuffer.clearAttachments(clearAttachments, span(clearRects).first(clearAttachments.size()));
|
||||
}, renderArea, {}, colorView ? colorAttachments : span<TextureView *>{}, depthStencilView ? &*depthStencilView : nullptr);
|
||||
}, renderArea, {}, {}, colorView ? colorAttachments : span<TextureView *>{}, depthStencilView ? &*depthStencilView : nullptr);
|
||||
}
|
||||
|
||||
void Maxwell3D::Draw(engine::DrawTopology topology, bool transformFeedbackEnable, bool indexed, u32 count, u32 first, u32 instanceCount, u32 vertexOffset, u32 firstInstance) {
|
||||
@ -218,18 +218,19 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
}
|
||||
|
||||
Pipeline *pipeline{activeState.GetPipeline()};
|
||||
activeDescriptorSetSampledImages.resize(pipeline->GetTotalSampledImageCount());
|
||||
|
||||
auto *descUpdateInfo{[&]() -> DescriptorUpdateInfo * {
|
||||
if (((oldPipeline == pipeline) || (oldPipeline && oldPipeline->CheckBindingMatch(pipeline))) && constantBuffers.quickBindEnabled) {
|
||||
// If bindings between the old and new pipelines are the same we can reuse the descriptor sets given that quick bind is enabled (meaning that no buffer updates or calls to non-graphics engines have occurred that could invalidate them)
|
||||
if (constantBuffers.quickBind)
|
||||
// If only a single constant buffer has been rebound between draws we can perform a partial descriptor update
|
||||
return pipeline->SyncDescriptorsQuickBind(ctx, constantBuffers.boundConstantBuffers, samplers, textures, *constantBuffers.quickBind);
|
||||
return pipeline->SyncDescriptorsQuickBind(ctx, constantBuffers.boundConstantBuffers, samplers, textures, *constantBuffers.quickBind, activeDescriptorSetSampledImages);
|
||||
else
|
||||
return nullptr;
|
||||
} else {
|
||||
// If bindings have changed or quick bind is disabled, perform a full descriptor update
|
||||
return pipeline->SyncDescriptors(ctx, constantBuffers.boundConstantBuffers, samplers, textures);
|
||||
return pipeline->SyncDescriptors(ctx, constantBuffers.boundConstantBuffers, samplers, textures, activeDescriptorSetSampledImages);
|
||||
}
|
||||
}()};
|
||||
|
||||
@ -295,7 +296,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
|
||||
if (drawParams->transformFeedbackEnable)
|
||||
commandBuffer.endTransformFeedbackEXT(0, {}, {});
|
||||
}, scissor, {}, activeState.GetColorAttachments(), activeState.GetDepthAttachment(), !ctx.gpu.traits.quirks.relaxedRenderPassCompatibility);
|
||||
}, scissor, activeDescriptorSetSampledImages, {}, activeState.GetColorAttachments(), activeState.GetDepthAttachment(), !ctx.gpu.traits.quirks.relaxedRenderPassCompatibility);
|
||||
|
||||
constantBuffers.ResetQuickBind();
|
||||
}
|
||||
|
@ -51,6 +51,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
static constexpr size_t DescriptorBatchSize{0x100};
|
||||
std::shared_ptr<boost::container::static_vector<DescriptorAllocator::ActiveDescriptorSet, DescriptorBatchSize>> attachedDescriptorSets;
|
||||
DescriptorAllocator::ActiveDescriptorSet *activeDescriptorSet{};
|
||||
std::vector<TextureView *> activeDescriptorSetSampledImages{};
|
||||
|
||||
size_t UpdateQuadConversionBuffer(u32 count, u32 firstVertex);
|
||||
|
||||
|
@ -280,21 +280,25 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
});
|
||||
pushBindings(vk::DescriptorType::eStorageBuffer, stage.info.storage_buffers_descriptors, stageDescInfo.storageBufferDescCount, [&](const Shader::StorageBufferDescriptor &desc, u32 descIdx) {
|
||||
auto &usage{stageDescInfo.cbufUsages[desc.cbuf_index]};
|
||||
usage.storageBuffers.push_back({bindingIndex, descIdx, descriptorInfo.totalStorageBufferCount + descIdx});
|
||||
usage.storageBuffers.push_back({bindingIndex, descIdx, descriptorInfo.totalStorageBufferCount});
|
||||
usage.totalBufferDescCount += desc.count;
|
||||
usage.writeDescCount++;
|
||||
descriptorInfo.totalStorageBufferCount += desc.count;
|
||||
});
|
||||
descriptorInfo.totalBufferDescCount += stageDescInfo.uniformBufferDescCount + stageDescInfo.storageBufferDescCount;
|
||||
descriptorInfo.totalStorageBufferCount += stageDescInfo.storageBufferDescCount;
|
||||
|
||||
pushBindings(vk::DescriptorType::eUniformTexelBuffer, stage.info.texture_buffer_descriptors, stageDescInfo.uniformTexelBufferDescCount, [](const auto &, u32) {});
|
||||
pushBindings(vk::DescriptorType::eStorageTexelBuffer, stage.info.image_buffer_descriptors, stageDescInfo.storageTexelBufferDescCount, [](const auto &, u32) {});
|
||||
pushBindings(vk::DescriptorType::eUniformTexelBuffer, stage.info.texture_buffer_descriptors, stageDescInfo.uniformTexelBufferDescCount, [](const auto &, u32) {
|
||||
Logger::Warn("Texture buffer descriptors are not supported");
|
||||
});
|
||||
pushBindings(vk::DescriptorType::eStorageTexelBuffer, stage.info.image_buffer_descriptors, stageDescInfo.storageTexelBufferDescCount, [](const auto &, u32) {
|
||||
Logger::Warn("Image buffer descriptors are not supported");
|
||||
});
|
||||
descriptorInfo.totalTexelBufferDescCount += stageDescInfo.uniformTexelBufferDescCount + stageDescInfo.storageTexelBufferDescCount;
|
||||
|
||||
pushBindings(vk::DescriptorType::eCombinedImageSampler, stage.info.texture_descriptors, stageDescInfo.combinedImageSamplerDescCount, [&](const Shader::TextureDescriptor &desc, u32 descIdx) {
|
||||
auto addUsage{[&](auto idx) {
|
||||
auto &usage{stageDescInfo.cbufUsages[desc.cbuf_index]};
|
||||
usage.combinedImageSamplers.push_back({bindingIndex, descIdx});
|
||||
auto &usage{stageDescInfo.cbufUsages[idx]};
|
||||
usage.combinedImageSamplers.push_back({bindingIndex, descIdx, descriptorInfo.totalCombinedImageSamplerCount});
|
||||
usage.totalImageDescCount += desc.count;
|
||||
usage.writeDescCount++;
|
||||
}};
|
||||
@ -302,8 +306,12 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
addUsage(desc.cbuf_index);
|
||||
if (desc.has_secondary)
|
||||
addUsage(desc.secondary_cbuf_index);
|
||||
|
||||
descriptorInfo.totalCombinedImageSamplerCount += desc.count;
|
||||
}, needsIndividualTextureBindingWrites);
|
||||
pushBindings(vk::DescriptorType::eStorageImage, stage.info.image_descriptors, stageDescInfo.storageImageDescCount, [](const auto &, u32) {});
|
||||
pushBindings(vk::DescriptorType::eStorageImage, stage.info.image_descriptors, stageDescInfo.storageImageDescCount, [](const auto &, u32) {
|
||||
Logger::Warn("Image descriptors are not supported");
|
||||
});
|
||||
descriptorInfo.totalImageDescCount += stageDescInfo.combinedImageSamplerDescCount + stageDescInfo.storageImageDescCount;
|
||||
}
|
||||
return descriptorInfo;
|
||||
@ -630,6 +638,10 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * @return The combined image sampler count stored in this pipeline's descriptor info
 */
u32 Pipeline::GetTotalSampledImageCount() const {
    const auto sampledImageTotal{descriptorInfo.totalCombinedImageSamplerCount};
    return sampledImageTotal;
}
|
||||
|
||||
static DynamicBufferBinding GetConstantBufferBinding(InterconnectContext &ctx, const Shader::Info &info, BufferView view, size_t idx) {
|
||||
if (!view) // Return a dummy buffer if the constant buffer isn't bound
|
||||
return BufferBinding{ctx.gpu.megaBufferAllocator.Allocate(ctx.executor.cycle, 0).buffer, 0, PAGE_SIZE};
|
||||
@ -687,20 +699,23 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
return {.raw = primaryVal};
|
||||
}
|
||||
|
||||
static vk::DescriptorImageInfo GetTextureBinding(InterconnectContext &ctx, const Shader::TextureDescriptor &desc, Samplers &samplers, Textures &textures, BindlessHandle handle) {
|
||||
static std::pair<vk::DescriptorImageInfo, TextureView *> GetTextureBinding(InterconnectContext &ctx, const Shader::TextureDescriptor &desc, Samplers &samplers, Textures &textures, BindlessHandle handle) {
|
||||
auto sampler{samplers.GetSampler(ctx, handle.samplerIndex, handle.textureIndex)};
|
||||
auto texture{textures.GetTexture(ctx, handle.textureIndex, desc.type)};
|
||||
ctx.executor.AttachTexture(texture);
|
||||
auto view{texture->GetView()};
|
||||
|
||||
return vk::DescriptorImageInfo{
|
||||
return {
|
||||
vk::DescriptorImageInfo{
|
||||
.sampler = **sampler,
|
||||
.imageView = view,
|
||||
.imageLayout = texture->texture->layout,
|
||||
.imageLayout = texture->texture->layout
|
||||
},
|
||||
texture
|
||||
};
|
||||
}
|
||||
|
||||
DescriptorUpdateInfo *Pipeline::SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures) {
|
||||
DescriptorUpdateInfo *Pipeline::SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, span<TextureView *> sampledImages) {
|
||||
SyncCachedStorageBufferViews(ctx.executor.executionNumber);
|
||||
|
||||
u32 writeIdx{};
|
||||
@ -712,7 +727,8 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
u32 imageIdx{};
|
||||
auto imageDescs{ctx.executor.allocator->AllocateUntracked<vk::DescriptorImageInfo>(descriptorInfo.totalImageDescCount)};
|
||||
|
||||
u32 storageBufferIdx{}; // Need to keep track of this since to index into the cached view array
|
||||
u32 storageBufferIdx{}; // Need to keep track of this to index into the cached view array
|
||||
u32 combinedImageSamplerIdx{}; // Need to keep track of this to index into the sampled image array
|
||||
u32 bindingIdx{};
|
||||
|
||||
/**
|
||||
@ -781,16 +797,15 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
|
||||
writeBufferDescs(vk::DescriptorType::eStorageBuffer, stage.info.storage_buffers_descriptors, stageDescInfo.storageBufferDescCount,
|
||||
[&](const Shader::StorageBufferDescriptor &desc, size_t arrayIdx) {
|
||||
auto binding{GetStorageBufferBinding(ctx, desc, constantBuffers[i][desc.cbuf_index], storageBufferViews[storageBufferIdx - arrayIdx ? 1 : 0])};
|
||||
// Storage buffer arrays all share the same view index, so to only increment the index once per array do it at element zero and subtract that for all subsequent array elems (see above)
|
||||
storageBufferIdx += arrayIdx ? 0 : 1;
|
||||
return binding;
|
||||
return GetStorageBufferBinding(ctx, desc, constantBuffers[i][desc.cbuf_index], storageBufferViews[storageBufferIdx++]);
|
||||
});
|
||||
|
||||
writeImageDescs(vk::DescriptorType::eCombinedImageSampler, stage.info.texture_descriptors, stageDescInfo.combinedImageSamplerDescCount,
|
||||
[&](const Shader::TextureDescriptor &desc, size_t arrayIdx) {
|
||||
BindlessHandle handle{ReadBindlessHandle(ctx, constantBuffers[i], desc, arrayIdx)};
|
||||
return GetTextureBinding(ctx, desc, samplers, textures, handle);
|
||||
auto binding{GetTextureBinding(ctx, desc, samplers, textures, handle)};
|
||||
sampledImages[combinedImageSamplerIdx++] = binding.second;
|
||||
return binding.first;
|
||||
}, ctx.gpu.traits.quirks.needsIndividualTextureBindingWrites);
|
||||
}
|
||||
|
||||
@ -809,7 +824,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
});
|
||||
}
|
||||
|
||||
DescriptorUpdateInfo *Pipeline::SyncDescriptorsQuickBind(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, ConstantBuffers::QuickBind quickBind) {
|
||||
DescriptorUpdateInfo *Pipeline::SyncDescriptorsQuickBind(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, ConstantBuffers::QuickBind quickBind, span<TextureView *> sampledImages) {
|
||||
SyncCachedStorageBufferViews(ctx.executor.executionNumber);
|
||||
|
||||
size_t stageIndex{static_cast<size_t>(quickBind.stage)};
|
||||
@ -869,13 +884,15 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
|
||||
writeDescs.operator()<false, true>(vk::DescriptorType::eStorageBuffer, cbufUsageInfo.storageBuffers, shaderInfo.storage_buffers_descriptors,
|
||||
[&](auto usage, const Shader::StorageBufferDescriptor &desc, size_t arrayIdx) {
|
||||
return GetStorageBufferBinding(ctx, desc, stageConstantBuffers[desc.cbuf_index], storageBufferViews[usage.storageBufferIdx]);
|
||||
return GetStorageBufferBinding(ctx, desc, stageConstantBuffers[desc.cbuf_index], storageBufferViews[usage.entirePipelineIdx + arrayIdx]);
|
||||
});
|
||||
|
||||
writeDescs.operator()<true, false>(vk::DescriptorType::eCombinedImageSampler, cbufUsageInfo.combinedImageSamplers, shaderInfo.texture_descriptors,
|
||||
[&](auto usage, const Shader::TextureDescriptor &desc, size_t arrayIdx) {
|
||||
BindlessHandle handle{ReadBindlessHandle(ctx, stageConstantBuffers, desc, arrayIdx)};
|
||||
return GetTextureBinding(ctx, desc, samplers, textures, handle);
|
||||
auto binding{GetTextureBinding(ctx, desc, samplers, textures, handle)};
|
||||
sampledImages[usage.entirePipelineIdx + arrayIdx] = binding.second;
|
||||
return binding.first;
|
||||
});
|
||||
|
||||
// Since we don't implement all descriptor types the number of writes might not match what's expected
|
||||
|
@ -60,7 +60,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
struct Usage {
|
||||
u32 binding; //!< Vulkan binding index
|
||||
u32 shaderDescIdx; //!< Index of the descriptor in the appropriate shader info member
|
||||
u32 storageBufferIdx; //!< Index of the storage buffer in the per-pipeline storage buffer cache
|
||||
u32 entirePipelineIdx; //!< Index of the image/storage buffer in the entire pipeline
|
||||
};
|
||||
|
||||
boost::container::small_vector<Usage, 2> uniformBuffers;
|
||||
@ -78,6 +78,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
std::array<StageDescriptorInfo, 5> stages;
|
||||
|
||||
u32 totalStorageBufferCount;
|
||||
u32 totalCombinedImageSamplerCount;
|
||||
|
||||
u32 totalWriteDescCount;
|
||||
u32 totalBufferDescCount;
|
||||
@ -100,6 +101,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
|
||||
public:
|
||||
cache::GraphicsPipelineCache::CompiledPipeline compiledPipeline;
|
||||
size_t sampledImageCount{};
|
||||
|
||||
PackedPipelineState sourcePackedState;
|
||||
|
||||
@ -111,9 +113,19 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
|
||||
bool CheckBindingMatch(Pipeline *other);
|
||||
|
||||
DescriptorUpdateInfo *SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures);
|
||||
u32 GetTotalSampledImageCount() const;
|
||||
|
||||
DescriptorUpdateInfo *SyncDescriptorsQuickBind(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, ConstantBuffers::QuickBind quickBind);
|
||||
/**
|
||||
* @brief Creates a descriptor set update from the current GPU state
|
||||
* @param sampledImages A span of size `GetTotalSampledImageCount()` in which texture view pointers for each sampled image will be written
|
||||
*/
|
||||
DescriptorUpdateInfo *SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, span<TextureView *> sampledImages);
|
||||
|
||||
/**
|
||||
* @brief Creates a partial descriptor set update from the current GPU state for only the subset of descriptors changed by the quick bind constant buffer
|
||||
* @param sampledImages A span of size `GetTotalSampledImageCount()` in which texture view pointers for each sampled image will be written
|
||||
*/
|
||||
DescriptorUpdateInfo *SyncDescriptorsQuickBind(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, ConstantBuffers::QuickBind quickBind, span<TextureView *> sampledImages);
|
||||
};
|
||||
|
||||
class PipelineManager {
|
||||
|
@ -970,4 +970,13 @@ namespace skyline::gpu {
|
||||
newCycle->AttachObjects(std::move(source), shared_from_this());
|
||||
cycle = newCycle;
|
||||
}
|
||||
|
||||
/**
 * @brief Checks whether using this texture as `renderPassUsage` in render pass `renderPassIndex` is compatible with its last recorded usage
 * @return True if the last recorded usage belongs to a different render pass index, is `None`, or equals `renderPassUsage`; false otherwise
 */
bool Texture::ValidateRenderPassUsage(u32 renderPassIndex, texture::RenderPassUsage renderPassUsage) {
    // A usage recorded for another render pass index cannot conflict with this one
    if (lastRenderPassIndex != renderPassIndex)
        return true;

    // Nothing has been recorded, so there is nothing to conflict with
    if (lastRenderPassUsage == texture::RenderPassUsage::None)
        return true;

    // Same render pass index: only an identical usage is accepted
    return lastRenderPassUsage == renderPassUsage;
}
|
||||
|
||||
/**
 * @brief Records the render pass index and usage type most recently associated with this texture
 */
void Texture::UpdateRenderPassUsage(u32 renderPassIndex, texture::RenderPassUsage renderPassUsage) {
    lastRenderPassIndex = renderPassIndex;
    lastRenderPassUsage = renderPassUsage;
}
|
||||
}
|
||||
|
@ -13,6 +13,12 @@
|
||||
|
||||
namespace skyline::gpu {
|
||||
namespace texture {
|
||||
/**
 * @brief How a texture was last used within a render pass, recorded per-texture so that conflicting usages can be detected
 */
enum class RenderPassUsage : u8 {
|
||||
None, //!< No usage has been recorded
|
||||
Sampled, //!< The texture was supplied as a sampled image
|
||||
RenderTarget //!< The texture was supplied as a color or depth-stencil attachment
|
||||
};
|
||||
|
||||
struct Dimensions {
|
||||
u32 width;
|
||||
u32 height;
|
||||
@ -400,6 +406,9 @@ namespace skyline::gpu {
|
||||
|
||||
std::vector<TextureViewStorage> views;
|
||||
|
||||
u32 lastRenderPassIndex{}; //!< The index of the last render pass that used this texture
|
||||
texture::RenderPassUsage lastRenderPassUsage{texture::RenderPassUsage::None}; //!< The type of usage in the last render pass
|
||||
|
||||
friend TextureManager;
|
||||
friend TextureView;
|
||||
|
||||
@ -583,5 +592,16 @@ namespace skyline::gpu {
|
||||
bool FrequentlyLocked() {
|
||||
return accumulatedCpuLockCounter >= FrequentlyLockedThreshold;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Checks if the previous usage in the renderpass is compatible with the current one
* @param renderPassIndex The index of the render pass the texture is about to be used in
* @param renderPassUsage The way the texture is about to be used in that render pass
|
||||
* @return If the new usage is compatible with the previous usage
* @note This only performs the check, the tracked usage is left unchanged
|
||||
*/
|
||||
bool ValidateRenderPassUsage(u32 renderPassIndex, texture::RenderPassUsage renderPassUsage);
|
||||
|
||||
/**
|
||||
* @brief Updates renderpass usage tracking information
* @param renderPassIndex The render pass index to record as the last one that used this texture
* @param renderPassUsage The usage type to record for that render pass
|
||||
*/
|
||||
void UpdateRenderPassUsage(u32 renderPassIndex, texture::RenderPassUsage renderPassUsage);
|
||||
};
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user