From 94e6f3cfa0f2182aa377afa6f3d960d92fabf080 Mon Sep 17 00:00:00 2001 From: PixelyIon Date: Sun, 24 Apr 2022 16:18:36 +0530 Subject: [PATCH] Add quirk for relaxed render pass compatibility As we require a relaxed version of the Vulkan render pass compatibility clause for caching multi-subpass render passes, we now utilize a quirk to determine whether this is supported: it is supported on Nvidia/Adreno, while on AMD/Mali, where it isn't supported, we force single-subpass render passes. --- .../cpp/skyline/gpu/interconnect/command_executor.cpp | 8 +++++++- .../cpp/skyline/gpu/interconnect/command_executor.h | 3 ++- .../cpp/skyline/gpu/interconnect/graphics_context.h | 2 +- app/src/main/cpp/skyline/gpu/trait_manager.cpp | 10 ++++++++-- app/src/main/cpp/skyline/gpu/trait_manager.h | 3 ++- 5 files changed, 20 insertions(+), 6 deletions(-) diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp index eec0bb4f..f1b0e9e2 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp @@ -51,7 +51,13 @@ namespace skyline::gpu::interconnect { cycle->AttachObject(dependency); } - void CommandExecutor::AddSubpass(std::function &, GPU &, vk::RenderPass, u32)> &&function, vk::Rect2D renderArea, span inputAttachments, span colorAttachments, TextureView *depthStencilAttachment) { + void CommandExecutor::AddSubpass(std::function &, GPU &, vk::RenderPass, u32)> &&function, vk::Rect2D renderArea, span inputAttachments, span colorAttachments, TextureView *depthStencilAttachment, bool exclusiveSubpass) { + if (exclusiveSubpass && renderPass) { + nodes.emplace_back(std::in_place_type_t()); + renderPass = nullptr; + subpassCount = 0; + } + bool newRenderPass{CreateRenderPass(renderArea)}; renderPass->AddSubpass(inputAttachments, colorAttachments, depthStencilAttachment ? 
&*depthStencilAttachment : nullptr); if (newRenderPass) diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h index 892e66f3..4a8245f7 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h @@ -57,9 +57,10 @@ namespace skyline::gpu::interconnect { /** * @brief Adds a command that needs to be executed inside a subpass configured with certain attachments + * @param exclusiveSubpass If this subpass should be the only subpass in a render pass * @note Any supplied texture should be attached prior and not undergo any persistent layout transitions till execution */ - void AddSubpass(std::function &, GPU &, vk::RenderPass, u32)> &&function, vk::Rect2D renderArea, span inputAttachments = {}, span colorAttachments = {}, TextureView *depthStencilAttachment = {}); + void AddSubpass(std::function &, GPU &, vk::RenderPass, u32)> &&function, vk::Rect2D renderArea, span inputAttachments = {}, span colorAttachments = {}, TextureView *depthStencilAttachment = {}, bool exclusiveSubpass = false); /** * @brief Adds a subpass that clears the entirety of the specified attachment with a color value, it may utilize VK_ATTACHMENT_LOAD_OP_CLEAR for a more efficient clear when possible diff --git a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h index c1e60148..c6832df9 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h @@ -2897,7 +2897,7 @@ namespace skyline::gpu::interconnect { cycle->AttachObject(drawStorage); }, vk::Rect2D{ .extent = activeColorRenderTargets.empty() ? 
depthRenderTarget.guest.dimensions : activeColorRenderTargets.front()->texture->dimensions, - }, {}, activeColorRenderTargets, depthRenderTargetView); + }, {}, activeColorRenderTargets, depthRenderTargetView, !gpu.traits.quirks.relaxedRenderPassCompatibility); } void DrawVertex(u32 vertexCount, u32 firstVertex) { diff --git a/app/src/main/cpp/skyline/gpu/trait_manager.cpp b/app/src/main/cpp/skyline/gpu/trait_manager.cpp index 6d321887..91e6c7d1 100644 --- a/app/src/main/cpp/skyline/gpu/trait_manager.cpp +++ b/app/src/main/cpp/skyline/gpu/trait_manager.cpp @@ -143,6 +143,7 @@ namespace skyline::gpu { needsIndividualTextureBindingWrites = true; vkImageMutableFormatCostly = true; // Disables UBWC brokenDescriptorAliasing = true; + relaxedRenderPassCompatibility = true; // Adreno drivers support relaxed render pass compatibility rules if (deviceProperties.driverVersion < VK_MAKE_VERSION(512, 600, 0)) maxSubpassCount = 64; // Driver will segfault while destroying the renderpass and associated objects if this is exceeded on all 5xx and below drivers @@ -160,6 +161,11 @@ namespace skyline::gpu { break; } + case vk::DriverId::eNvidiaProprietary: { + relaxedRenderPassCompatibility = true; + break; + } + case vk::DriverId::eAmdProprietary: { maxGlobalPriority = vk::QueueGlobalPriorityEXT::eHigh; break; @@ -172,8 +178,8 @@ namespace skyline::gpu { std::string TraitManager::QuirkManager::Summary() { return fmt::format( - "\n* Needs Individual Texture Binding Writes: {}\n* VkImage Mutable Format is costly: {}\n* Broken Descriptor Aliasing: {}\n* Max Subpass Count: {}\n* Max Global Queue Priority: {}", - needsIndividualTextureBindingWrites, vkImageMutableFormatCostly, brokenDescriptorAliasing, maxSubpassCount, vk::to_string(maxGlobalPriority) + "\n* Needs Individual Texture Binding Writes: {}\n* VkImage Mutable Format is costly: {}\n* Broken Descriptor Aliasing: {}\n* Relaxed Render Pass Compatibility: {}\n* Max Subpass Count: {}\n* Max Global Queue Priority: {}", + 
needsIndividualTextureBindingWrites, vkImageMutableFormatCostly, brokenDescriptorAliasing, relaxedRenderPassCompatibility, maxSubpassCount, vk::to_string(maxGlobalPriority) ); } diff --git a/app/src/main/cpp/skyline/gpu/trait_manager.h b/app/src/main/cpp/skyline/gpu/trait_manager.h index dd260dad..f32d912a 100644 --- a/app/src/main/cpp/skyline/gpu/trait_manager.h +++ b/app/src/main/cpp/skyline/gpu/trait_manager.h @@ -46,9 +46,10 @@ namespace skyline::gpu { bool needsIndividualTextureBindingWrites{}; //!< [Adreno Proprietary] A bug that requires descriptor set writes for VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER to be done individually with descriptorCount = 1 rather than batched bool vkImageMutableFormatCostly{}; //!< [Adreno Proprietary/Freedreno] An indication that VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT is costly and should not be enabled unless absolutely necessary (Disables UBWC on Adreno GPUs) bool brokenDescriptorAliasing{}; //!< [Adreno Proprietary] A bug that causes alised descriptor sets to be incorrectly interpreted by the shader compiler leading to it buggering up LLVM function argument types and crashing + bool relaxedRenderPassCompatibility{}; //!< [Adreno Proprietary/Freedreno] A relaxed version of Vulkan specification's render pass compatibility clause which allows for caching pipeline objects for multi-subpass renderpasses, this is intentionally disabled by default as it requires testing prior to enabling u32 maxSubpassCount{std::numeric_limits::max()}; //!< The maximum amount of subpasses within a renderpass, this is limited to 64 on older Adreno proprietary drivers - vk::QueueGlobalPriorityEXT maximumGlobalPriority{vk::QueueGlobalPriorityEXT::eMedium}; //!< The highest allowed global priority of the queue, drivers will not allow higher priorities to be set on queues + vk::QueueGlobalPriorityEXT maxGlobalPriority{vk::QueueGlobalPriorityEXT::eMedium}; //!< The highest allowed global priority of the queue, drivers will not allow higher priorities to be 
set on queues QuirkManager() = default;