From 95a08627e5676cfc405db817ddea70aa73390427 Mon Sep 17 00:00:00 2001 From: PixelyIon Date: Wed, 22 Sep 2021 09:00:11 +0530 Subject: [PATCH] Subpass Support + More RT Formats + Fix `FenceCycle` Cyclic Dependencies Support for subpasses was added by reworking the attachment reuse code to account for preserved attachments and subpass dependencies. A lot of RT formats were also added to allow SMO to boot up entirely, although it should be noted that it doesn't render anything. `FenceCycle` had a cyclic dependency which broke clean exit; we now utilize `std::weak_ptr` inside the `Texture` object. A minor fix was also made for broken stack traces, which were caused by supplying a `nullptr` C-string to libfmt when a symbol was unresolved, resulting in an `abort` due to the invocation of `strlen` on it. --- app/src/main/cpp/skyline/common.h | 5 + app/src/main/cpp/skyline/gpu.cpp | 25 ++-- .../gpu/interconnect/command_executor.cpp | 38 ++++- .../gpu/interconnect/command_executor.h | 14 ++ .../skyline/gpu/interconnect/command_nodes.h | 138 ++++++++++++++---- .../gpu/interconnect/graphics_context.h | 58 ++++++-- .../cpp/skyline/gpu/presentation_engine.cpp | 11 +- app/src/main/cpp/skyline/gpu/texture/format.h | 37 ++++- .../main/cpp/skyline/gpu/texture/texture.cpp | 19 ++- .../main/cpp/skyline/gpu/texture/texture.h | 91 ++++++------ app/src/main/cpp/skyline/loader/loader.cpp | 4 +- .../skyline/soc/gm20b/engines/maxwell/types.h | 9 ++ 12 files changed, 332 insertions(+), 117 deletions(-) diff --git a/app/src/main/cpp/skyline/common.h b/app/src/main/cpp/skyline/common.h index 8cacb426..6d61cb69 100644 --- a/app/src/main/cpp/skyline/common.h +++ b/app/src/main/cpp/skyline/common.h @@ -397,6 +397,11 @@ namespace skyline { constexpr span(const std::span &spn) : std::span(spn) {} + /** + * @brief A single-element constructor for a span + */ + constexpr span(T &spn) : std::span(&spn, 1) {} + /** * @brief We want to support implicitly casting from std::string_view -> span as it's just a specialization of a 
data view which span is a generic form of, the opposite doesn't hold true as not all data held by a span is string data therefore the conversion isn't implicit there */ diff --git a/app/src/main/cpp/skyline/gpu.cpp b/app/src/main/cpp/skyline/gpu.cpp index 3d2bb2c8..ae50860a 100644 --- a/app/src/main/cpp/skyline/gpu.cpp +++ b/app/src/main/cpp/skyline/gpu.cpp @@ -87,26 +87,31 @@ namespace skyline::gpu { case util::Hash(string): \ if(string == type) \ return VK_FALSE; \ - else \ - break + break #define DEBUG_VALIDATION(string) \ case util::Hash(string): \ if(string == type) \ - __builtin_debugtrap(); \ + raise(SIGTRAP); \ break + // Using __builtin_debugtrap() as opposed to raise(SIGTRAP) will result in the inability to continue std::string_view type(message); - auto first{type.find_first_of('[') + 2}; - type = type.substr(first, type.find_first_of(']', first) - 4); + auto first{type.find('[')}; + auto last{type.find(']', first)}; + if (first != std::string_view::npos && last != std::string_view::npos) { + type = type.substr(first + 2, last != std::string_view::npos ? 
last - 4 : last); + std::string typeStr{type}; - switch (util::Hash(type)) { - IGNORE_VALIDATION("UNASSIGNED-CoreValidation-SwapchainPreTransform"); // We handle transformation via Android APIs directly - IGNORE_VALIDATION("UNASSIGNED-GeneralParameterPerfWarn-SuboptimalSwapchain"); // Same as SwapchainPreTransform + switch (util::Hash(type)) { + IGNORE_VALIDATION("UNASSIGNED-CoreValidation-SwapchainPreTransform"); // We handle transformation via Android APIs directly + IGNORE_VALIDATION("UNASSIGNED-GeneralParameterPerfWarn-SuboptimalSwapchain"); // Same as SwapchainPreTransform + IGNORE_VALIDATION("UNASSIGNED-CoreValidation-DrawState-InvalidImageLayout"); // We utilize images as VK_IMAGE_LAYOUT_GENERAL rather than optimal layouts for operations + } + + #undef IGNORE_TYPE } - #undef IGNORE_TYPE - logger->Write(severityLookup.at(std::countr_zero(static_cast(flags))), util::Format("Vk{}:{}[0x{:X}]:I{}:L{}: {}", layerPrefix, vk::to_string(vk::DebugReportObjectTypeEXT(objectType)), object, messageCode, location, message)); return VK_FALSE; diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp index 836729b6..5eaedb23 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp @@ -5,8 +5,8 @@ #include "command_executor.h" namespace skyline::gpu::interconnect { - void CommandExecutor::AddSubpass(const std::function &, GPU &)> &function, vk::Rect2D renderArea, std::vector inputAttachments, std::vector colorAttachments, std::optional depthStencilAttachment) { - if (renderpass) { // TODO: Subpass support (&& renderpass->renderArea != renderArea) + bool CommandExecutor::CreateRenderpass(vk::Rect2D renderArea) { + if (renderpass && renderpass->renderArea != renderArea) { nodes.emplace_back(std::in_place_type_t()); renderpass = nullptr; } @@ -16,13 +16,44 @@ namespace skyline::gpu::interconnect { // We need 
to create a render pass if one doesn't already exist or the current one isn't compatible renderpass = &std::get(nodes.emplace_back(std::in_place_type_t(), renderArea)); - renderpass->AddSubpass(inputAttachments, colorAttachments, depthStencilAttachment); + return newRenderpass; + } + + void CommandExecutor::AddSubpass(const std::function &, GPU &)> &function, vk::Rect2D renderArea, std::vector inputAttachments, std::vector colorAttachments, std::optional depthStencilAttachment) { + bool newRenderpass{CreateRenderpass(renderArea)}; + renderpass->AddSubpass(inputAttachments, colorAttachments, depthStencilAttachment ? &*depthStencilAttachment : nullptr); if (newRenderpass) nodes.emplace_back(std::in_place_type_t(), function); else nodes.emplace_back(std::in_place_type_t(), function); } + void CommandExecutor::AddClearSubpass(TextureView attachment, const vk::ClearColorValue &value) { + bool newRenderpass{CreateRenderpass(vk::Rect2D{ + .extent = attachment.backing->dimensions, + })}; + renderpass->AddSubpass({}, attachment, nullptr); + + if (!renderpass->ClearColorAttachment(0, value)) { + auto function{[scissor = attachment.backing->dimensions, value](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &, GPU &) { + commandBuffer.clearAttachments(vk::ClearAttachment{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .colorAttachment = 0, + .clearValue = value, + }, vk::ClearRect{ + .rect = scissor, + .baseArrayLayer = 0, + .layerCount = 1, + }); + }}; + + if (newRenderpass) + nodes.emplace_back(std::in_place_type_t(), function); + else + nodes.emplace_back(std::in_place_type_t(), function); + } + } + void CommandExecutor::Execute() { if (!nodes.empty()) { if (renderpass) { @@ -36,6 +67,7 @@ namespace skyline::gpu::interconnect { std::visit(VariantVisitor{ [&](FunctionNode &node) { node(commandBuffer, cycle, gpu); }, [&](RenderpassNode &node) { node(commandBuffer, cycle, gpu); }, + [&](NextSubpassNode &node) { node(commandBuffer, cycle, gpu); }, 
[&](RenderpassEndNode &node) { node(commandBuffer, cycle, gpu); }, }, node); } diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h index 8dc62e58..770920de 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h @@ -17,6 +17,11 @@ namespace skyline::gpu::interconnect { boost::container::stable_vector nodes; node::RenderpassNode *renderpass{}; + /** + * @return If a new renderpass was created by the function or the current one was reused as it was compatible + */ + bool CreateRenderpass(vk::Rect2D renderArea); + public: CommandExecutor(const DeviceState &state) : gpu(*state.gpu) {} @@ -26,6 +31,15 @@ namespace skyline::gpu::interconnect { */ void AddSubpass(const std::function &, GPU &)> &function, vk::Rect2D renderArea, std::vector inputAttachments = {}, std::vector colorAttachments = {}, std::optional depthStencilAttachment = {}); + /** + * @brief Adds a subpass that clears the entirety of the specified attachment with a value, it may utilize VK_ATTACHMENT_LOAD_OP_CLEAR for a more efficient clear when possible + * @note Any texture supplied to this **must** be locked by the calling thread, it should also undergo no persistent layout transitions till execution + */ + void AddClearSubpass(TextureView attachment, const vk::ClearColorValue& value); + + /** + * @brief Execute all the nodes and submit the resulting command buffer to the GPU + */ void Execute(); }; } diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.h b/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.h index 87c7dc24..9dc30f23 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.h @@ -49,7 +49,17 @@ namespace skyline::gpu::interconnect::node { std::vector attachmentDescriptions; std::vector attachmentReferences; - std::vector> 
preserveAttachmentReferences; //!< Any attachment that must be preserved to be utilized by a future subpass, these are stored per-subpass to ensure contiguity + std::vector> preserveAttachmentReferences; //!< Any attachment that must be preserved to be utilized by a future subpass, these are stored per-subpass to ensure contiguity + + constexpr static uintptr_t DepthStencilNull{std::numeric_limits::max()}; //!< A sentinel value to denote the lack of a depth stencil attachment in a VkSubpassDescription + + /** + * @brief Rebases a pointer containing an offset relative to the beginning of a container + */ + template + T *RebasePointer(const Container &container, const T *offset) { + return reinterpret_cast(reinterpret_cast(container.data()) + reinterpret_cast(offset)); + } public: std::vector subpassDescriptions; @@ -70,39 +80,81 @@ namespace skyline::gpu::interconnect::node { if (texture == textures.end()) textures.push_back(view.backing); - vk::AttachmentDescription attachmentDescription{ - .format = *view.format, - .initialLayout = view.backing->layout, - .finalLayout = view.backing->layout, - }; - auto vkView{view.GetView()}; auto attachment{std::find(attachments.begin(), attachments.end(), vkView)}; - if (attachment == attachments.end() || attachmentDescriptions[std::distance(attachments.begin(), attachment)] != attachmentDescription) { + if (attachment == attachments.end()) { // If we cannot find any matches for the specified attachment, we add it as a new one attachments.push_back(vkView); - attachmentDescriptions.push_back(attachmentDescription); + attachmentDescriptions.push_back(vk::AttachmentDescription{ + .format = *view.format, + .initialLayout = view.backing->layout, + .finalLayout = view.backing->layout, + }); return attachments.size() - 1; } else { // If we've got a match from a previous subpass, we need to preserve the attachment till the current subpass auto attachmentIndex{std::distance(attachments.begin(), attachment)}; - auto 
attachmentReferenceIt{std::find_if(attachmentReferences.begin(), attachmentReferences.end(), [&](const vk::AttachmentReference &reference) { - return reference.attachment == attachmentIndex; - })}; - auto attachmentReferenceOffset{std::distance(attachmentReferences.begin(), attachmentReferenceIt) * sizeof(vk::AttachmentReference)}; - auto subpassDescriptionIt{std::find_if(subpassDescriptions.begin(), subpassDescriptions.end(), [&](const vk::SubpassDescription &description) { - return reinterpret_cast(description.pDepthStencilAttachment) > attachmentReferenceOffset; - })}; + auto it{subpassDescriptions.begin()}; + for (; it != subpassDescriptions.end(); it++) { + auto referenceBeginIt{attachmentReferences.begin()}; + referenceBeginIt += reinterpret_cast(it->pInputAttachments) / sizeof(vk::AttachmentReference); - for (ssize_t subpassIndex{std::distance(subpassDescriptions.begin(), subpassDescriptionIt)}; subpassIndex != subpassDescriptions.size(); subpassIndex++) - preserveAttachmentReferences[subpassIndex].push_back(attachmentIndex); + auto referenceEndIt{referenceBeginIt + it->inputAttachmentCount + it->colorAttachmentCount}; // We depend on all attachments being contiguous for a subpass, this will horribly break if that assumption is broken + if (reinterpret_cast(it->pDepthStencilAttachment) != DepthStencilNull) + referenceEndIt++; - return std::distance(attachments.begin(), attachment); + if (std::find_if(referenceBeginIt, referenceEndIt, [&](const vk::AttachmentReference &reference) { + return reference.attachment == attachmentIndex; + }) != referenceEndIt) + break; // The first subpass that utilizes the attachment we want to preserve + } + + if (it == subpassDescriptions.end()) + throw exception("Cannot find corresponding subpass for attachment #{}", attachmentIndex); + + auto lastUsageIt{it}; + for (; it != subpassDescriptions.end(); it++) { + auto referenceBeginIt{attachmentReferences.begin()}; + referenceBeginIt += reinterpret_cast(it->pInputAttachments) / 
sizeof(vk::AttachmentReference); + + auto referenceEndIt{referenceBeginIt + it->inputAttachmentCount + it->colorAttachmentCount}; + if (reinterpret_cast(it->pDepthStencilAttachment) != DepthStencilNull) + referenceEndIt++; + + if (std::find_if(referenceBeginIt, referenceEndIt, [&](const vk::AttachmentReference &reference) { + return reference.attachment == attachmentIndex; + }) != referenceEndIt) { + lastUsageIt = it; + continue; // If a subpass uses an attachment then it doesn't need to be preserved + } + + auto &subpassPreserveAttachments{preserveAttachmentReferences[std::distance(subpassDescriptions.begin(), it)]}; + if (std::find(subpassPreserveAttachments.begin(), subpassPreserveAttachments.end(), attachmentIndex) == subpassPreserveAttachments.end()) // Only record the attachment if this subpass isn't already preserving it + subpassPreserveAttachments.push_back(attachmentIndex); + } + + vk::SubpassDependency dependency{ + .srcSubpass = static_cast(std::distance(subpassDescriptions.begin(), lastUsageIt)), + .dstSubpass = static_cast(subpassDescriptions.size()), // We assume that the next subpass is using the attachment + .srcStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput, + .dstStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput, + .srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite, + .dstAccessMask = vk::AccessFlagBits::eColorAttachmentRead, + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + }; + + if (std::find(subpassDependencies.begin(), subpassDependencies.end(), dependency) == subpassDependencies.end()) + subpassDependencies.push_back(dependency); + + return attachmentIndex; } } - void AddSubpass(std::vector &inputAttachments, std::vector &colorAttachments, std::optional &depthStencilAttachment) { + /** + * @brief Creates a subpass with the attachments bound in the specified order + */ + void AddSubpass(span inputAttachments, span colorAttachments, TextureView *depthStencilAttachment) { attachmentReferences.reserve(attachmentReferences.size() + inputAttachments.size() + 
colorAttachments.size() + (depthStencilAttachment ? 1 : 0)); auto inputAttachmentsOffset{attachmentReferences.size() * sizeof(vk::AttachmentReference)}; @@ -131,29 +183,57 @@ namespace skyline::gpu::interconnect::node { preserveAttachmentReferences.emplace_back(); // We need to create storage for any attachments that might need to preserved by this pass - // Note: We encode the offsets as the pointers due to vector pointer invalidation, the vector offset will be added to them prior to submission + // Note: We encode the offsets as the pointers due to vector pointer invalidation, RebasePointer(...) can be utilized to deduce the real pointer subpassDescriptions.push_back(vk::SubpassDescription{ .pipelineBindPoint = vk::PipelineBindPoint::eGraphics, .inputAttachmentCount = static_cast(inputAttachments.size()), .pInputAttachments = reinterpret_cast(inputAttachmentsOffset), .colorAttachmentCount = static_cast(colorAttachments.size()), .pColorAttachments = reinterpret_cast(colorAttachmentsOffset), - .pDepthStencilAttachment = reinterpret_cast(depthStencilAttachment ? depthStencilAttachmentOffset : std::numeric_limits::max()), + .pDepthStencilAttachment = reinterpret_cast(depthStencilAttachment ? 
depthStencilAttachmentOffset : DepthStencilNull), }); } + /** + * @brief Clears a color attachment in the current subpass with VK_ATTACHMENT_LOAD_OP_CLEAR + * @param colorAttachment The index of the attachment in the attachments bound to the current subpass + * @return If the attachment could be cleared or not due to conflicts with other operations + * @note We require a subpass to be attached during this as the clear will not take place unless it's referenced by a subpass + */ + bool ClearColorAttachment(u32 colorAttachment, const vk::ClearColorValue &value) { + auto attachmentReference{RebasePointer(attachmentReferences, subpassDescriptions.back().pColorAttachments) + colorAttachment}; + auto attachmentIndex{attachmentReference->attachment}; + + for (const auto &reference : attachmentReferences) + if (reference.attachment == attachmentIndex && &reference != attachmentReference) + return false; + + auto &attachmentDescription{attachmentDescriptions.at(attachmentIndex)}; + if (attachmentDescription.loadOp == vk::AttachmentLoadOp::eLoad) { + attachmentDescription.loadOp = vk::AttachmentLoadOp::eClear; + + clearValues.resize(attachmentIndex + 1); + clearValues[attachmentIndex].color = value; + + return true; + } else if (attachmentDescription.loadOp == vk::AttachmentLoadOp::eClear && clearValues[attachmentIndex].color.uint32 == value.uint32) { + return true; + } + + return false; + } + void operator()(vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &cycle, GPU &gpu) { storage->device = &gpu.vkDevice; auto preserveAttachmentIt{preserveAttachmentReferences.begin()}; - auto attachmentReferenceOffset{reinterpret_cast(attachmentReferences.data())}; for (auto &subpassDescription : subpassDescriptions) { - subpassDescription.pInputAttachments = reinterpret_cast(attachmentReferenceOffset + reinterpret_cast(subpassDescription.pInputAttachments)); - subpassDescription.pColorAttachments = reinterpret_cast(attachmentReferenceOffset + 
reinterpret_cast(subpassDescription.pColorAttachments)); + subpassDescription.pInputAttachments = RebasePointer(attachmentReferences, subpassDescription.pInputAttachments); + subpassDescription.pColorAttachments = RebasePointer(attachmentReferences, subpassDescription.pColorAttachments); auto depthStencilAttachmentOffset{reinterpret_cast(subpassDescription.pDepthStencilAttachment)}; - if (depthStencilAttachmentOffset != std::numeric_limits::max()) - subpassDescription.pDepthStencilAttachment = reinterpret_cast(attachmentReferenceOffset + depthStencilAttachmentOffset); + if (depthStencilAttachmentOffset != DepthStencilNull) + subpassDescription.pDepthStencilAttachment = RebasePointer(attachmentReferences, subpassDescription.pDepthStencilAttachment); else subpassDescription.pDepthStencilAttachment = nullptr; @@ -165,7 +245,7 @@ namespace skyline::gpu::interconnect::node { for (auto &texture : storage->textures) { texture->lock(); texture->WaitOnBacking(); - if (texture->cycle != cycle) + if (texture->cycle.lock() != cycle) texture->WaitOnFence(); } @@ -209,7 +289,7 @@ namespace skyline::gpu::interconnect::node { /** * @brief A FunctionNode which progresses to the next subpass prior to calling the function */ - struct NextSubpassNode : FunctionNode { + struct NextSubpassNode : private FunctionNode { using FunctionNode::FunctionNode; void operator()(vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &cycle, GPU &gpu) { diff --git a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h index 7b811f61..d2dd0062 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h @@ -88,12 +88,30 @@ namespace skyline::gpu::interconnect { switch (format) { case maxwell3d::RenderTarget::ColorFormat::None: return {}; + case maxwell3d::RenderTarget::ColorFormat::R32B32G32A32Float: + return format::R32B32G32A32Float; + case 
maxwell3d::RenderTarget::ColorFormat::R16G16B16A16Float: + return format::R16G16B16A16Float; case maxwell3d::RenderTarget::ColorFormat::A2B10G10R10Unorm: return format::A2B10G10R10Unorm; case maxwell3d::RenderTarget::ColorFormat::R8G8B8A8Unorm: return format::R8G8B8A8Unorm; case maxwell3d::RenderTarget::ColorFormat::A8B8G8R8Srgb: return format::A8B8G8R8Srgb; + case maxwell3d::RenderTarget::ColorFormat::R16G16Snorm: + return format::R16G16Snorm; + case maxwell3d::RenderTarget::ColorFormat::R16G16Float: + return format::R16G16Float; + case maxwell3d::RenderTarget::ColorFormat::B10G11R11Float: + return format::B10G11R11Float; + case maxwell3d::RenderTarget::ColorFormat::R32Float: + return format::R32Float; + case maxwell3d::RenderTarget::ColorFormat::R8G8Snorm: + return format::R8G8Snorm; + case maxwell3d::RenderTarget::ColorFormat::R16Float: + return format::R16Float; + case maxwell3d::RenderTarget::ColorFormat::R8Unorm: + return format::R8Unorm; default: throw exception("Cannot translate the supplied RT format: 0x{:X}", static_cast(format)); } @@ -206,19 +224,33 @@ namespace skyline::gpu::interconnect { aspect |= vk::ImageAspectFlagBits::eColor; aspect &= renderTarget.format->vkAspect; - executor.AddSubpass([aspect = aspect, clearColorValue = clearColorValue, layerId = clear.layerId, scissor = scissors.at(renderTargetIndex)](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &, GPU &) { - commandBuffer.clearAttachments(vk::ClearAttachment{ - .aspectMask = aspect, - .colorAttachment = 0, - .clearValue = clearColorValue, - }, vk::ClearRect{ - .rect = scissor, - .baseArrayLayer = layerId, - .layerCount = 1, - }); - }, vk::Rect2D{ - .extent = renderTarget.backing->dimensions, - }, {}, {renderTarget}); + if (aspect == vk::ImageAspectFlags{}) + return; + + auto scissor{scissors.at(renderTargetIndex)}; + scissor.extent.width = std::min(renderTarget.backing->dimensions.width - scissor.offset.x, scissor.extent.width); + scissor.extent.height = 
std::min(renderTarget.backing->dimensions.height - scissor.offset.y, scissor.extent.height); + + if (scissor.extent.width == 0 || scissor.extent.height == 0) + return; + + if (scissor.extent.width == renderTarget.backing->dimensions.width && scissor.extent.width == renderTarget.backing->dimensions.width && renderTarget.range.baseArrayLayer == 0 && renderTarget.range.layerCount == 1 && clear.layerId == 0) { + executor.AddClearSubpass(renderTarget, clearColorValue); + } else { + executor.AddSubpass([aspect, clearColorValue = clearColorValue, layerId = clear.layerId, scissor](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &, GPU &) { + commandBuffer.clearAttachments(vk::ClearAttachment{ + .aspectMask = aspect, + .colorAttachment = 0, + .clearValue = clearColorValue, + }, vk::ClearRect{ + .rect = scissor, + .baseArrayLayer = layerId, + .layerCount = 1, + }); + }, vk::Rect2D{ + .extent = renderTarget.backing->dimensions, + }, {}, {renderTarget}); + } } } diff --git a/app/src/main/cpp/skyline/gpu/presentation_engine.cpp b/app/src/main/cpp/skyline/gpu/presentation_engine.cpp index 97fa753e..8a82e651 100644 --- a/app/src/main/cpp/skyline/gpu/presentation_engine.cpp +++ b/app/src/main/cpp/skyline/gpu/presentation_engine.cpp @@ -53,7 +53,7 @@ namespace skyline::gpu { engine->vsyncEvent->Signal(); // Post the frame callback to be triggered on the next display refresh - AChoreographer_postFrameCallback64(AChoreographer_getInstance(), reinterpret_cast(&ChoreographerCallback), engine); + AChoreographer_postFrameCallback64(AChoreographer_getInstance(), reinterpret_cast(&ChoreographerCallback), engine); } void PresentationEngine::ChoreographerThread() { @@ -61,7 +61,7 @@ namespace skyline::gpu { try { signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, signal::ExceptionalSignalHandler); choreographerLooper = ALooper_prepare(0); - AChoreographer_postFrameCallback64(AChoreographer_getInstance(), reinterpret_cast(&ChoreographerCallback), this); 
+ AChoreographer_postFrameCallback64(AChoreographer_getInstance(), reinterpret_cast(&ChoreographerCallback), this); ALooper_pollAll(-1, nullptr, nullptr, nullptr); // Will block and process callbacks till ALooper_wake() is called } catch (const signal::SignalException &e) { state.logger->Error("{}\nStack Trace:{}", e.what(), state.loader->GetStackTrace(e.frames)); @@ -122,6 +122,11 @@ namespace skyline::gpu { if ((capabilities.supportedUsageFlags & presentUsage) != presentUsage) throw exception("Swapchain doesn't support image usage '{}': {}", vk::to_string(presentUsage), vk::to_string(capabilities.supportedUsageFlags)); + auto requestedMode{state.settings->disableFrameThrottling ? vk::PresentModeKHR::eMailbox : vk::PresentModeKHR::eFifo}; + auto modes{gpu.vkPhysicalDevice.getSurfacePresentModesKHR(**vkSurface)}; + if (std::find(modes.begin(), modes.end(), requestedMode) == modes.end()) + throw exception("Swapchain doesn't support present mode: {}", vk::to_string(requestedMode)); + vkSwapchain.emplace(gpu.vkDevice, vk::SwapchainCreateInfoKHR{ .surface = **vkSurface, .minImageCount = minImageCount, @@ -132,7 +137,7 @@ namespace skyline::gpu { .imageUsage = presentUsage, .imageSharingMode = vk::SharingMode::eExclusive, .compositeAlpha = vk::CompositeAlphaFlagBitsKHR::eInherit, - .presentMode = state.settings->disableFrameThrottling ? 
vk::PresentModeKHR::eMailbox : vk::PresentModeKHR::eFifo, + .presentMode = requestedMode, .clipped = true, }); diff --git a/app/src/main/cpp/skyline/gpu/texture/format.h b/app/src/main/cpp/skyline/gpu/texture/format.h index 85116991..61bbdadf 100644 --- a/app/src/main/cpp/skyline/gpu/texture/format.h +++ b/app/src/main/cpp/skyline/gpu/texture/format.h @@ -9,11 +9,24 @@ namespace skyline::gpu::format { using Format = gpu::texture::FormatBase; using vkf = vk::Format; using vka = vk::ImageAspectFlagBits; + using swc = gpu::texture::SwizzleChannel; - constexpr Format R8G8B8A8Unorm{sizeof(u32), 1, 1, vkf::eR8G8B8A8Unorm, vka::eColor}; - constexpr Format R5G6B5Unorm{sizeof(u16), 1, 1, vkf::eR5G6B5UnormPack16, vka::eColor}; - constexpr Format A2B10G10R10Unorm{sizeof(u32), 1, 1, vkf::eA2B10G10R10UnormPack32, vka::eColor}; - constexpr Format A8B8G8R8Srgb{sizeof(u32), 1, 1, vkf::eA8B8G8R8SrgbPack32, vka::eColor}; + constexpr Format R8G8B8A8Unorm{sizeof(u32), vkf::eR8G8B8A8Unorm}; + constexpr Format R5G6B5Unorm{sizeof(u16), vkf::eR5G6B5UnormPack16}; + constexpr Format A2B10G10R10Unorm{sizeof(u32), vkf::eA2B10G10R10UnormPack32}; + constexpr Format A8B8G8R8Srgb{sizeof(u32), vkf::eA8B8G8R8SrgbPack32}; + constexpr Format R16G16Snorm{sizeof(u32), vkf::eR16G16Snorm}; + constexpr Format R16G16Float{sizeof(u32), vkf::eR16G16Sfloat}; + constexpr Format B10G11R11Float{sizeof(u32), vkf::eB10G11R11UfloatPack32}; + constexpr Format R32Float{sizeof(u32), vkf::eR32Sfloat}; + constexpr Format R8G8Snorm{sizeof(u16), vkf::eR8G8Snorm}; + constexpr Format R16Float{sizeof(u16), vkf::eR16Sfloat}; + constexpr Format R8Unorm{sizeof(u8), vkf::eR8Unorm}; + constexpr Format R32B32G32A32Float{sizeof(u32) * 4, vkf::eR32G32B32A32Sfloat, .swizzle = { + .blue = swc::Green, + .green = swc::Blue, + }}; + constexpr Format R16G16B16A16Float{sizeof(u16) * 4, vkf::eR16G16B16A16Sfloat}; /** * @brief Converts a Vulkan format to a Skyline format @@ -28,6 +41,22 @@ namespace skyline::gpu::format { return 
A2B10G10R10Unorm; case vk::Format::eA8B8G8R8SrgbPack32: return A8B8G8R8Srgb; + case vk::Format::eR16G16Snorm: + return R16G16Snorm; + case vk::Format::eR16G16Sfloat: + return R16G16Float; + case vk::Format::eB10G11R11UfloatPack32: + return B10G11R11Float; + case vk::Format::eR32Sfloat: + return format::R32Float; + case vk::Format::eR16Sfloat: + return R16Float; + case vk::Format::eR8G8Snorm: + return R8G8Snorm; + case vk::Format::eR8Unorm: + return R8Unorm; + case vk::Format::eR16G16B16A16Sfloat: + return R16G16B16A16Float; default: throw exception("Vulkan format not supported: '{}'", vk::to_string(format)); } diff --git a/app/src/main/cpp/skyline/gpu/texture/texture.cpp b/app/src/main/cpp/skyline/gpu/texture/texture.cpp index 5c7b9df9..fcfae680 100644 --- a/app/src/main/cpp/skyline/gpu/texture/texture.cpp +++ b/app/src/main/cpp/skyline/gpu/texture/texture.cpp @@ -101,8 +101,9 @@ namespace skyline::gpu { } void Texture::WaitOnFence() { - if (cycle) { - cycle->Wait(); + auto lCycle{cycle.lock()}; + if (lCycle) { + lCycle->Wait(); cycle.reset(); } } @@ -232,7 +233,7 @@ namespace skyline::gpu { throw exception("Backing properties changing during sync is not supported"); WaitOnFence(); - cycle = gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) { + auto lCycle{gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) { auto image{GetBacking()}; if (layout != vk::ImageLayout::eTransferDstOptimal) { commandBuffer.pipelineBarrier(layout != vk::ImageLayout::eUndefined ? 
vk::PipelineStageFlagBits::eTopOfPipe : vk::PipelineStageFlagBits::eBottomOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{ @@ -277,8 +278,9 @@ namespace skyline::gpu { .layerCount = 1, }, }); - }); - cycle->AttachObjects(stagingBuffer, shared_from_this()); + })}; + lCycle->AttachObjects(stagingBuffer, shared_from_this()); + cycle = lCycle; } } @@ -309,7 +311,7 @@ namespace skyline::gpu { else if (source->format != format) throw exception("Cannot copy from image with different format"); - cycle = gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) { + auto lCycle{gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) { auto sourceBacking{source->GetBacking()}; if (source->layout != vk::ImageLayout::eTransferSrcOptimal) { commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{ @@ -377,8 +379,9 @@ namespace skyline::gpu { .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .subresourceRange = subresource, }); - }); - cycle->AttachObjects(std::move(source), shared_from_this()); + })}; + lCycle->AttachObjects(std::move(source), shared_from_this()); + cycle = lCycle; } TextureView::TextureView(std::shared_ptr backing, vk::ImageViewType type, vk::ImageSubresourceRange range, texture::Format format, vk::ComponentMapping mapping) : backing(std::move(backing)), type(type), format(format), mapping(mapping), range(range) {} diff --git a/app/src/main/cpp/skyline/gpu/texture/texture.h b/app/src/main/cpp/skyline/gpu/texture/texture.h index 0140cbc0..0f8dbc23 100644 --- a/app/src/main/cpp/skyline/gpu/texture/texture.h +++ b/app/src/main/cpp/skyline/gpu/texture/texture.h @@ -58,15 +58,58 @@ namespace skyline::gpu { } }; + enum class SwizzleChannel : u8 { + Zero, //!< Write 0 to the channel + One, //!< Write 1 to the channel + Red, //!< Red color channel + Green, //!< Green color channel + Blue, //!< Blue color channel + Alpha, //!< Alpha channel 
+ }; + + struct Swizzle { + SwizzleChannel red{SwizzleChannel::Red}; //!< Swizzle for the red channel + SwizzleChannel green{SwizzleChannel::Green}; //!< Swizzle for the green channel + SwizzleChannel blue{SwizzleChannel::Blue}; //!< Swizzle for the blue channel + SwizzleChannel alpha{SwizzleChannel::Alpha}; //!< Swizzle for the alpha channel + + constexpr operator vk::ComponentMapping() { + auto swizzleConvert{[](SwizzleChannel channel) { + switch (channel) { + case SwizzleChannel::Zero: + return vk::ComponentSwizzle::eZero; + case SwizzleChannel::One: + return vk::ComponentSwizzle::eOne; + case SwizzleChannel::Red: + return vk::ComponentSwizzle::eR; + case SwizzleChannel::Green: + return vk::ComponentSwizzle::eG; + case SwizzleChannel::Blue: + return vk::ComponentSwizzle::eB; + case SwizzleChannel::Alpha: + return vk::ComponentSwizzle::eA; + } + }}; + + return vk::ComponentMapping{ + .r = swizzleConvert(red), + .g = swizzleConvert(green), + .b = swizzleConvert(blue), + .a = swizzleConvert(alpha), + }; + } + }; + /** * @note Blocks refers to the atomic unit of a compressed format (IE: The minimum amount of data that can be decompressed) */ struct FormatBase { u8 bpb{}; //!< Bytes Per Block, this is used instead of bytes per pixel as that might not be a whole number for compressed formats - u16 blockHeight{}; //!< The height of a block in pixels - u16 blockWidth{}; //!< The width of a block in pixels vk::Format vkFormat{vk::Format::eUndefined}; vk::ImageAspectFlags vkAspect{vk::ImageAspectFlagBits::eColor}; + Swizzle swizzle{}; + u16 blockHeight{1}; //!< The height of a block in pixels + u16 blockWidth{1}; //!< The width of a block in pixels constexpr bool IsCompressed() const { return (blockHeight != 1) || (blockWidth != 1); @@ -182,48 +225,6 @@ namespace skyline::gpu { } }; - enum class SwizzleChannel : u8 { - Zero, //!< Write 0 to the channel - One, //!< Write 1 to the channel - Red, //!< Red color channel - Green, //!< Green color channel - Blue, //!< Blue 
color channel - Alpha, //!< Alpha channel - }; - - struct Swizzle { - SwizzleChannel red{SwizzleChannel::Red}; //!< Swizzle for the red channel - SwizzleChannel green{SwizzleChannel::Green}; //!< Swizzle for the green channel - SwizzleChannel blue{SwizzleChannel::Blue}; //!< Swizzle for the blue channel - SwizzleChannel alpha{SwizzleChannel::Alpha}; //!< Swizzle for the alpha channel - - constexpr operator vk::ComponentMapping() { - auto swizzleConvert{[](SwizzleChannel channel) { - switch (channel) { - case SwizzleChannel::Zero: - return vk::ComponentSwizzle::eZero; - case SwizzleChannel::One: - return vk::ComponentSwizzle::eOne; - case SwizzleChannel::Red: - return vk::ComponentSwizzle::eR; - case SwizzleChannel::Green: - return vk::ComponentSwizzle::eG; - case SwizzleChannel::Blue: - return vk::ComponentSwizzle::eB; - case SwizzleChannel::Alpha: - return vk::ComponentSwizzle::eA; - } - }}; - - return vk::ComponentMapping{ - .r = swizzleConvert(red), - .g = swizzleConvert(green), - .b = swizzleConvert(blue), - .a = swizzleConvert(alpha), - }; - } - }; - /** * @brief The type of a texture to determine the access patterns for it * @note This is effectively the Tegra X1 texture types with the 1DBuffer + 2DNoMipmap removed as those are handled elsewhere @@ -314,7 +315,7 @@ namespace skyline::gpu { friend TextureView; public: - std::shared_ptr cycle; //!< A fence cycle for when any host operation mutating the texture has completed, it must be waited on prior to any mutations to the backing + std::weak_ptr cycle; //!< A fence cycle for when any host operation mutating the texture has completed, it must be waited on prior to any mutations to the backing std::optional guest; texture::Dimensions dimensions; texture::Format format; diff --git a/app/src/main/cpp/skyline/loader/loader.cpp b/app/src/main/cpp/skyline/loader/loader.cpp index ec464ace..d785716d 100644 --- a/app/src/main/cpp/skyline/loader/loader.cpp +++ b/app/src/main/cpp/skyline/loader/loader.cpp @@ -84,7 +84,7 
@@ namespace skyline::loader { size_t length{}; std::unique_ptr demangled{abi::__cxa_demangle(symbol.name, nullptr, &length, &status), std::free}; - return fmt::format("\n* 0x{:X} ({} from {})", reinterpret_cast(pointer), (status == 0) ? std::string(demangled.get()) : symbol.name, symbol.executableName); + return fmt::format("\n* 0x{:X} ({} from {})", reinterpret_cast(pointer), (status == 0) ? std::string_view(demangled.get()) : symbol.name, symbol.executableName); } else if (!symbol.executableName.empty()) { return fmt::format("\n* 0x{:X} (from {})", reinterpret_cast(pointer), symbol.executableName); } else if (dladdr(pointer, &info)) { @@ -92,7 +92,7 @@ namespace skyline::loader { size_t length{}; std::unique_ptr demangled{abi::__cxa_demangle(info.dli_sname, nullptr, &length, &status), std::free}; - return fmt::format("\n* 0x{:X} ({} from {})", reinterpret_cast(pointer), (status == 0) ? std::string(demangled.get()) : info.dli_sname, info.dli_fname); + return fmt::format("\n* 0x{:X} ({} from {})", reinterpret_cast(pointer), (status == 0) ? std::string_view(demangled.get()) : info.dli_sname ? info.dli_sname : "Unresolved", info.dli_fname ? info.dli_fname : "Unresolved"); } else { return fmt::format("\n* 0x{:X}", reinterpret_cast(pointer)); } diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/types.h b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/types.h index 7610ce31..47b3acd7 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/types.h +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/types.h @@ -42,9 +42,18 @@ namespace skyline::soc::gm20b::engine::maxwell3d::type { enum class ColorFormat : u32 { None = 0x0, + R32B32G32A32Float = 0xC0, + R16G16B16A16Float = 0xCA, A2B10G10R10Unorm = 0xD1, R8G8B8A8Unorm = 0xD5, A8B8G8R8Srgb = 0xD6, + R16G16Snorm = 0xDB, + R16G16Float = 0xDE, + B10G11R11Float = 0xE0, + R32Float = 0xE5, + R8G8Snorm = 0xEB, + R16Float = 0xF2, + R8Unorm = 0xF3, } format; struct TileMode {