diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt index 19a29969..4591715c 100644 --- a/app/CMakeLists.txt +++ b/app/CMakeLists.txt @@ -102,6 +102,7 @@ add_library(skyline SHARED ${source_DIR}/skyline/gpu/texture/texture.cpp ${source_DIR}/skyline/gpu/presentation_engine.cpp ${source_DIR}/skyline/gpu/interconnect/command_executor.cpp + ${source_DIR}/skyline/gpu/interconnect/command_nodes.cpp ${source_DIR}/skyline/soc/host1x/syncpoint.cpp ${source_DIR}/skyline/soc/gm20b/channel.cpp ${source_DIR}/skyline/soc/gm20b/gpfifo.cpp diff --git a/app/src/main/cpp/skyline/common/thread_local.h b/app/src/main/cpp/skyline/common/thread_local.h index 2595979a..a1993a4f 100644 --- a/app/src/main/cpp/skyline/common/thread_local.h +++ b/app/src/main/cpp/skyline/common/thread_local.h @@ -24,8 +24,7 @@ namespace skyline { public: template ThreadLocal(Args &&... args) : constructor([args...]() { return new Type(args...); }) { - int result; - if ((result = pthread_key_create(&key, nullptr))) + if (int result = pthread_key_create(&key, nullptr)) throw exception("Cannot create pthread_key: {}", strerror(result)); } @@ -34,9 +33,8 @@ namespace skyline { if (pointer) return static_cast(pointer); - int result; Type *object{constructor(*this)}; - if ((result = pthread_setspecific(key, object))) + if (int result = pthread_setspecific(key, object)) throw exception("Cannot set pthread_key to constructed type: {}", strerror(result)); return object; @@ -89,8 +87,7 @@ namespace skyline { static_cast(object)->~IntrustiveTypeNode(); }}; - int result; - if ((result = pthread_key_create(&key, destructor))) + if (int result = pthread_key_create(&key, destructor)) throw exception("Cannot create pthread_key: {}", strerror(result)); } @@ -99,9 +96,8 @@ namespace skyline { if (pointer) return &static_cast(pointer)->object; - int result; IntrustiveTypeNode *node{constructor(*this)}; - if ((result = pthread_setspecific(key, node))) + if (int result = pthread_setspecific(key, node)) throw exception("Cannot 
set pthread_key to constructed type: {}", strerror(result)); auto next{list.load(std::memory_order_acquire)}; diff --git a/app/src/main/cpp/skyline/gpu.cpp b/app/src/main/cpp/skyline/gpu.cpp index ae50860a..99cb88df 100644 --- a/app/src/main/cpp/skyline/gpu.cpp +++ b/app/src/main/cpp/skyline/gpu.cpp @@ -85,13 +85,13 @@ namespace skyline::gpu { #define IGNORE_VALIDATION(string) \ case util::Hash(string): \ - if(string == type) \ + if (string == type) \ return VK_FALSE; \ break #define DEBUG_VALIDATION(string) \ case util::Hash(string): \ - if(string == type) \ + if (string == type) \ raise(SIGTRAP); \ break // Using __builtin_debugtrap() as opposed to raise(SIGTRAP) will result in the inability to continue @@ -101,7 +101,6 @@ namespace skyline::gpu { auto last{type.find(']', first)}; if (first != std::string_view::npos && last != std::string_view::npos) { type = type.substr(first + 2, last != std::string_view::npos ? last - 4 : last); - std::string typeStr{type}; switch (util::Hash(type)) { IGNORE_VALIDATION("UNASSIGNED-CoreValidation-SwapchainPreTransform"); // We handle transformation via Android APIs directly diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp index 06bf7c9e..db66e1e5 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp @@ -5,6 +5,8 @@ #include "command_executor.h" namespace skyline::gpu::interconnect { + CommandExecutor::CommandExecutor(const DeviceState &state) : gpu(*state.gpu) {} + bool CommandExecutor::CreateRenderpass(vk::Rect2D renderArea) { if (renderpass && renderpass->renderArea != renderArea) { nodes.emplace_back(std::in_place_type_t()); diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h index 7884e5f6..a63755e5 100644 --- 
a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h @@ -25,7 +25,7 @@ namespace skyline::gpu::interconnect { bool CreateRenderpass(vk::Rect2D renderArea); public: - CommandExecutor(const DeviceState &state) : gpu(*state.gpu) {} + CommandExecutor(const DeviceState &state); /** * @brief Adds a command that needs to be executed inside a subpass configured with certain attachments diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.cpp b/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.cpp new file mode 100644 index 00000000..ac0fb18c --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.cpp @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#include "command_nodes.h" + +namespace skyline::gpu::interconnect::node { + RenderpassNode::Storage::~Storage() { + if (device) { + if (framebuffer) + (**device).destroy(framebuffer, nullptr, *device->getDispatcher()); + if (renderpass) + (**device).destroy(renderpass, nullptr, *device->getDispatcher()); + } + } + + u32 RenderpassNode::AddAttachment(TextureView &view) { + auto &textures{storage->textures}; + auto texture{std::find(textures.begin(), textures.end(), view.backing)}; + if (texture == textures.end()) + textures.push_back(view.backing); + + auto vkView{view.GetView()}; + auto attachment{std::find(attachments.begin(), attachments.end(), vkView)}; + if (attachment == attachments.end()) { + // If we cannot find any matches for the specified attachment, we add it as a new one + attachments.push_back(vkView); + attachmentDescriptions.push_back(vk::AttachmentDescription{ + .format = *view.format, + .initialLayout = view.backing->layout, + .finalLayout = view.backing->layout, + }); + return attachments.size() - 1; + } else { + // If we've got a match from a previous subpass, we need to preserve the attachment till 
the current subpass + auto attachmentIndex{std::distance(attachments.begin(), attachment)}; + + auto it{subpassDescriptions.begin()}; + for (; it != subpassDescriptions.end(); it++) { + // Find the bounds for the attachment references belonging to the current subpass + auto referenceBeginIt{attachmentReferences.begin()}; + referenceBeginIt += reinterpret_cast(it->pInputAttachments) / sizeof(vk::AttachmentReference); + + auto referenceEndIt{referenceBeginIt + it->inputAttachmentCount + it->colorAttachmentCount}; // We depend on all attachments being contiguous for a subpass, this will horribly break if that assumption is broken + if (reinterpret_cast(it->pDepthStencilAttachment) != NoDepthStencil) + referenceEndIt++; + + // Iterate over all attachment references in the current subpass to see if they point to our target attachment + if (std::find_if(referenceBeginIt, referenceEndIt, [&](const vk::AttachmentReference &reference) { + return reference.attachment == attachmentIndex; + }) != referenceEndIt) + break; // The iterator should be set to the first subpass that utilizes the attachment we want to preserve + } + + if (it == subpassDescriptions.end()) + throw exception("Cannot find corresponding subpass for attachment #{}", attachmentIndex); + + auto lastUsageIt{it}; //!< The last subpass that the attachment has been used in for creating a dependency + for (; it != subpassDescriptions.end(); it++) { + auto referenceBeginIt{attachmentReferences.begin()}; + referenceBeginIt += reinterpret_cast(it->pInputAttachments) / sizeof(vk::AttachmentReference); + + auto referenceEndIt{referenceBeginIt + it->inputAttachmentCount + it->colorAttachmentCount}; + if (reinterpret_cast(it->pDepthStencilAttachment) != NoDepthStencil) + referenceEndIt++; + + if (std::find_if(referenceBeginIt, referenceEndIt, [&](const vk::AttachmentReference &reference) { + return reference.attachment == attachmentIndex; + }) != referenceEndIt) { + lastUsageIt = it; + continue; // If a subpass uses an 
attachment then it doesn't need to be preserved + } + + auto &subpassPreserveAttachments{preserveAttachmentReferences[std::distance(subpassDescriptions.begin(), it)]}; + if (std::find(subpassPreserveAttachments.begin(), subpassPreserveAttachments.end(), attachmentIndex) == subpassPreserveAttachments.end()) + subpassPreserveAttachments.push_back(attachmentIndex); // Only record the attachment if this subpass isn't already preserving it + } + + vk::SubpassDependency dependency{ + .srcSubpass = static_cast(std::distance(subpassDescriptions.begin(), lastUsageIt)), + .dstSubpass = static_cast(subpassDescriptions.size()), // We assume that the next subpass is using the attachment + .srcStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput, + .dstStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput, + .srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite, + .dstAccessMask = vk::AccessFlagBits::eColorAttachmentRead, + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + }; + + if (std::find(subpassDependencies.begin(), subpassDependencies.end(), dependency) == subpassDependencies.end()) + subpassDependencies.push_back(dependency); + + return attachmentIndex; + } + } + + void RenderpassNode::AddSubpass(span inputAttachments, span colorAttachments, TextureView *depthStencilAttachment) { + attachmentReferences.reserve(attachmentReferences.size() + inputAttachments.size() + colorAttachments.size() + (depthStencilAttachment ? 
1 : 0)); + + auto inputAttachmentsOffset{attachmentReferences.size() * sizeof(vk::AttachmentReference)}; + for (auto &attachment : inputAttachments) { + attachmentReferences.push_back(vk::AttachmentReference{ + .attachment = AddAttachment(attachment), + .layout = attachment.backing->layout, + }); + } + + auto colorAttachmentsOffset{attachmentReferences.size() * sizeof(vk::AttachmentReference)}; // Calculate new base offset as it has changed since we pushed the input attachments + for (auto &attachment : colorAttachments) { + attachmentReferences.push_back(vk::AttachmentReference{ + .attachment = AddAttachment(attachment), + .layout = attachment.backing->layout, + }); + } + + auto depthStencilAttachmentOffset{attachmentReferences.size() * sizeof(vk::AttachmentReference)}; + if (depthStencilAttachment) { + attachmentReferences.push_back(vk::AttachmentReference{ + .attachment = AddAttachment(*depthStencilAttachment), + .layout = depthStencilAttachment->backing->layout, + }); + } + + preserveAttachmentReferences.emplace_back(); // We need to create storage for any attachments that might need to be preserved by this pass + + // Note: We encode the offsets as the pointers due to vector pointer invalidation, RebasePointer(...) can be utilized to deduce the real pointer + subpassDescriptions.push_back(vk::SubpassDescription{ + .pipelineBindPoint = vk::PipelineBindPoint::eGraphics, + .inputAttachmentCount = static_cast(inputAttachments.size()), + .pInputAttachments = reinterpret_cast(inputAttachmentsOffset), + .colorAttachmentCount = static_cast(colorAttachments.size()), + .pColorAttachments = reinterpret_cast(colorAttachmentsOffset), + .pDepthStencilAttachment = reinterpret_cast(depthStencilAttachment ? 
depthStencilAttachmentOffset : NoDepthStencil), + }); + } + + bool RenderpassNode::ClearColorAttachment(u32 colorAttachment, const vk::ClearColorValue &value) { + auto attachmentReference{RebasePointer(attachmentReferences, subpassDescriptions.back().pColorAttachments) + colorAttachment}; + auto attachmentIndex{attachmentReference->attachment}; + + for (const auto &reference : attachmentReferences) + if (reference.attachment == attachmentIndex && &reference != attachmentReference) + return false; + + auto &attachmentDescription{attachmentDescriptions.at(attachmentIndex)}; + if (attachmentDescription.loadOp == vk::AttachmentLoadOp::eLoad) { + attachmentDescription.loadOp = vk::AttachmentLoadOp::eClear; + + if (clearValues.size() <= attachmentIndex) clearValues.resize(attachmentIndex + 1); // Only ever grow, shrinking would discard clear values already recorded for higher attachment indices + clearValues[attachmentIndex].color = value; + + return true; + } else if (attachmentDescription.loadOp == vk::AttachmentLoadOp::eClear && clearValues[attachmentIndex].color.uint32 == value.uint32) { + return true; + } + + return false; + } + + void RenderpassNode::operator()(vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &cycle, GPU &gpu) { + storage->device = &gpu.vkDevice; + + auto preserveAttachmentIt{preserveAttachmentReferences.begin()}; + for (auto &subpassDescription : subpassDescriptions) { + subpassDescription.pInputAttachments = RebasePointer(attachmentReferences, subpassDescription.pInputAttachments); + subpassDescription.pColorAttachments = RebasePointer(attachmentReferences, subpassDescription.pColorAttachments); + + auto depthStencilAttachmentOffset{reinterpret_cast(subpassDescription.pDepthStencilAttachment)}; + if (depthStencilAttachmentOffset != NoDepthStencil) + subpassDescription.pDepthStencilAttachment = RebasePointer(attachmentReferences, subpassDescription.pDepthStencilAttachment); + else + subpassDescription.pDepthStencilAttachment = nullptr; + + subpassDescription.preserveAttachmentCount = preserveAttachmentIt->size(); + subpassDescription.pPreserveAttachments = 
preserveAttachmentIt->data(); + preserveAttachmentIt++; + } + + for (auto &texture : storage->textures) { + texture->lock(); + texture->WaitOnBacking(); + if (texture->cycle.lock() != cycle) + texture->WaitOnFence(); + } + + auto renderpass{(*gpu.vkDevice).createRenderPass(vk::RenderPassCreateInfo{ + .attachmentCount = static_cast(attachmentDescriptions.size()), + .pAttachments = attachmentDescriptions.data(), + .subpassCount = static_cast(subpassDescriptions.size()), + .pSubpasses = subpassDescriptions.data(), + .dependencyCount = static_cast(subpassDependencies.size()), + .pDependencies = subpassDependencies.data(), + }, nullptr, *gpu.vkDevice.getDispatcher())}; + storage->renderpass = renderpass; + + auto framebuffer{(*gpu.vkDevice).createFramebuffer(vk::FramebufferCreateInfo{ + .renderPass = renderpass, + .attachmentCount = static_cast(attachments.size()), + .pAttachments = attachments.data(), + .width = renderArea.extent.width, + .height = renderArea.extent.height, + .layers = 1, + }, nullptr, *gpu.vkDevice.getDispatcher())}; + storage->framebuffer = framebuffer; + + commandBuffer.beginRenderPass(vk::RenderPassBeginInfo{ + .renderPass = renderpass, + .framebuffer = framebuffer, + .renderArea = renderArea, + .clearValueCount = static_cast(clearValues.size()), + .pClearValues = clearValues.data(), + }, vk::SubpassContents::eInline); + + cycle->AttachObjects(storage); + + for (auto &texture : storage->textures) { + texture->unlock(); + texture->cycle = cycle; + } + } +} diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.h b/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.h index adee94a1..7baf7895 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.h @@ -24,25 +24,22 @@ namespace skyline::gpu::interconnect::node { using FunctionNode = FunctionNodeBase<>; /** - * @brief Creates and begins a VkRenderpass while tying lifetimes of all bound resources to a GPU 
fence + * @brief Creates and begins a VkRenderpass alongside managing all resources bound to it and to the subpasses inside it */ struct RenderpassNode { private: + /** + * @brief Storage for all resources in the VkRenderPass that have their lifetimes bound to the completion fence + */ struct Storage : public FenceCycleDependency { vk::raii::Device *device{}; vk::Framebuffer framebuffer{}; vk::RenderPass renderpass{}; std::vector> textures; - ~Storage() { - if (device) { - if (framebuffer) - (**device).destroy(framebuffer, nullptr, *device->getDispatcher()); - if (renderpass) - (**device).destroy(renderpass, nullptr, *device->getDispatcher()); - } - } + ~Storage(); }; + std::shared_ptr storage; std::vector attachments; @@ -51,13 +48,13 @@ namespace skyline::gpu::interconnect::node { std::vector attachmentReferences; std::vector> preserveAttachmentReferences; //!< Any attachment that must be preserved to be utilized by a future subpass, these are stored per-subpass to ensure contiguity - constexpr static uintptr_t DepthStencilNull{std::numeric_limits::max()}; //!< A sentinel value to denote the lack of a depth stencil attachment in a VkSubpassDescription + constexpr static uintptr_t NoDepthStencil{std::numeric_limits::max()}; //!< A sentinel value to denote the lack of a depth stencil attachment in a VkSubpassDescription /** * @brief Rebases a pointer containing an offset relative to the beginning of a container */ template - T *RebasePointer(const Container &container, const T *offset) { + constexpr T *RebasePointer(const Container &container, const T *offset) { return reinterpret_cast(reinterpret_cast(container.data()) + reinterpret_cast(offset)); } @@ -74,125 +71,12 @@ namespace skyline::gpu::interconnect::node { * @note Any preservation of attachments from previous subpasses is automatically handled by this * @return The index of the attachment in the render pass which can be utilized with VkAttachmentReference */ - u32 AddAttachment(TextureView &view) { - auto 
&textures{storage->textures}; - auto texture{std::find(textures.begin(), textures.end(), view.backing)}; - if (texture == textures.end()) - textures.push_back(view.backing); - - auto vkView{view.GetView()}; - auto attachment{std::find(attachments.begin(), attachments.end(), vkView)}; - if (attachment == attachments.end()) { - // If we cannot find any matches for the specified attachment, we add it as a new one - attachments.push_back(vkView); - attachmentDescriptions.push_back(vk::AttachmentDescription{ - .format = *view.format, - .initialLayout = view.backing->layout, - .finalLayout = view.backing->layout, - }); - return attachments.size() - 1; - } else { - // If we've got a match from a previous subpass, we need to preserve the attachment till the current subpass - auto attachmentIndex{std::distance(attachments.begin(), attachment)}; - - auto it{subpassDescriptions.begin()}; - for (; it != subpassDescriptions.end(); it++) { - auto referenceBeginIt{attachmentReferences.begin()}; - referenceBeginIt += reinterpret_cast(it->pInputAttachments) / sizeof(vk::AttachmentReference); - - auto referenceEndIt{referenceBeginIt + it->inputAttachmentCount + it->colorAttachmentCount}; // We depend on all attachments being contiguous for a subpass, this will horribly break if that assumption is broken - if (reinterpret_cast(it->pDepthStencilAttachment) != DepthStencilNull) - referenceEndIt++; - - if (std::find_if(referenceBeginIt, referenceEndIt, [&](const vk::AttachmentReference &reference) { - return reference.attachment == attachmentIndex; - }) != referenceEndIt) - break; // The first subpass that utilizes the attachment we want to preserve - } - - if (it == subpassDescriptions.end()) - throw exception("Cannot find corresponding subpass for attachment #{}", attachmentIndex); - - auto lastUsageIt{it}; - for (; it != subpassDescriptions.end(); it++) { - auto referenceBeginIt{attachmentReferences.begin()}; - referenceBeginIt += reinterpret_cast(it->pInputAttachments) / 
sizeof(vk::AttachmentReference); - - auto referenceEndIt{referenceBeginIt + it->inputAttachmentCount + it->colorAttachmentCount}; - if (reinterpret_cast(it->pDepthStencilAttachment) != DepthStencilNull) - referenceEndIt++; - - if (std::find_if(referenceBeginIt, referenceEndIt, [&](const vk::AttachmentReference &reference) { - return reference.attachment == attachmentIndex; - }) != referenceEndIt) { - lastUsageIt = it; - continue; // If a subpass uses an attachment then it doesn't need to be preserved - } - - auto &subpassPreserveAttachments{preserveAttachmentReferences[std::distance(subpassDescriptions.begin(), it)]}; - if (std::find(subpassPreserveAttachments.begin(), subpassPreserveAttachments.end(), attachmentIndex) != subpassPreserveAttachments.end()) - subpassPreserveAttachments.push_back(attachmentIndex); - } - - vk::SubpassDependency dependency{ - .srcSubpass = static_cast(std::distance(subpassDescriptions.begin(), lastUsageIt)), - .dstSubpass = static_cast(subpassDescriptions.size()), // We assume that the next subpass is using the attachment - .srcStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput, - .dstStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput, - .srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite, - .dstAccessMask = vk::AccessFlagBits::eColorAttachmentRead, - .dependencyFlags = vk::DependencyFlagBits::eByRegion, - }; - - if (std::find(subpassDependencies.begin(), subpassDependencies.end(), dependency) == subpassDependencies.end()) - subpassDependencies.push_back(dependency); - - return attachmentIndex; - } - } + u32 AddAttachment(TextureView &view); /** * @brief Creates a subpass with the attachments bound in the specified order */ - void AddSubpass(span inputAttachments, span colorAttachments, TextureView *depthStencilAttachment) { - attachmentReferences.reserve(attachmentReferences.size() + inputAttachments.size() + colorAttachments.size() + (depthStencilAttachment ? 
1 : 0)); - - auto inputAttachmentsOffset{attachmentReferences.size() * sizeof(vk::AttachmentReference)}; - for (auto &attachment : inputAttachments) { - attachmentReferences.push_back(vk::AttachmentReference{ - .attachment = AddAttachment(attachment), - .layout = attachment.backing->layout, - }); - } - - auto colorAttachmentsOffset{attachmentReferences.size() * sizeof(vk::AttachmentReference)}; - for (auto &attachment : colorAttachments) { - attachmentReferences.push_back(vk::AttachmentReference{ - .attachment = AddAttachment(attachment), - .layout = attachment.backing->layout, - }); - } - - auto depthStencilAttachmentOffset{attachmentReferences.size() * sizeof(vk::AttachmentReference)}; - if (depthStencilAttachment) { - attachmentReferences.push_back(vk::AttachmentReference{ - .attachment = AddAttachment(*depthStencilAttachment), - .layout = depthStencilAttachment->backing->layout, - }); - } - - preserveAttachmentReferences.emplace_back(); // We need to create storage for any attachments that might need to preserved by this pass - - // Note: We encode the offsets as the pointers due to vector pointer invalidation, RebasePointer(...) can be utilized to deduce the real pointer - subpassDescriptions.push_back(vk::SubpassDescription{ - .pipelineBindPoint = vk::PipelineBindPoint::eGraphics, - .inputAttachmentCount = static_cast(inputAttachments.size()), - .pInputAttachments = reinterpret_cast(inputAttachmentsOffset), - .colorAttachmentCount = static_cast(colorAttachments.size()), - .pColorAttachments = reinterpret_cast(colorAttachmentsOffset), - .pDepthStencilAttachment = reinterpret_cast(depthStencilAttachment ? 
depthStencilAttachmentOffset : DepthStencilNull), - }); - } + void AddSubpass(span inputAttachments, span colorAttachments, TextureView *depthStencilAttachment); /** * @brief Clears a color attachment in the current subpass with VK_ATTACHMENT_LOAD_OP_LOAD @@ -200,90 +84,9 @@ namespace skyline::gpu::interconnect::node { * @return If the attachment could be cleared or not due to conflicts with other operations * @note We require a subpass to be attached during this as the clear will not take place unless it's referenced by a subpass */ - bool ClearColorAttachment(u32 colorAttachment, const vk::ClearColorValue &value) { - auto attachmentReference{RebasePointer(attachmentReferences, subpassDescriptions.back().pColorAttachments) + colorAttachment}; - auto attachmentIndex{attachmentReference->attachment}; + bool ClearColorAttachment(u32 colorAttachment, const vk::ClearColorValue &value); - for (const auto &reference : attachmentReferences) - if (reference.attachment == attachmentIndex && &reference != attachmentReference) - return false; - - auto &attachmentDescription{attachmentDescriptions.at(attachmentIndex)}; - if (attachmentDescription.loadOp == vk::AttachmentLoadOp::eLoad) { - attachmentDescription.loadOp = vk::AttachmentLoadOp::eClear; - - clearValues.resize(attachmentIndex + 1); - clearValues[attachmentIndex].color = value; - - return true; - } else if (attachmentDescription.loadOp == vk::AttachmentLoadOp::eClear && clearValues[attachmentIndex].color.uint32 == value.uint32) { - return true; - } - - return false; - } - - void operator()(vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &cycle, GPU &gpu) { - storage->device = &gpu.vkDevice; - - auto preserveAttachmentIt{preserveAttachmentReferences.begin()}; - for (auto &subpassDescription : subpassDescriptions) { - subpassDescription.pInputAttachments = RebasePointer(attachmentReferences, subpassDescription.pInputAttachments); - subpassDescription.pColorAttachments = RebasePointer(attachmentReferences, 
subpassDescription.pColorAttachments); - - auto depthStencilAttachmentOffset{reinterpret_cast(subpassDescription.pDepthStencilAttachment)}; - if (depthStencilAttachmentOffset != DepthStencilNull) - subpassDescription.pDepthStencilAttachment = RebasePointer(attachmentReferences, subpassDescription.pDepthStencilAttachment); - else - subpassDescription.pDepthStencilAttachment = nullptr; - - subpassDescription.preserveAttachmentCount = preserveAttachmentIt->size(); - subpassDescription.pPreserveAttachments = preserveAttachmentIt->data(); - preserveAttachmentIt++; - } - - for (auto &texture : storage->textures) { - texture->lock(); - texture->WaitOnBacking(); - if (texture->cycle.lock() != cycle) - texture->WaitOnFence(); - } - - auto renderpass{(*gpu.vkDevice).createRenderPass(vk::RenderPassCreateInfo{ - .attachmentCount = static_cast(attachmentDescriptions.size()), - .pAttachments = attachmentDescriptions.data(), - .subpassCount = static_cast(subpassDescriptions.size()), - .pSubpasses = subpassDescriptions.data(), - .dependencyCount = static_cast(subpassDependencies.size()), - .pDependencies = subpassDependencies.data(), - }, nullptr, *gpu.vkDevice.getDispatcher())}; - storage->renderpass = renderpass; - - auto framebuffer{(*gpu.vkDevice).createFramebuffer(vk::FramebufferCreateInfo{ - .renderPass = renderpass, - .attachmentCount = static_cast(attachments.size()), - .pAttachments = attachments.data(), - .width = renderArea.extent.width, - .height = renderArea.extent.height, - .layers = 1, - }, nullptr, *gpu.vkDevice.getDispatcher())}; - storage->framebuffer = framebuffer; - - commandBuffer.beginRenderPass(vk::RenderPassBeginInfo{ - .renderPass = renderpass, - .framebuffer = framebuffer, - .renderArea = renderArea, - .clearValueCount = static_cast(clearValues.size()), - .pClearValues = clearValues.data(), - }, vk::SubpassContents::eInline); - - cycle->AttachObjects(storage); - - for (auto &texture : storage->textures) { - texture->unlock(); - texture->cycle = cycle; - } 
- } + void operator()(vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &cycle, GPU &gpu); }; /** diff --git a/app/src/main/cpp/skyline/gpu/texture/copy.h b/app/src/main/cpp/skyline/gpu/texture/copy.h index 0fd4cebe..f5fdbdd4 100644 --- a/app/src/main/cpp/skyline/gpu/texture/copy.h +++ b/app/src/main/cpp/skyline/gpu/texture/copy.h @@ -9,21 +9,21 @@ namespace skyline::gpu { /** * @brief Copies the contents of a blocklinear guest texture to a linear output buffer */ - void CopyBlockLinearToLinear(GuestTexture& guest, u8* guestInput, u8* linearOutput) { + void CopyBlockLinearToLinear(GuestTexture &guest, u8 *guestInput, u8 *linearOutput) { // Reference on Block-linear tiling: https://gist.github.com/PixelyIon/d9c35050af0ef5690566ca9f0965bc32 constexpr u8 SectorWidth{16}; // The width of a sector in bytes constexpr u8 SectorHeight{2}; // The height of a sector in lines constexpr u8 GobWidth{64}; // The width of a GOB in bytes constexpr u8 GobHeight{8}; // The height of a GOB in lines - auto blockHeight{guest.tileConfig.blockHeight}; //!< The height of the blocks in GOBs - auto robHeight{GobHeight * blockHeight}; //!< The height of a single ROB (Row of Blocks) in lines - auto surfaceHeight{guest.dimensions.height / guest.format->blockHeight}; //!< The height of the surface in lines - auto surfaceHeightRobs{util::AlignUp(surfaceHeight, robHeight) / robHeight}; //!< The height of the surface in ROBs (Row Of Blocks) - auto robWidthBytes{util::AlignUp((guest.dimensions.width / guest.format->blockWidth) * guest.format->bpb, GobWidth)}; //!< The width of a ROB in bytes - auto robWidthBlocks{robWidthBytes / GobWidth}; //!< The width of a ROB in blocks (and GOBs because block width == 1 on the Tegra X1) - auto robBytes{robWidthBytes * robHeight}; //!< The size of a ROB in bytes - auto gobYOffset{robWidthBytes * GobHeight}; //!< The offset of the next Y-axis GOB from the current one in linear space + u32 blockHeight{guest.tileConfig.blockHeight}; //!< The height of 
the blocks in GOBs + u32 robHeight{GobHeight * blockHeight}; //!< The height of a single ROB (Row of Blocks) in lines + u32 surfaceHeight{guest.dimensions.height / guest.format->blockHeight}; //!< The height of the surface in lines + u32 surfaceHeightRobs{util::AlignUp(surfaceHeight, robHeight) / robHeight}; //!< The height of the surface in ROBs (Row Of Blocks) + u32 robWidthBytes{util::AlignUp((guest.dimensions.width / guest.format->blockWidth) * guest.format->bpb, GobWidth)}; //!< The width of a ROB in bytes + u32 robWidthBlocks{robWidthBytes / GobWidth}; //!< The width of a ROB in blocks (and GOBs because block width == 1 on the Tegra X1) + u32 robBytes{robWidthBytes * robHeight}; //!< The size of a ROB in bytes + u32 gobYOffset{robWidthBytes * GobHeight}; //!< The offset of the next Y-axis GOB from the current one in linear space auto inputSector{guestInput}; auto outputRob{linearOutput}; @@ -51,24 +51,25 @@ namespace skyline::gpu { paddingY = (guest.tileConfig.blockHeight - blockHeight) * (SectorWidth * SectorWidth * SectorHeight); // Calculate the amount of padding between contiguous sectors } } + /** * @brief Copies the contents of a blocklinear guest texture to a linear output buffer */ - void CopyLinearToBlockLinear(GuestTexture& guest, u8* linearInput, u8* guestOutput) { + void CopyLinearToBlockLinear(GuestTexture &guest, u8 *linearInput, u8 *guestOutput) { // Reference on Block-linear tiling: https://gist.github.com/PixelyIon/d9c35050af0ef5690566ca9f0965bc32 constexpr u8 SectorWidth{16}; // The width of a sector in bytes constexpr u8 SectorHeight{2}; // The height of a sector in lines constexpr u8 GobWidth{64}; // The width of a GOB in bytes constexpr u8 GobHeight{8}; // The height of a GOB in lines - auto blockHeight{guest.tileConfig.blockHeight}; //!< The height of the blocks in GOBs - auto robHeight{GobHeight * blockHeight}; //!< The height of a single ROB (Row of Blocks) in lines - auto surfaceHeight{guest.dimensions.height / 
guest.format->blockHeight}; //!< The height of the surface in lines - auto surfaceHeightRobs{util::AlignUp(surfaceHeight, robHeight) / robHeight}; //!< The height of the surface in ROBs (Row Of Blocks) - auto robWidthBytes{util::AlignUp((guest.dimensions.width / guest.format->blockWidth) * guest.format->bpb, GobWidth)}; //!< The width of a ROB in bytes - auto robWidthBlocks{robWidthBytes / GobWidth}; //!< The width of a ROB in blocks (and GOBs because block width == 1 on the Tegra X1) - auto robBytes{robWidthBytes * robHeight}; //!< The size of a ROB in bytes - auto gobYOffset{robWidthBytes * GobHeight}; //!< The offset of the next Y-axis GOB from the current one in linear space + u32 blockHeight{guest.tileConfig.blockHeight}; //!< The height of the blocks in GOBs + u32 robHeight{GobHeight * blockHeight}; //!< The height of a single ROB (Row of Blocks) in lines + u32 surfaceHeight{guest.dimensions.height / guest.format->blockHeight}; //!< The height of the surface in lines + u32 surfaceHeightRobs{util::AlignUp(surfaceHeight, robHeight) / robHeight}; //!< The height of the surface in ROBs (Row Of Blocks) + u32 robWidthBytes{util::AlignUp((guest.dimensions.width / guest.format->blockWidth) * guest.format->bpb, GobWidth)}; //!< The width of a ROB in bytes + u32 robWidthBlocks{robWidthBytes / GobWidth}; //!< The width of a ROB in blocks (and GOBs because block width == 1 on the Tegra X1) + u32 robBytes{robWidthBytes * robHeight}; //!< The size of a ROB in bytes + u32 gobYOffset{robWidthBytes * GobHeight}; //!< The offset of the next Y-axis GOB from the current one in linear space auto outputSector{guestOutput}; auto inputRob{linearInput}; @@ -100,7 +101,7 @@ namespace skyline::gpu { /** * @brief Copies the contents of a pitch-linear guest texture to a linear output buffer */ - void CopyPitchLinearToLinear(GuestTexture& guest, u8* guestInput, u8* linearOutput) { + void CopyPitchLinearToLinear(GuestTexture &guest, u8 *guestInput, u8 *linearOutput) { auto 
sizeLine{guest.format->GetSize(guest.dimensions.width, 1)}; //!< The size of a single line of pixel data auto sizeStride{guest.format->GetSize(guest.tileConfig.pitch, 1)}; //!< The size of a single stride of pixel data @@ -117,7 +118,7 @@ namespace skyline::gpu { /** * @brief Copies the contents of a linear buffer to a pitch-linear guest texture */ - void CopyLinearToPitchLinear(GuestTexture& guest, u8* linearInput, u8* guestOutput) { + void CopyLinearToPitchLinear(GuestTexture &guest, u8 *linearInput, u8 *guestOutput) { auto sizeLine{guest.format->GetSize(guest.dimensions.width, 1)}; //!< The size of a single line of pixel data auto sizeStride{guest.format->GetSize(guest.tileConfig.pitch, 1)}; //!< The size of a single stride of pixel data diff --git a/app/src/main/cpp/skyline/gpu/texture/texture.cpp b/app/src/main/cpp/skyline/gpu/texture/texture.cpp index 99a9ea88..c9d4d40e 100644 --- a/app/src/main/cpp/skyline/gpu/texture/texture.cpp +++ b/app/src/main/cpp/skyline/gpu/texture/texture.cpp @@ -11,8 +11,6 @@ namespace skyline::gpu { std::shared_ptr Texture::SynchronizeHostImpl(const std::shared_ptr &pCycle) { if (!guest) throw exception("Synchronization of host textures requires a valid guest texture to synchronize from"); - else if (guest->mappings.size() != 1) - throw exception("Synchronization of non-contigious textures is not supported"); else if (guest->dimensions != dimensions) throw exception("Guest and host dimensions being different is not supported currently"); else if (guest->mappings.size() > 1) @@ -334,8 +332,6 @@ namespace skyline::gpu { void Texture::SynchronizeGuest() { if (!guest) throw exception("Synchronization of guest textures requires a valid guest texture to synchronize to"); - else if (guest->mappings.size() != 1) - throw exception("Synchronization of non-contigious textures is not supported"); else if (layout == vk::ImageLayout::eUndefined) return; // If the state of the host texture is undefined then so can the guest else if 
(guest->mappings.size() > 1) @@ -366,8 +362,6 @@ namespace skyline::gpu { void Texture::SynchronizeGuestWithBuffer(const vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &pCycle) { if (!guest) throw exception("Synchronization of guest textures requires a valid guest texture to synchronize to"); - else if (guest->mappings.size() != 1) - throw exception("Synchronization of non-contigious textures is not supported"); else if (layout == vk::ImageLayout::eUndefined) return; // If the state of the host texture is undefined then so can the guest else if (guest->mappings.size() > 1) diff --git a/app/src/main/cpp/skyline/gpu/texture/texture.h b/app/src/main/cpp/skyline/gpu/texture/texture.h index 14f4fca4..833c36d7 100644 --- a/app/src/main/cpp/skyline/gpu/texture/texture.h +++ b/app/src/main/cpp/skyline/gpu/texture/texture.h @@ -214,13 +214,17 @@ namespace skyline::gpu { }; constexpr bool operator==(const TileConfig &other) const { - if (mode == other.mode) - if (mode == TileMode::Linear) - return true; - else if (mode == TileMode::Pitch) - return pitch == other.pitch; - else if (mode == TileMode::Block) - return blockHeight == other.blockHeight && blockDepth == other.blockDepth; + if (mode == other.mode) { + switch (mode) { + case TileMode::Linear: + return true; + case TileMode::Pitch: + return pitch == other.pitch; + case TileMode::Block: + return blockHeight == other.blockHeight && blockDepth == other.blockDepth; + } + } + return false; } }; @@ -248,7 +252,7 @@ namespace skyline::gpu { * @brief A descriptor for a texture present in guest memory, it can be used to create a corresponding Texture object for usage on the host */ struct GuestTexture { - using Mappings = boost::container::small_vector, 3>; + using Mappings = boost::container::small_vector, 3>; Mappings mappings; //!< Spans to CPU memory for the underlying data backing this texture texture::Dimensions dimensions; @@ -261,9 +265,25 @@ namespace skyline::gpu { GuestTexture() {} - 
GuestTexture(Mappings mappings, texture::Dimensions dimensions, texture::Format format, texture::TileConfig tileConfig, texture::TextureType type, u16 baseArrayLayer = 0, u16 layerCount = 1, u32 layerStride = 0) : mappings(mappings), dimensions(dimensions), format(format), tileConfig(tileConfig), type(type), baseArrayLayer(baseArrayLayer), layerCount(layerCount), layerStride(layerStride) {} + GuestTexture(Mappings mappings, texture::Dimensions dimensions, texture::Format format, texture::TileConfig tileConfig, texture::TextureType type, u16 baseArrayLayer = 0, u16 layerCount = 1, u32 layerStride = 0) + : mappings(mappings), + dimensions(dimensions), + format(format), + tileConfig(tileConfig), + type(type), + baseArrayLayer(baseArrayLayer), + layerCount(layerCount), + layerStride(layerStride) {} - GuestTexture(span mapping, texture::Dimensions dimensions, texture::Format format, texture::TileConfig tileConfig, texture::TextureType type, u16 baseArrayLayer = 0, u16 layerCount = 1, u32 layerStride = 0) : mappings(1, mapping), dimensions(dimensions), format(format), tileConfig(tileConfig), type(type), baseArrayLayer(baseArrayLayer), layerCount(layerCount), layerStride(layerStride) {} + GuestTexture(span mapping, texture::Dimensions dimensions, texture::Format format, texture::TileConfig tileConfig, texture::TextureType type, u16 baseArrayLayer = 0, u16 layerCount = 1, u32 layerStride = 0) + : mappings(1, mapping), + dimensions(dimensions), + format(format), + tileConfig(tileConfig), + type(type), + baseArrayLayer(baseArrayLayer), + layerCount(layerCount), + layerStride(layerStride) {} }; class TextureManager; @@ -323,18 +343,18 @@ namespace skyline::gpu { /** * @brief Records commands for copying data from a staging buffer to the texture's backing into the supplied command buffer */ - void CopyFromStagingBuffer(const vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr& stagingBuffer); + void CopyFromStagingBuffer(const vk::raii::CommandBuffer &commandBuffer, 
const std::shared_ptr &stagingBuffer); /** * @brief Records commands for copying data from the texture's backing to a staging buffer into the supplied command buffer */ - void CopyIntoStagingBuffer(const vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr& stagingBuffer); + void CopyIntoStagingBuffer(const vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &stagingBuffer); /** * @brief Copies data from the supplied host buffer into the guest texture * @note The host buffer must be contain the entire image */ - void CopyToGuest(u8* hostBuffer); + void CopyToGuest(u8 *hostBuffer); /** * @brief A FenceCycleDependency that copies the contents of a staging buffer or mapped image backing the texture to the guest texture diff --git a/app/src/main/cpp/skyline/gpu/texture_manager.cpp b/app/src/main/cpp/skyline/gpu/texture_manager.cpp index b6260910..e679dd80 100644 --- a/app/src/main/cpp/skyline/gpu/texture_manager.cpp +++ b/app/src/main/cpp/skyline/gpu/texture_manager.cpp @@ -51,9 +51,9 @@ namespace skyline::gpu { .layerCount = texture->layerCount, }, guestTexture.format); } - } else if (mappingMatch) { + } /* else if (mappingMatch) { // We've gotten a partial match with a certain subset of contiguous mappings matching, we need to check if this is a meaningful overlap - if (false) { + if (MeaningfulOverlap) { // TODO: Layout Checks + Check match against Base Layer in TIC auto &texture{hostMapping->texture}; return TextureView(texture, static_cast(guestTexture.type), vk::ImageSubresourceRange{ @@ -62,7 +62,7 @@ namespace skyline::gpu { .layerCount = texture->layerCount, }, guestTexture.format); } - } + } */ } // Create a texture as we cannot find one that matches diff --git a/app/src/main/cpp/skyline/nce.cpp b/app/src/main/cpp/skyline/nce.cpp index af01527f..3b0f69da 100644 --- a/app/src/main/cpp/skyline/nce.cpp +++ b/app/src/main/cpp/skyline/nce.cpp @@ -102,10 +102,11 @@ namespace skyline::nce { *tls = nullptr; } else { // If TLS wasn't restored then 
this occurred in host code if (signal == SIGSEGV) { - static bool RunningUnderDebugger{[]() { - std::ifstream status("/proc/self/status"); - constexpr std::string_view TracerPidTag = "TracerPid:"; + bool runningUnderDebugger{[]() { + static std::ifstream status("/proc/self/status"); + status.seekg(0); + constexpr std::string_view TracerPidTag = "TracerPid:"; for (std::string line; std::getline(status, line);) { if (line.starts_with(TracerPidTag)) { line = line.substr(TracerPidTag.size()); @@ -123,7 +124,7 @@ namespace skyline::nce { return false; }()}; - if (RunningUnderDebugger) + if (runningUnderDebugger) raise(SIGTRAP); // Notify the debugger if we've got a SIGSEGV as the debugger doesn't catch them by default as they might be hooked } diff --git a/app/src/main/cpp/skyline/services/codec/IHardwareOpusDecoder.cpp b/app/src/main/cpp/skyline/services/codec/IHardwareOpusDecoder.cpp index 16e30fb3..f1496934 100644 --- a/app/src/main/cpp/skyline/services/codec/IHardwareOpusDecoder.cpp +++ b/app/src/main/cpp/skyline/services/codec/IHardwareOpusDecoder.cpp @@ -23,8 +23,7 @@ namespace skyline::service::codec { // We utilize the guest-supplied work buffer for allocating the OpusDecoder object into decoderState = reinterpret_cast(workBuffer->host.ptr); - int result{opus_decoder_init(decoderState, sampleRate, channelCount)}; - if (result != OPUS_OK) + if (int result{opus_decoder_init(decoderState, sampleRate, channelCount)}; result != OPUS_OK) /* Initializer kept separate from the comparison so result holds the raw Opus error code rather than the bool produced by '!=' */ throw OpusException(result); }