Subpass Support + More RT Formats + Fix FenceCycle Cyclic Dependencies

Support for subpasses was added by reworking the attachment reuse code to account for preserved attachments and subpass dependencies. A lot of RT formats were also added to allow SMO to boot up entirely; it should be noted that it doesn't render anything.

`FenceCycle` had a cyclic dependency which broke clean exit; we now utilize `std::weak_ptr<FenceCycle>` inside the `Texture` object. A minor fix was also made for broken stack traces, which were caused by supplying a `nullptr` C-string to libfmt when a symbol was unresolved; this caused an `abort` due to the invocation of `strlen` with it.
This commit is contained in:
PixelyIon 2021-09-22 09:00:11 +05:30 committed by Billy Laws
parent 239d2625e2
commit 95a08627e5
12 changed files with 332 additions and 117 deletions

View File

@ -397,6 +397,11 @@ namespace skyline {
constexpr span(const std::span<T, Extent> &spn) : std::span<T, Extent>(spn) {} constexpr span(const std::span<T, Extent> &spn) : std::span<T, Extent>(spn) {}
/**
* @brief A single-element constructor for a span, the resulting span covers exactly one element which is the supplied object itself
* @note Only the address of the object is stored (no copy is made), so the object needs to outlive any usage of the span
*/
constexpr span(T &spn) : std::span<T, Extent>(&spn, 1) {}
/** /**
* @brief We want to support implicitly casting from std::string_view -> span as it's just a specialization of a data view which span is a generic form of, the opposite doesn't hold true as not all data held by a span is string data therefore the conversion isn't implicit there * @brief We want to support implicitly casting from std::string_view -> span as it's just a specialization of a data view which span is a generic form of, the opposite doesn't hold true as not all data held by a span is string data therefore the conversion isn't implicit there
*/ */

View File

@ -87,25 +87,30 @@ namespace skyline::gpu {
case util::Hash(string): \ case util::Hash(string): \
if(string == type) \ if(string == type) \
return VK_FALSE; \ return VK_FALSE; \
else \
break break
#define DEBUG_VALIDATION(string) \ #define DEBUG_VALIDATION(string) \
case util::Hash(string): \ case util::Hash(string): \
if(string == type) \ if(string == type) \
__builtin_debugtrap(); \ raise(SIGTRAP); \
break break
// Using __builtin_debugtrap() as opposed to raise(SIGTRAP) will result in the inability to continue
std::string_view type(message); std::string_view type(message);
auto first{type.find_first_of('[') + 2}; auto first{type.find('[')};
type = type.substr(first, type.find_first_of(']', first) - 4); auto last{type.find(']', first)};
if (first != std::string_view::npos && last != std::string_view::npos) {
type = type.substr(first + 2, last != std::string_view::npos ? last - 4 : last);
std::string typeStr{type};
switch (util::Hash(type)) { switch (util::Hash(type)) {
IGNORE_VALIDATION("UNASSIGNED-CoreValidation-SwapchainPreTransform"); // We handle transformation via Android APIs directly IGNORE_VALIDATION("UNASSIGNED-CoreValidation-SwapchainPreTransform"); // We handle transformation via Android APIs directly
IGNORE_VALIDATION("UNASSIGNED-GeneralParameterPerfWarn-SuboptimalSwapchain"); // Same as SwapchainPreTransform IGNORE_VALIDATION("UNASSIGNED-GeneralParameterPerfWarn-SuboptimalSwapchain"); // Same as SwapchainPreTransform
IGNORE_VALIDATION("UNASSIGNED-CoreValidation-DrawState-InvalidImageLayout"); // We utilize images as VK_IMAGE_LAYOUT_GENERAL rather than optimal layouts for operations
} }
#undef IGNORE_TYPE #undef IGNORE_TYPE
}
logger->Write(severityLookup.at(std::countr_zero(static_cast<u32>(flags))), util::Format("Vk{}:{}[0x{:X}]:I{}:L{}: {}", layerPrefix, vk::to_string(vk::DebugReportObjectTypeEXT(objectType)), object, messageCode, location, message)); logger->Write(severityLookup.at(std::countr_zero(static_cast<u32>(flags))), util::Format("Vk{}:{}[0x{:X}]:I{}:L{}: {}", layerPrefix, vk::to_string(vk::DebugReportObjectTypeEXT(objectType)), object, messageCode, location, message));

View File

@ -5,8 +5,8 @@
#include "command_executor.h" #include "command_executor.h"
namespace skyline::gpu::interconnect { namespace skyline::gpu::interconnect {
void CommandExecutor::AddSubpass(const std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &)> &function, vk::Rect2D renderArea, std::vector<TextureView> inputAttachments, std::vector<TextureView> colorAttachments, std::optional<TextureView> depthStencilAttachment) { bool CommandExecutor::CreateRenderpass(vk::Rect2D renderArea) {
if (renderpass) { // TODO: Subpass support (&& renderpass->renderArea != renderArea) if (renderpass && renderpass->renderArea != renderArea) {
nodes.emplace_back(std::in_place_type_t<node::RenderpassEndNode>()); nodes.emplace_back(std::in_place_type_t<node::RenderpassEndNode>());
renderpass = nullptr; renderpass = nullptr;
} }
@ -16,13 +16,44 @@ namespace skyline::gpu::interconnect {
// We need to create a render pass if one doesn't already exist or the current one isn't compatible // We need to create a render pass if one doesn't already exist or the current one isn't compatible
renderpass = &std::get<node::RenderpassNode>(nodes.emplace_back(std::in_place_type_t<node::RenderpassNode>(), renderArea)); renderpass = &std::get<node::RenderpassNode>(nodes.emplace_back(std::in_place_type_t<node::RenderpassNode>(), renderArea));
renderpass->AddSubpass(inputAttachments, colorAttachments, depthStencilAttachment); return newRenderpass;
}
void CommandExecutor::AddSubpass(const std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &)> &function, vk::Rect2D renderArea, std::vector<TextureView> inputAttachments, std::vector<TextureView> colorAttachments, std::optional<TextureView> depthStencilAttachment) {
bool newRenderpass{CreateRenderpass(renderArea)};
renderpass->AddSubpass(inputAttachments, colorAttachments, depthStencilAttachment ? &*depthStencilAttachment : nullptr);
if (newRenderpass) if (newRenderpass)
nodes.emplace_back(std::in_place_type_t<node::FunctionNode>(), function); nodes.emplace_back(std::in_place_type_t<node::FunctionNode>(), function);
else else
nodes.emplace_back(std::in_place_type_t<node::NextSubpassNode>(), function); nodes.emplace_back(std::in_place_type_t<node::NextSubpassNode>(), function);
} }
/**
 * @brief Records a subpass that clears the entirety of the supplied color attachment with the supplied value
 * @param attachment The color attachment to clear, it is bound as color attachment #0 of the new subpass
 * @param value The color that the attachment is cleared to
 * @note The clear is folded into the attachment's load operation when RenderpassNode::ClearColorAttachment accepts it, otherwise an explicit clearAttachments command is recorded inside the subpass
 */
void CommandExecutor::AddClearSubpass(TextureView attachment, const vk::ClearColorValue &value) {
    bool newRenderpass{CreateRenderpass(vk::Rect2D{
        .extent = attachment.backing->dimensions,
    })};
    renderpass->AddSubpass({}, attachment, nullptr);

    if (renderpass->ClearColorAttachment(0, value)) {
        // The load operation performs the clear so nothing has to be recorded inside the subpass itself
        // A reused renderpass must still advance to the subpass that was just added, otherwise the recorded commands would fall out of step with the subpass descriptions
        if (!newRenderpass)
            nodes.emplace_back(std::in_place_type_t<node::NextSubpassNode>(), [](vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &) {});
        return;
    }

    // Fallback: The load operation couldn't be used, so we clear the attachment explicitly from within the subpass
    auto function{[scissor = attachment.backing->dimensions, value](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &) {
        commandBuffer.clearAttachments(vk::ClearAttachment{
            .aspectMask = vk::ImageAspectFlagBits::eColor,
            .colorAttachment = 0,
            .clearValue = value,
        }, vk::ClearRect{
            .rect = scissor,
            .baseArrayLayer = 0,
            .layerCount = 1,
        });
    }};

    if (newRenderpass)
        nodes.emplace_back(std::in_place_type_t<node::FunctionNode>(), function); // The first subpass is entered by beginning the renderpass
    else
        nodes.emplace_back(std::in_place_type_t<node::NextSubpassNode>(), function);
}
void CommandExecutor::Execute() { void CommandExecutor::Execute() {
if (!nodes.empty()) { if (!nodes.empty()) {
if (renderpass) { if (renderpass) {
@ -36,6 +67,7 @@ namespace skyline::gpu::interconnect {
std::visit(VariantVisitor{ std::visit(VariantVisitor{
[&](FunctionNode &node) { node(commandBuffer, cycle, gpu); }, [&](FunctionNode &node) { node(commandBuffer, cycle, gpu); },
[&](RenderpassNode &node) { node(commandBuffer, cycle, gpu); }, [&](RenderpassNode &node) { node(commandBuffer, cycle, gpu); },
[&](NextSubpassNode &node) { node(commandBuffer, cycle, gpu); },
[&](RenderpassEndNode &node) { node(commandBuffer, cycle, gpu); }, [&](RenderpassEndNode &node) { node(commandBuffer, cycle, gpu); },
}, node); }, node);
} }

View File

@ -17,6 +17,11 @@ namespace skyline::gpu::interconnect {
boost::container::stable_vector<node::NodeVariant> nodes; boost::container::stable_vector<node::NodeVariant> nodes;
node::RenderpassNode *renderpass{}; node::RenderpassNode *renderpass{};
/**
* @return If a new renderpass was created by the function or the current one was reused as it was compatible
*/
bool CreateRenderpass(vk::Rect2D renderArea);
public: public:
CommandExecutor(const DeviceState &state) : gpu(*state.gpu) {} CommandExecutor(const DeviceState &state) : gpu(*state.gpu) {}
@ -26,6 +31,15 @@ namespace skyline::gpu::interconnect {
*/ */
void AddSubpass(const std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &)> &function, vk::Rect2D renderArea, std::vector<TextureView> inputAttachments = {}, std::vector<TextureView> colorAttachments = {}, std::optional<TextureView> depthStencilAttachment = {}); void AddSubpass(const std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &)> &function, vk::Rect2D renderArea, std::vector<TextureView> inputAttachments = {}, std::vector<TextureView> colorAttachments = {}, std::optional<TextureView> depthStencilAttachment = {});
/**
* @brief Adds a subpass that clears the entirety of the specified attachment with a value, it may utilize VK_ATTACHMENT_LOAD_OP_CLEAR for a more efficient clear when possible
* @note Any texture supplied to this **must** be locked by the calling thread, it should also undergo no persistent layout transitions till execution
*/
void AddClearSubpass(TextureView attachment, const vk::ClearColorValue& value);
/**
* @brief Execute all the nodes and submit the resulting command buffer to the GPU
*/
void Execute(); void Execute();
}; };
} }

View File

@ -49,7 +49,17 @@ namespace skyline::gpu::interconnect::node {
std::vector<vk::AttachmentDescription> attachmentDescriptions; std::vector<vk::AttachmentDescription> attachmentDescriptions;
std::vector<vk::AttachmentReference> attachmentReferences; std::vector<vk::AttachmentReference> attachmentReferences;
std::vector<boost::container::small_vector<u32, 5>> preserveAttachmentReferences; //!< Any attachment that must be preserved to be utilized by a future subpass, these are stored per-subpass to ensure contiguity std::vector<std::vector<u32>> preserveAttachmentReferences; //!< Any attachment that must be preserved to be utilized by a future subpass, these are stored per-subpass to ensure contiguity
constexpr static uintptr_t DepthStencilNull{std::numeric_limits<uintptr_t>::max()}; //!< A sentinel value to denote the lack of a depth stencil attachment in a VkSubpassDescription
/**
 * @brief Resolves a pointer that encodes a byte offset from the start of a container's storage into a real pointer
 * @param container The container whose data() is used as the base address
 * @param offset A pointer whose numeric value is the byte offset from the base rather than an actual address
 * @return The base address of the container advanced by the encoded byte offset
 */
template<typename Container, typename T>
T *RebasePointer(const Container &container, const T *offset) {
    const auto baseAddress{reinterpret_cast<uintptr_t>(container.data())};
    const auto byteOffset{reinterpret_cast<uintptr_t>(offset)};
    return reinterpret_cast<T *>(baseAddress + byteOffset);
}
public: public:
std::vector<vk::SubpassDescription> subpassDescriptions; std::vector<vk::SubpassDescription> subpassDescriptions;
@ -70,39 +80,81 @@ namespace skyline::gpu::interconnect::node {
if (texture == textures.end()) if (texture == textures.end())
textures.push_back(view.backing); textures.push_back(view.backing);
vk::AttachmentDescription attachmentDescription{ auto vkView{view.GetView()};
auto attachment{std::find(attachments.begin(), attachments.end(), vkView)};
if (attachment == attachments.end()) {
// If we cannot find any matches for the specified attachment, we add it as a new one
attachments.push_back(vkView);
attachmentDescriptions.push_back(vk::AttachmentDescription{
.format = *view.format, .format = *view.format,
.initialLayout = view.backing->layout, .initialLayout = view.backing->layout,
.finalLayout = view.backing->layout, .finalLayout = view.backing->layout,
}; });
auto vkView{view.GetView()};
auto attachment{std::find(attachments.begin(), attachments.end(), vkView)};
if (attachment == attachments.end() || attachmentDescriptions[std::distance(attachments.begin(), attachment)] != attachmentDescription) {
// If we cannot find any matches for the specified attachment, we add it as a new one
attachments.push_back(vkView);
attachmentDescriptions.push_back(attachmentDescription);
return attachments.size() - 1; return attachments.size() - 1;
} else { } else {
// If we've got a match from a previous subpass, we need to preserve the attachment till the current subpass // If we've got a match from a previous subpass, we need to preserve the attachment till the current subpass
auto attachmentIndex{std::distance(attachments.begin(), attachment)}; auto attachmentIndex{std::distance(attachments.begin(), attachment)};
auto attachmentReferenceIt{std::find_if(attachmentReferences.begin(), attachmentReferences.end(), [&](const vk::AttachmentReference &reference) {
auto it{subpassDescriptions.begin()};
for (; it != subpassDescriptions.end(); it++) {
auto referenceBeginIt{attachmentReferences.begin()};
referenceBeginIt += reinterpret_cast<uintptr_t>(it->pInputAttachments) / sizeof(vk::AttachmentReference);
auto referenceEndIt{referenceBeginIt + it->inputAttachmentCount + it->colorAttachmentCount}; // We depend on all attachments being contiguous for a subpass, this will horribly break if that assumption is broken
if (reinterpret_cast<uintptr_t>(it->pDepthStencilAttachment) != DepthStencilNull)
referenceEndIt++;
if (std::find_if(referenceBeginIt, referenceEndIt, [&](const vk::AttachmentReference &reference) {
return reference.attachment == attachmentIndex; return reference.attachment == attachmentIndex;
})}; }) != referenceEndIt)
break; // The first subpass that utilizes the attachment we want to preserve
}
auto attachmentReferenceOffset{std::distance(attachmentReferences.begin(), attachmentReferenceIt) * sizeof(vk::AttachmentReference)}; if (it == subpassDescriptions.end())
auto subpassDescriptionIt{std::find_if(subpassDescriptions.begin(), subpassDescriptions.end(), [&](const vk::SubpassDescription &description) { throw exception("Cannot find corresponding subpass for attachment #{}", attachmentIndex);
return reinterpret_cast<uintptr_t>(description.pDepthStencilAttachment) > attachmentReferenceOffset;
})};
for (ssize_t subpassIndex{std::distance(subpassDescriptions.begin(), subpassDescriptionIt)}; subpassIndex != subpassDescriptions.size(); subpassIndex++) auto lastUsageIt{it};
preserveAttachmentReferences[subpassIndex].push_back(attachmentIndex); for (; it != subpassDescriptions.end(); it++) {
auto referenceBeginIt{attachmentReferences.begin()};
referenceBeginIt += reinterpret_cast<uintptr_t>(it->pInputAttachments) / sizeof(vk::AttachmentReference);
return std::distance(attachments.begin(), attachment); auto referenceEndIt{referenceBeginIt + it->inputAttachmentCount + it->colorAttachmentCount};
if (reinterpret_cast<uintptr_t>(it->pDepthStencilAttachment) != DepthStencilNull)
referenceEndIt++;
if (std::find_if(referenceBeginIt, referenceEndIt, [&](const vk::AttachmentReference &reference) {
return reference.attachment == attachmentIndex;
}) != referenceEndIt) {
lastUsageIt = it;
continue; // If a subpass uses an attachment then it doesn't need to be preserved
}
auto &subpassPreserveAttachments{preserveAttachmentReferences[std::distance(subpassDescriptions.begin(), it)]};
// Only record the attachment when this subpass isn't already preserving it, this keeps the list free of duplicate indices
if (std::find(subpassPreserveAttachments.begin(), subpassPreserveAttachments.end(), attachmentIndex) == subpassPreserveAttachments.end())
    subpassPreserveAttachments.push_back(attachmentIndex);
}
vk::SubpassDependency dependency{
.srcSubpass = static_cast<u32>(std::distance(subpassDescriptions.begin(), lastUsageIt)),
.dstSubpass = static_cast<uint32_t>(subpassDescriptions.size()), // We assume that the next subpass is using the attachment
.srcStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput,
.dstStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput,
.srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite,
.dstAccessMask = vk::AccessFlagBits::eColorAttachmentRead,
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
};
if (std::find(subpassDependencies.begin(), subpassDependencies.end(), dependency) == subpassDependencies.end())
subpassDependencies.push_back(dependency);
return attachmentIndex;
} }
} }
void AddSubpass(std::vector<TextureView> &inputAttachments, std::vector<TextureView> &colorAttachments, std::optional<TextureView> &depthStencilAttachment) { /**
* @brief Creates a subpass with the attachments bound in the specified order
*/
void AddSubpass(span <TextureView> inputAttachments, span <TextureView> colorAttachments, TextureView *depthStencilAttachment) {
attachmentReferences.reserve(attachmentReferences.size() + inputAttachments.size() + colorAttachments.size() + (depthStencilAttachment ? 1 : 0)); attachmentReferences.reserve(attachmentReferences.size() + inputAttachments.size() + colorAttachments.size() + (depthStencilAttachment ? 1 : 0));
auto inputAttachmentsOffset{attachmentReferences.size() * sizeof(vk::AttachmentReference)}; auto inputAttachmentsOffset{attachmentReferences.size() * sizeof(vk::AttachmentReference)};
@ -131,29 +183,57 @@ namespace skyline::gpu::interconnect::node {
preserveAttachmentReferences.emplace_back(); // We need to create storage for any attachments that might need to preserved by this pass preserveAttachmentReferences.emplace_back(); // We need to create storage for any attachments that might need to preserved by this pass
// Note: We encode the offsets as the pointers due to vector pointer invalidation, the vector offset will be added to them prior to submission // Note: We encode the offsets as the pointers due to vector pointer invalidation, RebasePointer(...) can be utilized to deduce the real pointer
subpassDescriptions.push_back(vk::SubpassDescription{ subpassDescriptions.push_back(vk::SubpassDescription{
.pipelineBindPoint = vk::PipelineBindPoint::eGraphics, .pipelineBindPoint = vk::PipelineBindPoint::eGraphics,
.inputAttachmentCount = static_cast<u32>(inputAttachments.size()), .inputAttachmentCount = static_cast<u32>(inputAttachments.size()),
.pInputAttachments = reinterpret_cast<vk::AttachmentReference *>(inputAttachmentsOffset), .pInputAttachments = reinterpret_cast<vk::AttachmentReference *>(inputAttachmentsOffset),
.colorAttachmentCount = static_cast<u32>(colorAttachments.size()), .colorAttachmentCount = static_cast<u32>(colorAttachments.size()),
.pColorAttachments = reinterpret_cast<vk::AttachmentReference *>(colorAttachmentsOffset), .pColorAttachments = reinterpret_cast<vk::AttachmentReference *>(colorAttachmentsOffset),
.pDepthStencilAttachment = reinterpret_cast<vk::AttachmentReference *>(depthStencilAttachment ? depthStencilAttachmentOffset : std::numeric_limits<uintptr_t>::max()), .pDepthStencilAttachment = reinterpret_cast<vk::AttachmentReference *>(depthStencilAttachment ? depthStencilAttachmentOffset : DepthStencilNull),
}); });
} }
/**
 * @brief Clears a color attachment in the current subpass with VK_ATTACHMENT_LOAD_OP_CLEAR
 * @param colorAttachment The index of the attachment in the attachments bound to the current subpass
 * @param value The color that the attachment should be cleared to
 * @return If the attachment could be cleared or not due to conflicts with other operations
 * @note We require a subpass to be attached during this as the clear will not take place unless it's referenced by a subpass
 */
bool ClearColorAttachment(u32 colorAttachment, const vk::ClearColorValue &value) {
    auto attachmentReference{RebasePointer(attachmentReferences, subpassDescriptions.back().pColorAttachments) + colorAttachment};
    auto attachmentIndex{attachmentReference->attachment};

    // The load operation can only be used when this reference is the sole usage of the attachment in the renderpass
    for (const auto &reference : attachmentReferences)
        if (reference.attachment == attachmentIndex && &reference != attachmentReference)
            return false;

    auto &attachmentDescription{attachmentDescriptions.at(attachmentIndex)};
    if (attachmentDescription.loadOp == vk::AttachmentLoadOp::eLoad) {
        attachmentDescription.loadOp = vk::AttachmentLoadOp::eClear;

        // We only ever grow the clear values, shrinking them would discard the value of any higher-indexed attachment that was cleared earlier
        if (clearValues.size() <= attachmentIndex)
            clearValues.resize(attachmentIndex + 1);
        clearValues[attachmentIndex].color = value;

        return true;
    } else if (attachmentDescription.loadOp == vk::AttachmentLoadOp::eClear && clearValues[attachmentIndex].color.uint32 == value.uint32) {
        // The attachment is already being cleared to the exact same value, so no further work is required
        return true;
    }

    return false;
}
void operator()(vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle, GPU &gpu) { void operator()(vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle, GPU &gpu) {
storage->device = &gpu.vkDevice; storage->device = &gpu.vkDevice;
auto preserveAttachmentIt{preserveAttachmentReferences.begin()}; auto preserveAttachmentIt{preserveAttachmentReferences.begin()};
auto attachmentReferenceOffset{reinterpret_cast<uintptr_t>(attachmentReferences.data())};
for (auto &subpassDescription : subpassDescriptions) { for (auto &subpassDescription : subpassDescriptions) {
subpassDescription.pInputAttachments = reinterpret_cast<vk::AttachmentReference *>(attachmentReferenceOffset + reinterpret_cast<uintptr_t>(subpassDescription.pInputAttachments)); subpassDescription.pInputAttachments = RebasePointer(attachmentReferences, subpassDescription.pInputAttachments);
subpassDescription.pColorAttachments = reinterpret_cast<vk::AttachmentReference *>(attachmentReferenceOffset + reinterpret_cast<uintptr_t>(subpassDescription.pColorAttachments)); subpassDescription.pColorAttachments = RebasePointer(attachmentReferences, subpassDescription.pColorAttachments);
auto depthStencilAttachmentOffset{reinterpret_cast<uintptr_t>(subpassDescription.pDepthStencilAttachment)}; auto depthStencilAttachmentOffset{reinterpret_cast<uintptr_t>(subpassDescription.pDepthStencilAttachment)};
if (depthStencilAttachmentOffset != std::numeric_limits<uintptr_t>::max()) if (depthStencilAttachmentOffset != DepthStencilNull)
subpassDescription.pDepthStencilAttachment = reinterpret_cast<vk::AttachmentReference *>(attachmentReferenceOffset + depthStencilAttachmentOffset); subpassDescription.pDepthStencilAttachment = RebasePointer(attachmentReferences, subpassDescription.pDepthStencilAttachment);
else else
subpassDescription.pDepthStencilAttachment = nullptr; subpassDescription.pDepthStencilAttachment = nullptr;
@ -165,7 +245,7 @@ namespace skyline::gpu::interconnect::node {
for (auto &texture : storage->textures) { for (auto &texture : storage->textures) {
texture->lock(); texture->lock();
texture->WaitOnBacking(); texture->WaitOnBacking();
if (texture->cycle != cycle) if (texture->cycle.lock() != cycle)
texture->WaitOnFence(); texture->WaitOnFence();
} }
@ -209,7 +289,7 @@ namespace skyline::gpu::interconnect::node {
/** /**
* @brief A FunctionNode which progresses to the next subpass prior to calling the function * @brief A FunctionNode which progresses to the next subpass prior to calling the function
*/ */
struct NextSubpassNode : FunctionNode { struct NextSubpassNode : private FunctionNode {
using FunctionNode::FunctionNode; using FunctionNode::FunctionNode;
void operator()(vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle, GPU &gpu) { void operator()(vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle, GPU &gpu) {

View File

@ -88,12 +88,30 @@ namespace skyline::gpu::interconnect {
switch (format) { switch (format) {
case maxwell3d::RenderTarget::ColorFormat::None: case maxwell3d::RenderTarget::ColorFormat::None:
return {}; return {};
case maxwell3d::RenderTarget::ColorFormat::R32B32G32A32Float:
return format::R32B32G32A32Float;
case maxwell3d::RenderTarget::ColorFormat::R16G16B16A16Float:
return format::R16G16B16A16Float;
case maxwell3d::RenderTarget::ColorFormat::A2B10G10R10Unorm: case maxwell3d::RenderTarget::ColorFormat::A2B10G10R10Unorm:
return format::A2B10G10R10Unorm; return format::A2B10G10R10Unorm;
case maxwell3d::RenderTarget::ColorFormat::R8G8B8A8Unorm: case maxwell3d::RenderTarget::ColorFormat::R8G8B8A8Unorm:
return format::R8G8B8A8Unorm; return format::R8G8B8A8Unorm;
case maxwell3d::RenderTarget::ColorFormat::A8B8G8R8Srgb: case maxwell3d::RenderTarget::ColorFormat::A8B8G8R8Srgb:
return format::A8B8G8R8Srgb; return format::A8B8G8R8Srgb;
case maxwell3d::RenderTarget::ColorFormat::R16G16Snorm:
return format::R16G16Snorm;
case maxwell3d::RenderTarget::ColorFormat::R16G16Float:
return format::R16G16Float;
case maxwell3d::RenderTarget::ColorFormat::B10G11R11Float:
return format::B10G11R11Float;
case maxwell3d::RenderTarget::ColorFormat::R32Float:
return format::R32Float;
case maxwell3d::RenderTarget::ColorFormat::R8G8Snorm:
return format::R8G8Snorm;
case maxwell3d::RenderTarget::ColorFormat::R16Float:
return format::R16Float;
case maxwell3d::RenderTarget::ColorFormat::R8Unorm:
return format::R8Unorm;
default: default:
throw exception("Cannot translate the supplied RT format: 0x{:X}", static_cast<u32>(format)); throw exception("Cannot translate the supplied RT format: 0x{:X}", static_cast<u32>(format));
} }
@ -206,7 +224,20 @@ namespace skyline::gpu::interconnect {
aspect |= vk::ImageAspectFlagBits::eColor; aspect |= vk::ImageAspectFlagBits::eColor;
aspect &= renderTarget.format->vkAspect; aspect &= renderTarget.format->vkAspect;
executor.AddSubpass([aspect = aspect, clearColorValue = clearColorValue, layerId = clear.layerId, scissor = scissors.at(renderTargetIndex)](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &) { if (aspect == vk::ImageAspectFlags{})
return;
auto scissor{scissors.at(renderTargetIndex)};
scissor.extent.width = std::min(renderTarget.backing->dimensions.width - scissor.offset.x, scissor.extent.width);
scissor.extent.height = std::min(renderTarget.backing->dimensions.height - scissor.offset.y, scissor.extent.height);
if (scissor.extent.width == 0 || scissor.extent.height == 0)
return;
// AddClearSubpass clears the entirety of the attachment, so it may only be used when the scissor spans both the full width and the full height
if (scissor.extent.width == renderTarget.backing->dimensions.width && scissor.extent.height == renderTarget.backing->dimensions.height && renderTarget.range.baseArrayLayer == 0 && renderTarget.range.layerCount == 1 && clear.layerId == 0) {
executor.AddClearSubpass(renderTarget, clearColorValue);
} else {
executor.AddSubpass([aspect, clearColorValue = clearColorValue, layerId = clear.layerId, scissor](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &) {
commandBuffer.clearAttachments(vk::ClearAttachment{ commandBuffer.clearAttachments(vk::ClearAttachment{
.aspectMask = aspect, .aspectMask = aspect,
.colorAttachment = 0, .colorAttachment = 0,
@ -221,6 +252,7 @@ namespace skyline::gpu::interconnect {
}, {}, {renderTarget}); }, {}, {renderTarget});
} }
} }
}
/* Viewport Scissors */ /* Viewport Scissors */

View File

@ -53,7 +53,7 @@ namespace skyline::gpu {
engine->vsyncEvent->Signal(); engine->vsyncEvent->Signal();
// Post the frame callback to be triggered on the next display refresh // Post the frame callback to be triggered on the next display refresh
AChoreographer_postFrameCallback64(AChoreographer_getInstance(), reinterpret_cast<AChoreographer_frameCallback>(&ChoreographerCallback), engine); AChoreographer_postFrameCallback64(AChoreographer_getInstance(), reinterpret_cast<AChoreographer_frameCallback64>(&ChoreographerCallback), engine);
} }
void PresentationEngine::ChoreographerThread() { void PresentationEngine::ChoreographerThread() {
@ -61,7 +61,7 @@ namespace skyline::gpu {
try { try {
signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, signal::ExceptionalSignalHandler); signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, signal::ExceptionalSignalHandler);
choreographerLooper = ALooper_prepare(0); choreographerLooper = ALooper_prepare(0);
AChoreographer_postFrameCallback64(AChoreographer_getInstance(), reinterpret_cast<AChoreographer_frameCallback>(&ChoreographerCallback), this); AChoreographer_postFrameCallback64(AChoreographer_getInstance(), reinterpret_cast<AChoreographer_frameCallback64>(&ChoreographerCallback), this);
ALooper_pollAll(-1, nullptr, nullptr, nullptr); // Will block and process callbacks till ALooper_wake() is called ALooper_pollAll(-1, nullptr, nullptr, nullptr); // Will block and process callbacks till ALooper_wake() is called
} catch (const signal::SignalException &e) { } catch (const signal::SignalException &e) {
state.logger->Error("{}\nStack Trace:{}", e.what(), state.loader->GetStackTrace(e.frames)); state.logger->Error("{}\nStack Trace:{}", e.what(), state.loader->GetStackTrace(e.frames));
@ -122,6 +122,11 @@ namespace skyline::gpu {
if ((capabilities.supportedUsageFlags & presentUsage) != presentUsage) if ((capabilities.supportedUsageFlags & presentUsage) != presentUsage)
throw exception("Swapchain doesn't support image usage '{}': {}", vk::to_string(presentUsage), vk::to_string(capabilities.supportedUsageFlags)); throw exception("Swapchain doesn't support image usage '{}': {}", vk::to_string(presentUsage), vk::to_string(capabilities.supportedUsageFlags));
auto requestedMode{state.settings->disableFrameThrottling ? vk::PresentModeKHR::eMailbox : vk::PresentModeKHR::eFifo};
auto modes{gpu.vkPhysicalDevice.getSurfacePresentModesKHR(**vkSurface)};
if (std::find(modes.begin(), modes.end(), requestedMode) == modes.end())
throw exception("Swapchain doesn't support present mode: {}", vk::to_string(requestedMode));
vkSwapchain.emplace(gpu.vkDevice, vk::SwapchainCreateInfoKHR{ vkSwapchain.emplace(gpu.vkDevice, vk::SwapchainCreateInfoKHR{
.surface = **vkSurface, .surface = **vkSurface,
.minImageCount = minImageCount, .minImageCount = minImageCount,
@ -132,7 +137,7 @@ namespace skyline::gpu {
.imageUsage = presentUsage, .imageUsage = presentUsage,
.imageSharingMode = vk::SharingMode::eExclusive, .imageSharingMode = vk::SharingMode::eExclusive,
.compositeAlpha = vk::CompositeAlphaFlagBitsKHR::eInherit, .compositeAlpha = vk::CompositeAlphaFlagBitsKHR::eInherit,
.presentMode = state.settings->disableFrameThrottling ? vk::PresentModeKHR::eMailbox : vk::PresentModeKHR::eFifo, .presentMode = requestedMode,
.clipped = true, .clipped = true,
}); });

View File

@ -9,11 +9,24 @@ namespace skyline::gpu::format {
using Format = gpu::texture::FormatBase; using Format = gpu::texture::FormatBase;
using vkf = vk::Format; using vkf = vk::Format;
using vka = vk::ImageAspectFlagBits; using vka = vk::ImageAspectFlagBits;
using swc = gpu::texture::SwizzleChannel;
constexpr Format R8G8B8A8Unorm{sizeof(u32), 1, 1, vkf::eR8G8B8A8Unorm, vka::eColor}; constexpr Format R8G8B8A8Unorm{sizeof(u32), vkf::eR8G8B8A8Unorm};
constexpr Format R5G6B5Unorm{sizeof(u16), 1, 1, vkf::eR5G6B5UnormPack16, vka::eColor}; constexpr Format R5G6B5Unorm{sizeof(u16), vkf::eR5G6B5UnormPack16};
constexpr Format A2B10G10R10Unorm{sizeof(u32), 1, 1, vkf::eA2B10G10R10UnormPack32, vka::eColor}; constexpr Format A2B10G10R10Unorm{sizeof(u32), vkf::eA2B10G10R10UnormPack32};
constexpr Format A8B8G8R8Srgb{sizeof(u32), 1, 1, vkf::eA8B8G8R8SrgbPack32, vka::eColor}; constexpr Format A8B8G8R8Srgb{sizeof(u32), vkf::eA8B8G8R8SrgbPack32};
constexpr Format R16G16Snorm{sizeof(u32), vkf::eR16G16Snorm};
constexpr Format R16G16Float{sizeof(u32), vkf::eR16G16Sfloat};
constexpr Format B10G11R11Float{sizeof(u32), vkf::eB10G11R11UfloatPack32};
constexpr Format R32Float{sizeof(u32), vkf::eR32Sfloat};
constexpr Format R8G8Snorm{sizeof(u16), vkf::eR8G8Snorm};
constexpr Format R16Float{sizeof(u16), vkf::eR16Sfloat};
constexpr Format R8Unorm{sizeof(u8), vkf::eR8Unorm};
constexpr Format R32B32G32A32Float{sizeof(u32) * 4, vkf::eR32G32B32A32Sfloat, .swizzle = {
.blue = swc::Green,
.green = swc::Blue,
}};
constexpr Format R16G16B16A16Float{sizeof(u16) * 4, vkf::eR16G16B16A16Sfloat};
/** /**
* @brief Converts a Vulkan format to a Skyline format * @brief Converts a Vulkan format to a Skyline format
@ -28,6 +41,22 @@ namespace skyline::gpu::format {
return A2B10G10R10Unorm; return A2B10G10R10Unorm;
case vk::Format::eA8B8G8R8SrgbPack32: case vk::Format::eA8B8G8R8SrgbPack32:
return A8B8G8R8Srgb; return A8B8G8R8Srgb;
case vk::Format::eR16G16Snorm:
return R16G16Snorm;
case vk::Format::eR16G16Sfloat:
return R16G16Float;
case vk::Format::eB10G11R11UfloatPack32:
return B10G11R11Float;
case vk::Format::eR32Sfloat:
return format::R32Float;
case vk::Format::eR16Sfloat:
return R16Float;
case vk::Format::eR8G8Snorm:
return R8G8Snorm;
case vk::Format::eR8Unorm:
return R8Unorm;
case vk::Format::eR16G16B16A16Sfloat:
return R16G16B16A16Float;
default: default:
throw exception("Vulkan format not supported: '{}'", vk::to_string(format)); throw exception("Vulkan format not supported: '{}'", vk::to_string(format));
} }

View File

@ -101,8 +101,9 @@ namespace skyline::gpu {
} }
void Texture::WaitOnFence() { void Texture::WaitOnFence() {
if (cycle) { auto lCycle{cycle.lock()};
cycle->Wait(); if (lCycle) {
lCycle->Wait();
cycle.reset(); cycle.reset();
} }
} }
@ -232,7 +233,7 @@ namespace skyline::gpu {
throw exception("Backing properties changing during sync is not supported"); throw exception("Backing properties changing during sync is not supported");
WaitOnFence(); WaitOnFence();
cycle = gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) { auto lCycle{gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) {
auto image{GetBacking()}; auto image{GetBacking()};
if (layout != vk::ImageLayout::eTransferDstOptimal) { if (layout != vk::ImageLayout::eTransferDstOptimal) {
commandBuffer.pipelineBarrier(layout != vk::ImageLayout::eUndefined ? vk::PipelineStageFlagBits::eTopOfPipe : vk::PipelineStageFlagBits::eBottomOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{ commandBuffer.pipelineBarrier(layout != vk::ImageLayout::eUndefined ? vk::PipelineStageFlagBits::eTopOfPipe : vk::PipelineStageFlagBits::eBottomOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{
@ -277,8 +278,9 @@ namespace skyline::gpu {
.layerCount = 1, .layerCount = 1,
}, },
}); });
}); })};
cycle->AttachObjects(stagingBuffer, shared_from_this()); lCycle->AttachObjects(stagingBuffer, shared_from_this());
cycle = lCycle;
} }
} }
@ -309,7 +311,7 @@ namespace skyline::gpu {
else if (source->format != format) else if (source->format != format)
throw exception("Cannot copy from image with different format"); throw exception("Cannot copy from image with different format");
cycle = gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) { auto lCycle{gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) {
auto sourceBacking{source->GetBacking()}; auto sourceBacking{source->GetBacking()};
if (source->layout != vk::ImageLayout::eTransferSrcOptimal) { if (source->layout != vk::ImageLayout::eTransferSrcOptimal) {
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{ commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{
@ -377,8 +379,9 @@ namespace skyline::gpu {
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.subresourceRange = subresource, .subresourceRange = subresource,
}); });
}); })};
cycle->AttachObjects(std::move(source), shared_from_this()); lCycle->AttachObjects(std::move(source), shared_from_this());
cycle = lCycle;
} }
TextureView::TextureView(std::shared_ptr<Texture> backing, vk::ImageViewType type, vk::ImageSubresourceRange range, texture::Format format, vk::ComponentMapping mapping) : backing(std::move(backing)), type(type), format(format), mapping(mapping), range(range) {} TextureView::TextureView(std::shared_ptr<Texture> backing, vk::ImageViewType type, vk::ImageSubresourceRange range, texture::Format format, vk::ComponentMapping mapping) : backing(std::move(backing)), type(type), format(format), mapping(mapping), range(range) {}

View File

@ -58,15 +58,58 @@ namespace skyline::gpu {
} }
}; };
/**
 * @brief The source which a single channel of a texture is populated from
 */
enum class SwizzleChannel : u8 {
    Zero = 0, //!< The channel is filled with a constant 0
    One = 1, //!< The channel is filled with a constant 1
    Red = 2, //!< The channel is sourced from the red color channel
    Green = 3, //!< The channel is sourced from the green color channel
    Blue = 4, //!< The channel is sourced from the blue color channel
    Alpha = 5, //!< The channel is sourced from the alpha channel
};
/**
 * @brief A per-channel remapping of the components of a texture, every channel defaults to being sourced from itself
 */
struct Swizzle {
    SwizzleChannel red{SwizzleChannel::Red}; //!< Swizzle for the red channel
    SwizzleChannel green{SwizzleChannel::Green}; //!< Swizzle for the green channel
    SwizzleChannel blue{SwizzleChannel::Blue}; //!< Swizzle for the blue channel
    SwizzleChannel alpha{SwizzleChannel::Alpha}; //!< Swizzle for the alpha channel

    /**
     * @brief Converts the swizzle into the equivalent Vulkan component mapping
     * @note This is const-qualified as constexpr member functions aren't implicitly const, it allows the conversion to be used on swizzles that are members of const/constexpr objects
     */
    constexpr operator vk::ComponentMapping() const {
        auto swizzleConvert{[](SwizzleChannel channel) -> vk::ComponentSwizzle {
            switch (channel) {
                case SwizzleChannel::Zero:
                    return vk::ComponentSwizzle::eZero;
                case SwizzleChannel::One:
                    return vk::ComponentSwizzle::eOne;
                case SwizzleChannel::Red:
                    return vk::ComponentSwizzle::eR;
                case SwizzleChannel::Green:
                    return vk::ComponentSwizzle::eG;
                case SwizzleChannel::Blue:
                    return vk::ComponentSwizzle::eB;
                case SwizzleChannel::Alpha:
                    return vk::ComponentSwizzle::eA;
            }

            // Only reachable with a value outside of the enumeration, returning here prevents control from flowing off the end of the lambda which would be undefined behavior
            return vk::ComponentSwizzle::eIdentity;
        }};

        return vk::ComponentMapping{
            .r = swizzleConvert(red),
            .g = swizzleConvert(green),
            .b = swizzleConvert(blue),
            .a = swizzleConvert(alpha),
        };
    }
};
/** /**
* @note Blocks refers to the atomic unit of a compressed format (IE: The minimum amount of data that can be decompressed) * @note Blocks refers to the atomic unit of a compressed format (IE: The minimum amount of data that can be decompressed)
*/ */
struct FormatBase { struct FormatBase {
u8 bpb{}; //!< Bytes Per Block, this is used instead of bytes per pixel as that might not be a whole number for compressed formats u8 bpb{}; //!< Bytes Per Block, this is used instead of bytes per pixel as that might not be a whole number for compressed formats
u16 blockHeight{}; //!< The height of a block in pixels
u16 blockWidth{}; //!< The width of a block in pixels
vk::Format vkFormat{vk::Format::eUndefined}; vk::Format vkFormat{vk::Format::eUndefined};
vk::ImageAspectFlags vkAspect{vk::ImageAspectFlagBits::eColor}; vk::ImageAspectFlags vkAspect{vk::ImageAspectFlagBits::eColor};
Swizzle swizzle{};
u16 blockHeight{1}; //!< The height of a block in pixels
u16 blockWidth{1}; //!< The width of a block in pixels
constexpr bool IsCompressed() const { constexpr bool IsCompressed() const {
return (blockHeight != 1) || (blockWidth != 1); return (blockHeight != 1) || (blockWidth != 1);
@ -182,48 +225,6 @@ namespace skyline::gpu {
} }
}; };
/**
 * @brief The source which a single channel of a texture is populated from
 */
enum class SwizzleChannel : u8 {
    Zero = 0, //!< The channel is filled with a constant 0
    One = 1, //!< The channel is filled with a constant 1
    Red = 2, //!< The channel is sourced from the red color channel
    Green = 3, //!< The channel is sourced from the green color channel
    Blue = 4, //!< The channel is sourced from the blue color channel
    Alpha = 5, //!< The channel is sourced from the alpha channel
};
/**
 * @brief A per-channel remapping of the components of a texture, every channel defaults to being sourced from itself
 */
struct Swizzle {
    SwizzleChannel red{SwizzleChannel::Red}; //!< Swizzle for the red channel
    SwizzleChannel green{SwizzleChannel::Green}; //!< Swizzle for the green channel
    SwizzleChannel blue{SwizzleChannel::Blue}; //!< Swizzle for the blue channel
    SwizzleChannel alpha{SwizzleChannel::Alpha}; //!< Swizzle for the alpha channel

    /**
     * @brief Converts the swizzle into the equivalent Vulkan component mapping
     * @note This is const-qualified as constexpr member functions aren't implicitly const, it allows the conversion to be used on swizzles that are members of const/constexpr objects
     */
    constexpr operator vk::ComponentMapping() const {
        auto swizzleConvert{[](SwizzleChannel channel) -> vk::ComponentSwizzle {
            switch (channel) {
                case SwizzleChannel::Zero:
                    return vk::ComponentSwizzle::eZero;
                case SwizzleChannel::One:
                    return vk::ComponentSwizzle::eOne;
                case SwizzleChannel::Red:
                    return vk::ComponentSwizzle::eR;
                case SwizzleChannel::Green:
                    return vk::ComponentSwizzle::eG;
                case SwizzleChannel::Blue:
                    return vk::ComponentSwizzle::eB;
                case SwizzleChannel::Alpha:
                    return vk::ComponentSwizzle::eA;
            }

            // Only reachable with a value outside of the enumeration, returning here prevents control from flowing off the end of the lambda which would be undefined behavior
            return vk::ComponentSwizzle::eIdentity;
        }};

        return vk::ComponentMapping{
            .r = swizzleConvert(red),
            .g = swizzleConvert(green),
            .b = swizzleConvert(blue),
            .a = swizzleConvert(alpha),
        };
    }
};
/** /**
* @brief The type of a texture to determine the access patterns for it * @brief The type of a texture to determine the access patterns for it
* @note This is effectively the Tegra X1 texture types with the 1DBuffer + 2DNoMipmap removed as those are handled elsewhere * @note This is effectively the Tegra X1 texture types with the 1DBuffer + 2DNoMipmap removed as those are handled elsewhere
@ -314,7 +315,7 @@ namespace skyline::gpu {
friend TextureView; friend TextureView;
public: public:
std::shared_ptr<FenceCycle> cycle; //!< A fence cycle for when any host operation mutating the texture has completed, it must be waited on prior to any mutations to the backing std::weak_ptr<FenceCycle> cycle; //!< A fence cycle for when any host operation mutating the texture has completed, it must be waited on prior to any mutations to the backing
std::optional<GuestTexture> guest; std::optional<GuestTexture> guest;
texture::Dimensions dimensions; texture::Dimensions dimensions;
texture::Format format; texture::Format format;

View File

@ -84,7 +84,7 @@ namespace skyline::loader {
size_t length{}; size_t length{};
std::unique_ptr<char, decltype(&std::free)> demangled{abi::__cxa_demangle(symbol.name, nullptr, &length, &status), std::free}; std::unique_ptr<char, decltype(&std::free)> demangled{abi::__cxa_demangle(symbol.name, nullptr, &length, &status), std::free};
return fmt::format("\n* 0x{:X} ({} from {})", reinterpret_cast<uintptr_t>(pointer), (status == 0) ? std::string(demangled.get()) : symbol.name, symbol.executableName); return fmt::format("\n* 0x{:X} ({} from {})", reinterpret_cast<uintptr_t>(pointer), (status == 0) ? std::string_view(demangled.get()) : symbol.name, symbol.executableName);
} else if (!symbol.executableName.empty()) { } else if (!symbol.executableName.empty()) {
return fmt::format("\n* 0x{:X} (from {})", reinterpret_cast<uintptr_t>(pointer), symbol.executableName); return fmt::format("\n* 0x{:X} (from {})", reinterpret_cast<uintptr_t>(pointer), symbol.executableName);
} else if (dladdr(pointer, &info)) { } else if (dladdr(pointer, &info)) {
@ -92,7 +92,7 @@ namespace skyline::loader {
size_t length{}; size_t length{};
std::unique_ptr<char, decltype(&std::free)> demangled{abi::__cxa_demangle(info.dli_sname, nullptr, &length, &status), std::free}; std::unique_ptr<char, decltype(&std::free)> demangled{abi::__cxa_demangle(info.dli_sname, nullptr, &length, &status), std::free};
return fmt::format("\n* 0x{:X} ({} from {})", reinterpret_cast<uintptr_t>(pointer), (status == 0) ? std::string(demangled.get()) : info.dli_sname, info.dli_fname); return fmt::format("\n* 0x{:X} ({} from {})", reinterpret_cast<uintptr_t>(pointer), (status == 0) ? std::string_view(demangled.get()) : info.dli_sname ? info.dli_sname : "Unresolved", info.dli_fname ? info.dli_fname : "Unresolved");
} else { } else {
return fmt::format("\n* 0x{:X}", reinterpret_cast<uintptr_t>(pointer)); return fmt::format("\n* 0x{:X}", reinterpret_cast<uintptr_t>(pointer));
} }

View File

@ -42,9 +42,18 @@ namespace skyline::soc::gm20b::engine::maxwell3d::type {
enum class ColorFormat : u32 { enum class ColorFormat : u32 {
None = 0x0, None = 0x0,
R32B32G32A32Float = 0xC0,
R16G16B16A16Float = 0xCA,
A2B10G10R10Unorm = 0xD1, A2B10G10R10Unorm = 0xD1,
R8G8B8A8Unorm = 0xD5, R8G8B8A8Unorm = 0xD5,
A8B8G8R8Srgb = 0xD6, A8B8G8R8Srgb = 0xD6,
R16G16Snorm = 0xDB,
R16G16Float = 0xDE,
B10G11R11Float = 0xE0,
R32Float = 0xE5,
R8G8Snorm = 0xEB,
R16Float = 0xF2,
R8Unorm = 0xF3,
} format; } format;
struct TileMode { struct TileMode {