From 0d9992cb8e5597714c9eb275ed1ca54a5c29d231 Mon Sep 17 00:00:00 2001 From: lynxnb Date: Wed, 20 Apr 2022 11:26:06 +0200 Subject: [PATCH] Implement `QuadList` support for non-indexed draws --- app/CMakeLists.txt | 1 + app/src/main/cpp/skyline/gpu/buffer.h | 10 ++++ .../gpu/interconnect/conversion/quads.cpp | 22 +++++++ .../gpu/interconnect/conversion/quads.h | 33 +++++++++++ .../gpu/interconnect/graphics_context.h | 59 +++++++++++++++---- .../skyline/soc/gm20b/engines/maxwell_3d.cpp | 5 +- 6 files changed, 117 insertions(+), 13 deletions(-) create mode 100644 app/src/main/cpp/skyline/gpu/interconnect/conversion/quads.cpp create mode 100644 app/src/main/cpp/skyline/gpu/interconnect/conversion/quads.h diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt index 458e9eb8..cf38e2d7 100644 --- a/app/CMakeLists.txt +++ b/app/CMakeLists.txt @@ -171,6 +171,7 @@ add_library(skyline SHARED ${source_DIR}/skyline/gpu/shader_manager.cpp ${source_DIR}/skyline/gpu/interconnect/command_executor.cpp ${source_DIR}/skyline/gpu/interconnect/command_nodes.cpp + ${source_DIR}/skyline/gpu/interconnect/conversion/quads.cpp ${source_DIR}/skyline/soc/smmu.cpp ${source_DIR}/skyline/soc/host1x/syncpoint.cpp ${source_DIR}/skyline/soc/host1x/command_fifo.cpp diff --git a/app/src/main/cpp/skyline/gpu/buffer.h b/app/src/main/cpp/skyline/gpu/buffer.h index b9a7b9d4..5d3b147b 100644 --- a/app/src/main/cpp/skyline/gpu/buffer.h +++ b/app/src/main/cpp/skyline/gpu/buffer.h @@ -87,6 +87,16 @@ namespace skyline::gpu { return backing.vkBuffer; } + /** + * @return A span over the backing of this buffer + * @note This operation **must** be performed only on host-only buffers since synchronization is handled internally for guest-backed buffers, an exception is thrown if this buffer is guest-backed + */ + span GetBackingSpan() { + if (guest) + throw exception("Attempted to get a span of a guest-backed buffer"); + return span(backing); + } + Buffer(GPU &gpu, GuestBuffer guest); /** diff --git a/app/src/main/cpp/skyline/gpu/interconnect/conversion/quads.cpp 
b/app/src/main/cpp/skyline/gpu/interconnect/conversion/quads.cpp new file mode 100644 index 00000000..e073b088 --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/interconnect/conversion/quads.cpp @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#include "quads.h" + +namespace skyline::gpu::interconnect::conversion::quads { + void GenerateQuadListConversionBuffer(u32 *dest, u32 vertexCount) { + #pragma clang loop vectorize(enable) interleave(enable) unroll(enable) + for (u32 i{}; i + 3 < vertexCount; i += 4) { + // Given a quad ABCD, we want to generate triangles ABC & CDA; only whole quads are visited (i + 3 must be a valid vertex) so trailing vertices never emit indices past the end of the destination buffer + // Triangle ABC + *(dest++) = i + 0; + *(dest++) = i + 1; + *(dest++) = i + 2; + + // Triangle CDA + *(dest++) = i + 2; + *(dest++) = i + 3; + *(dest++) = i + 0; + } + } +} diff --git a/app/src/main/cpp/skyline/gpu/interconnect/conversion/quads.h b/app/src/main/cpp/skyline/gpu/interconnect/conversion/quads.h new file mode 100644 index 00000000..4556f11c --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/interconnect/conversion/quads.h @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#pragma once + +#include + +namespace skyline::gpu::interconnect::conversion::quads { + constexpr u32 EmittedIndexCount{6}; //!< The number of indices needed to draw a quad with two triangles + constexpr u32 QuadVertexCount{4}; //!< The amount of vertices a quad is composed of + + /** + * @return The amount of indices emitted converting a buffer with the supplied element count, only whole quads emit indices so trailing elements that do not form a quad are ignored + */ + constexpr u32 GetIndexCount(u32 count) { + return (count / QuadVertexCount) * EmittedIndexCount; + } + + /** + * @return The minimum size (in bytes) required to store the quad index buffer of the given type after conversion + * @tparam T The type of an element in the index buffer + */ + template + constexpr size_t GetRequiredBufferSize(u32 count) { + return 
GetIndexCount(count) * sizeof(T); + } + + /** + * @brief Create an index buffer that repeats quad vertices to generate a triangle list + * @note The size of the supplied buffer should be at least the size returned by GetRequiredBufferSize() + */ + void GenerateQuadListConversionBuffer(u32 *dest, u32 vertexCount); +} diff --git a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h index 60de2769..91ea42f4 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h @@ -15,6 +15,7 @@ #include "command_executor.h" #include "types/tsc.h" #include "types/tic.h" +#include "conversion/quads.h" namespace skyline::gpu::interconnect { namespace maxwell3d = soc::gm20b::engine::maxwell3d::type; @@ -1581,6 +1582,25 @@ namespace skyline::gpu::interconnect { }; std::array vertexAttributes{}; + public: + bool needsQuadConversion{}; //!< Whether the current primitive topology is quads and needs conversion to triangles + + private: + std::shared_ptr quadListConversionBuffer{}; //!< Index buffer used for QuadList conversion + + /** + * @brief Retrieves an index buffer for converting a non-indexed quad list to a triangle list, the buffer is cached and only recreated and refilled when there is none yet or the existing one is too small for the requested count + * @result A tuple containing a view over the index buffer, the index type and the index count + */ + std::tuple GetQuadListConversionBuffer(u32 count) { + vk::DeviceSize size{conversion::quads::GetRequiredBufferSize(count)}; + if (!quadListConversionBuffer || quadListConversionBuffer->GetBackingSpan().size_bytes() < size) { + quadListConversionBuffer = std::make_shared(gpu, size); + conversion::quads::GenerateQuadListConversionBuffer(quadListConversionBuffer->GetBackingSpan().cast().data(), count); + } + return {quadListConversionBuffer->GetView(0, size), vk::IndexType::eUint32, conversion::quads::GetIndexCount(count)}; + } + public: void SetVertexBufferStride(u32 index, u32 stride) { 
vertexBuffers[index].bindingDescription.stride = stride; @@ -1774,27 +1794,29 @@ namespace skyline::gpu::interconnect { public: void SetPrimitiveTopology(maxwell3d::PrimitiveTopology topology) { - auto[vkTopology, shaderTopology] = [topology]() -> std::tuple { + auto[vkTopology, shaderTopology, isQuad] = [topology]() -> std::tuple { using MaxwellTopology = maxwell3d::PrimitiveTopology; using VkTopology = vk::PrimitiveTopology; using ShaderTopology = ShaderCompiler::InputTopology; switch (topology) { // @fmt:off - case MaxwellTopology::PointList: return {VkTopology::ePointList, ShaderTopology::Points}; + case MaxwellTopology::PointList: return {VkTopology::ePointList, ShaderTopology::Points, false}; - case MaxwellTopology::LineList: return {VkTopology::eLineList, ShaderTopology::Lines}; - case MaxwellTopology::LineStrip: return {VkTopology::eLineStrip, ShaderTopology::Lines}; - case MaxwellTopology::LineListWithAdjacency: return {VkTopology::eLineListWithAdjacency, ShaderTopology::LinesAdjacency}; - case MaxwellTopology::LineStripWithAdjacency: return {VkTopology::eLineStripWithAdjacency, ShaderTopology::LinesAdjacency}; + case MaxwellTopology::LineList: return {VkTopology::eLineList, ShaderTopology::Lines, false}; + case MaxwellTopology::LineStrip: return {VkTopology::eLineStrip, ShaderTopology::Lines, false}; + case MaxwellTopology::LineListWithAdjacency: return {VkTopology::eLineListWithAdjacency, ShaderTopology::LinesAdjacency, false}; + case MaxwellTopology::LineStripWithAdjacency: return {VkTopology::eLineStripWithAdjacency, ShaderTopology::LinesAdjacency, false}; - case MaxwellTopology::TriangleList: return {VkTopology::eTriangleList, ShaderTopology::Triangles}; - case MaxwellTopology::TriangleStrip: return {VkTopology::eTriangleStrip, ShaderTopology::Triangles}; - case MaxwellTopology::TriangleFan: return {VkTopology::eTriangleFan, ShaderTopology::Triangles}; - case MaxwellTopology::TriangleListWithAdjacency: return {VkTopology::eTriangleListWithAdjacency, 
ShaderTopology::TrianglesAdjacency}; - case MaxwellTopology::TriangleStripWithAdjacency: return {VkTopology::eTriangleStripWithAdjacency, ShaderTopology::TrianglesAdjacency}; + case MaxwellTopology::TriangleList: return {VkTopology::eTriangleList, ShaderTopology::Triangles, false}; + case MaxwellTopology::TriangleStrip: return {VkTopology::eTriangleStrip, ShaderTopology::Triangles, false}; + case MaxwellTopology::TriangleFan: return {VkTopology::eTriangleFan, ShaderTopology::Triangles, false}; + case MaxwellTopology::TriangleListWithAdjacency: return {VkTopology::eTriangleListWithAdjacency, ShaderTopology::TrianglesAdjacency, false}; + case MaxwellTopology::TriangleStripWithAdjacency: return {VkTopology::eTriangleStripWithAdjacency, ShaderTopology::TrianglesAdjacency, false}; - case MaxwellTopology::PatchList: return {VkTopology::ePatchList, ShaderTopology::Triangles}; + case MaxwellTopology::QuadList: return {VkTopology::eTriangleList, ShaderTopology::Triangles, true}; + + case MaxwellTopology::PatchList: return {VkTopology::ePatchList, ShaderTopology::Triangles, false}; // @fmt:on @@ -1804,6 +1826,7 @@ namespace skyline::gpu::interconnect { }(); inputAssemblyState.topology = vkTopology; + needsQuadConversion = isQuad; UpdateRuntimeInformation(runtimeInfo.input_topology, shaderTopology, maxwell3d::PipelineStage::Geometry); } @@ -2595,6 +2618,18 @@ namespace skyline::gpu::interconnect { auto boundIndexBuffer{std::make_shared()}; if constexpr (IsIndexed) { auto indexBufferView{GetIndexBuffer(count)}; + + if (needsQuadConversion) { + if (indexBufferView) { + throw exception("Indexed quad conversion is not supported"); + } else { + auto[bufferView, indexType, indexCount] = GetQuadListConversionBuffer(count); + indexBufferView = bufferView; + indexBuffer.type = indexType; + count = indexCount; + } + } + { std::scoped_lock lock(indexBufferView); diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp 
b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp index 5eecf20b..b88532ce 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp @@ -618,7 +618,10 @@ namespace skyline::soc::gm20b::engine::maxwell3d { }) ENGINE_CASE(drawVertexCount, { - context.DrawVertex(drawVertexCount, *registers.drawVertexFirst); + if (context.needsQuadConversion) + context.DrawIndexed(drawVertexCount, 0, *registers.drawVertexFirst); // Quads are drawn through a generated index buffer that is always numbered from vertex 0, so the first vertex is supplied as the vertex offset while the first index stays 0 (NOTE(review): assumes the parameters are indexCount, firstIndex, vertexOffset - confirm against DrawIndexed) + else + context.DrawVertex(drawVertexCount, *registers.drawVertexFirst); }) ENGINE_CASE(drawIndexCount, {