diff --git a/app/src/main/cpp/skyline/gpu/context/graphics_context.h b/app/src/main/cpp/skyline/gpu/context/graphics_context.h new file mode 100644 index 00000000..7ea9a568 --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/context/graphics_context.h @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#pragma once + +#include +#include + +namespace skyline::gpu::context { + namespace maxwell3d = soc::gm20b::engine::maxwell3d::type; + + /** + * @brief Host-equivalent context for state of the Maxwell3D engine on the guest + */ + class GraphicsContext { + private: + GPU &gpu; + + std::array viewports; + + std::array scissors; + constexpr static vk::Rect2D DefaultScissor{ + .extent = { + .height = std::numeric_limits::max(), + .width = std::numeric_limits::max(), + } + }; //!< A scissor which displays the entire viewport, utilized when the viewport scissor is disabled + + public: + GraphicsContext(GPU &gpu) : gpu(gpu) { + scissors.fill(DefaultScissor); + } + + /* Viewport Transforms */ + + /** + * @url https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/vkspec.html#vertexpostproc-viewport + * @note Comments are written in the way of getting the same viewport transformations to be done on the host rather than deriving the host structure values from the guest submitted values, fundamentally the same thing but it is consistent with not assuming a certain guest API + */ + void SetViewportX(size_t index, float scale, float translate) { + auto &viewport{viewports.at(index)}; + viewport.x = scale - translate; // Counteract the addition of the half of the width (o_x) to the host translation + viewport.width = scale * 2.0f; // Counteract the division of the width (p_x) by 2 for the host scale + } + + void SetViewportY(size_t index, float scale, float translate) { + auto &viewport{viewports.at(index)}; + viewport.y = scale - translate; // Counteract the addition of the half of 
the height (p_y/2 is center) to the host translation (o_y) + viewport.height = scale * 2.0f; // Counteract the division of the height (p_y) by 2 for the host scale + } + + void SetViewportZ(size_t index, float scale, float translate) { + auto &viewport{viewports.at(index)}; + viewport.minDepth = translate; // minDepth (o_z) directly corresponds to the host translation + viewport.maxDepth = scale + translate; // Counteract the subtraction of the maxDepth (p_z - o_z) by minDepth (o_z) for the host scale + } + + /* Viewport Scissors */ + + void SetScissor(size_t index, std::optional scissor) { + scissors.at(index) = scissor ? vk::Rect2D{ + .offset.x = scissor->horizontal.minimum, + .extent.width = scissor->horizontal.maximum, + .offset.y = scissor->vertical.minimum, + .extent.height = scissor->horizontal.maximum, + } : DefaultScissor; + } + + void SetScissorHorizontal(size_t index, maxwell3d::Scissor::ScissorBounds bounds) { + auto &scissor{scissors.at(index)}; + scissor.offset.x = bounds.minimum; + scissor.extent.width = bounds.maximum; + } + + void SetScissorVertical(size_t index, maxwell3d::Scissor::ScissorBounds bounds) { + auto &scissor{scissors.at(index)}; + scissor.offset.y = bounds.minimum; + scissor.extent.height = bounds.maximum; + } + }; +} diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/types.h b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/types.h new file mode 100644 index 00000000..c258400a --- /dev/null +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/types.h @@ -0,0 +1,386 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#pragma once + +#include + +namespace skyline::soc::gm20b::engine::maxwell3d::type { + #pragma pack(push, 1) + + /** + * @brief A 40-bit GMMU virtual address with register-packing + */ + struct Address { + u32 high; + u32 low; + + u64 Pack() { + return (static_cast(high) << 32) | low; + } + }; + static_assert(sizeof(Address) == 
sizeof(u64));
+
+    /**
+     * @brief The modes the shadow RAM can operate in, written through the mme.shadowRamControl register
+     */
+    enum class MmeShadowRamControl : u32 {
+        MethodTrack = 0, //!< Tracks all writes to registers in shadow RAM
+        MethodTrackWithFilter = 1, //!< Tracks all writes to registers in shadow RAM with a filter
+        MethodPassthrough = 2, //!< Does nothing, no write tracking or hooking
+        MethodReplay = 3, //!< Replays older tracked writes for any new writes to registers, discarding the contents of the new write
+    };
+
+    constexpr static size_t ViewportCount{16}; //!< Amount of viewports on Maxwell 3D, array size for any per-viewport parameter such as transform, scissors, etc
+
+    /**
+     * @brief The transformations applied on any primitive sent to a viewport
+     */
+    struct ViewportTransform {
+        float scaleX; //!< Scales all X-axis primitive coordinates by this factor
+        float scaleY; //!< The Y-axis equivalent of scaleX
+        float scaleZ; //!< The Z-axis equivalent of scaleX
+        float translateX; //!< Translates all X-axis primitive coordinates by this value
+        float translateY; //!< The Y-axis equivalent of translateX
+        float translateZ; //!< The Z-axis equivalent of translateX
+
+        /**
+         * @brief A component swizzle applied to primitive coordinates prior to clipping/perspective divide with optional negation
+         * @note This functionality is exposed via GL_NV_viewport_swizzle (OpenGL) and VK_NV_viewport_swizzle (Vulkan)
+         */
+        enum class Swizzle : u8 {
+            PositiveX = 0,
+            NegativeX = 1,
+            PositiveY = 2,
+            NegativeY = 3,
+            PositiveZ = 4,
+            NegativeZ = 5,
+            PositiveW = 6,
+            NegativeW = 7,
+        };
+
+        struct {
+            Swizzle x : 3;
+            u8 _pad0_ : 1;
+            Swizzle y : 3;
+            u8 _pad1_ : 1;
+            Swizzle z : 3;
+            u8 _pad2_ : 1;
+            Swizzle w : 3;
+            u32 _pad3_ : 17;
+        } swizzles; //!< One 3-bit swizzle per output component, the declared bit widths sum up to 32 bits
+
+        /**
+         * @brief The amount of subpixel bits on screen-space axes that bias if a pixel is inside a primitive for conservative rasterization
+         * @note This functionality is exposed via GL_NV_conservative_raster (OpenGL) using SubpixelPrecisionBiasNV
+         */
+        struct {
+            u8 x : 5;
+            u8 _pad0_ : 3;
+            u8 y : 5;
+            u32 _pad1_ : 19;
+        } subpixelPrecisionBias;
+    };
+    static_assert(sizeof(ViewportTransform) == (0x8 * sizeof(u32)));
+
+    /**
+     * @brief The offset and extent of the viewport for transformation of coordinates from NDC-space (Normalized Device Coordinates) to screen-space
+     * @note This is effectively unused since all this data can be derived from the viewport transform, this misses crucial data that the transform has such as depth range order and viewport axis inverse transformations
+     */
+    struct Viewport {
+        struct {
+            u16 x;
+            u16 width;
+        }; //!< The horizontal offset and extent, both 16-bit halves occupy a single 32-bit word
+
+        struct {
+            u16 y;
+            u16 height;
+        }; //!< The vertical offset and extent, both 16-bit halves occupy a single 32-bit word
+
+        float depthRangeNear;
+        float depthRangeFar;
+    };
+    static_assert(sizeof(Viewport) == (0x4 * sizeof(u32)));
+
+    /**
+     * @brief The method used to rasterize polygons, not to be confused with the primitive type
+     * @note This functionality is exposed via glPolygonMode (OpenGL)
+     */
+    enum class PolygonMode : u32 {
+        Point = 0x1B00, //!< Draw a point for every vertex
+        Line = 0x1B01, //!< Draw a line between all vertices
+        Fill = 0x1B02, //!< Fill the area bounded by the vertices
+    };
+
+    /**
+     * @brief A scissor which is used to reject all writes to non-masked regions
+     * @note All coordinates are in screen-space as defined by the viewport
+     */
+    struct Scissor {
+        u32 enable; //!< Rejects non-masked writes when enabled and allows all writes otherwise
+        struct ScissorBounds {
+            u16 minimum; //!< The lower bound of the masked region in a dimension
+            u16 maximum; //!< The higher bound of the masked region in a dimension
+        } horizontal, vertical;
+        u32 next; // NOTE(review): the purpose of this word is not evident from this file, it brings the structure up to the asserted 4 words - confirm
+    };
+    static_assert(sizeof(Scissor) == (0x4 * sizeof(u32)));
+
+    /**
+     * @brief A vertex attribute descriptor packed into a single 32-bit word, accessible as the raw word or as its individual bitfields
+     */
+    union VertexAttribute {
+        u32 raw;
+
+        // NOTE(review): the enumerator names suggest "<component count>x<bits per component>" - confirm against the hardware documentation
+        enum class Size : u8 {
+            Size_1x32 = 0x12,
+            Size_2x32 = 0x04,
+            Size_3x32 = 0x02,
+            Size_4x32 = 0x01,
+            Size_1x16 = 0x1B,
+            Size_2x16 = 0x0F,
+            Size_3x16 = 0x05,
+            Size_4x16 = 0x03,
+            Size_1x8 = 0x1D,
+            Size_2x8 = 0x18,
+            Size_3x8 = 0x13,
+            Size_4x8 = 0x0A,
+            Size_10_10_10_2 = 0x30,
+            Size_11_11_10 = 0x31,
+        };
+
+        enum class Type : u8 {
+            None = 0,
+            SNorm = 1,
+            UNorm = 2,
+            SInt = 3,
+            UInt = 4,
+            UScaled = 5,
+            SScaled = 6,
+            Float = 7,
+        };
+
+        struct {
+            u8 bufferId : 5;
+            u8 _pad0_ : 1;
+            bool fixed : 1;
+            u16 offset : 14;
+            Size size : 6;
+            Type type : 3;
+            u8 _pad1_ : 1;
+            bool bgra : 1;
+        }; // The declared bit widths sum up to 32 bits, matching the raw word
+    };
+    static_assert(sizeof(VertexAttribute) == sizeof(u32));
+
+    /**
+     * @brief The comparison functions, every function is available in two encodings: 1 to 8 and 0x200 to 0x207 (GL-suffixed)
+     * @note NOTE(review): the GL-suffixed values appear to mirror the OpenGL enumeration values - confirm
+     */
+    enum class CompareOp : u32 {
+        Never = 1,
+        Less = 2,
+        Equal = 3,
+        LessOrEqual = 4,
+        Greater = 5,
+        NotEqual = 6,
+        GreaterOrEqual = 7,
+        Always = 8,
+
+        NeverGL = 0x200,
+        LessGL = 0x201,
+        EqualGL = 0x202,
+        LessOrEqualGL = 0x203,
+        GreaterGL = 0x204,
+        NotEqualGL = 0x205,
+        GreaterOrEqualGL = 0x206,
+        AlwaysGL = 0x207,
+    };
+
+    /**
+     * @brief The blending state of a render target: separate operations and factors for the color and alpha channels
+     * @note alphaDestFactor precedes the padding word in this structure, this differs from the common 'blend' register block in Registers where the padding word precedes alphaDestFactor
+     */
+    struct Blend {
+        // Every operation is available in two encodings: a small value and a GL-suffixed value
+        enum class Op : u32 {
+            Add = 1,
+            Subtract = 2,
+            ReverseSubtract = 3,
+            Minimum = 4,
+            Maximum = 5,
+
+            AddGL = 0x8006,
+            SubtractGL = 0x8007,
+            ReverseSubtractGL = 0x8008,
+            MinimumGL = 0x800A,
+            MaximumGL = 0x800B,
+        };
+
+        // Every factor is available in two encodings: a small value and a GL-suffixed value
+        enum class Factor : u32 {
+            Zero = 0x1,
+            One = 0x2,
+            SourceColor = 0x3,
+            OneMinusSourceColor = 0x4,
+            SourceAlpha = 0x5,
+            OneMinusSourceAlpha = 0x6,
+            DestAlpha = 0x7,
+            OneMinusDestAlpha = 0x8,
+            DestColor = 0x9,
+            OneMinusDestColor = 0xA,
+            SourceAlphaSaturate = 0xB,
+            Source1Color = 0x10,
+            OneMinusSource1Color = 0x11,
+            Source1Alpha = 0x12,
+            OneMinusSource1Alpha = 0x13,
+            ConstantColor = 0x61,
+            OneMinusConstantColor = 0x62,
+            ConstantAlpha = 0x63,
+            OneMinusConstantAlpha = 0x64,
+
+            ZeroGL = 0x4000,
+            OneGL = 0x4001,
+            SourceColorGL = 0x4300,
+            OneMinusSourceColorGL = 0x4301,
+            SourceAlphaGL = 0x4302,
+            OneMinusSourceAlphaGL = 0x4303,
+            DestAlphaGL = 0x4304,
+            OneMinusDestAlphaGL = 0x4305,
+            DestColorGL = 0x4306,
+            OneMinusDestColorGL = 0x4307,
+            SourceAlphaSaturateGL = 0x4308,
+            ConstantColorGL = 0xC001,
+            OneMinusConstantColorGL = 0xC002,
+            ConstantAlphaGL = 0xC003,
+            OneMinusConstantAlphaGL = 0xC004,
+            Source1ColorGL = 0xC900,
+            OneMinusSource1ColorGL = 0xC901,
+            Source1AlphaGL = 0xC902,
+            OneMinusSource1AlphaGL = 0xC903,
+        };
+
+        struct {
+            u32 seperateAlpha;
+            Op colorOp;
+            Factor colorSrcFactor;
+            Factor colorDestFactor;
+            Op alphaOp;
+            Factor alphaSrcFactor;
+            Factor alphaDestFactor;
+            u32 _pad_;
+        };
+    };
+    static_assert(sizeof(Blend) == (sizeof(u32) * 8));
+
+    /**
+     * @brief The operations which can be performed on a stencil value, used for the fail, Z-fail and Z-pass cases of the stencil registers
+     */
+    enum class StencilOp : u32 {
+        Keep = 1,
+        Zero = 2,
+        Replace = 3,
+        IncrementAndClamp = 4,
+        DecrementAndClamp = 5,
+        Invert = 6,
+        IncrementAndWrap = 7,
+        DecrementAndWrap = 8,
+    };
+
+    /**
+     * @brief The winding order of a front-facing primitive
+     */
+    enum class FrontFace : u32 {
+        Clockwise = 0x900,
+        CounterClockwise = 0x901,
+    };
+
+    /**
+     * @brief The faces which are culled
+     */
+    enum class CullFace : u32 {
+        Front = 0x404,
+        Back = 0x405,
+        FrontAndBack = 0x408,
+    };
+
+    /**
+     * @brief The color write mask of a render target, every channel occupies 4 bits of the lower 16 bits of the raw word
+     */
+    union ColorWriteMask {
+        u32 raw;
+
+        struct {
+            u8 r : 4;
+            u8 g : 4;
+            u8 b : 4;
+            u8 a : 4;
+        };
+    };
+    static_assert(sizeof(ColorWriteMask) == sizeof(u32));
+
+    /**
+     * @brief The bitfields of the semaphore.info register which describe the semaphore operation to perform
+     */
+    struct SemaphoreInfo {
+        enum class Op : u8 {
+            Release = 0,
+            Acquire = 1,
+            Counter = 2,
+            Trap = 3,
+        };
+
+        enum class ReductionOp : u8 {
+            Add = 0,
+            Min = 1,
+            Max = 2,
+            Inc = 3,
+            Dec = 4,
+            And = 5,
+            Or = 6,
+            Xor = 7,
+        };
+
+        enum class Unit : u8 {
+            VFetch = 1,
+            VP = 2,
+            Rast = 4,
+            StrmOut = 5,
+            GP = 6,
+            ZCull = 7,
+            Prop = 10,
+            Crop = 15,
+        };
+
+        enum class SyncCondition : u8 {
+            NotEqual = 0,
+            GreaterThan = 1,
+        };
+
+        enum class Format : u8 {
+            U32 = 0,
+            I32 = 1,
+        };
+
+        enum class CounterType : u8 {
+            Zero = 0x0,
+            InputVertices = 0x1,
+            InputPrimitives = 0x3,
+            VertexShaderInvocations = 0x5,
+            GeometryShaderInvocations = 0x7,
+            GeometryShaderPrimitives = 0x9,
+            ZcullStats0 = 0xA,
+            TransformFeedbackPrimitivesWritten = 0xB,
+            ZcullStats1 = 0xC,
+            ZcullStats2 = 0xE,
+            ClipperInputPrimitives = 0xF,
+            ZcullStats3 = 0x10,
+            ClipperOutputPrimitives = 0x11,
+            PrimitivesGenerated = 0x12,
+            FragmentShaderInvocations = 0x13,
+            SamplesPassed = 0x15,
+            TransformFeedbackOffset = 0x1A,
+            TessControlShaderInvocations = 0x1B,
+            TessEvaluationShaderInvocations = 0x1D,
+            TessEvaluationShaderPrimitives = 0x1F,
+        };
+
+        enum class StructureSize : u8 {
+            FourWords = 0,
+            OneWord = 1,
+        };
+
+        // The declared bit widths below sum up to 29 bits, the static_assert after the structure pins the total size to a single 32-bit word
+        Op op : 2;
+        bool flushDisable : 1;
+        bool reductionEnable : 1;
+        bool fenceEnable : 1;
+        u8 _pad0_ : 4;
+        ReductionOp reductionOp : 3;
+        Unit unit : 4;
+        SyncCondition syncCondition : 1;
+        Format format : 2;
+        u8 _pad1_ : 1;
+        bool awakenEnable : 1;
+        u8 _pad2_ : 2;
+        CounterType counterType : 5;
+        StructureSize structureSize : 1;
+    };
+    static_assert(sizeof(SemaphoreInfo) == sizeof(u32));
+
+    /**
+     * @brief The origin of a coordinate system, used by the pointCoordReplace register
+     */
+    enum class CoordOrigin : u8 {
+        LowerLeft = 0,
+        UpperLeft = 1,
+    };
+
+    #pragma pack(pop)
+}
diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp
index e17b062c..d46d4f18 100644
--- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp
@@ -4,7 +4,7 @@
 #include 
 
 namespace skyline::soc::gm20b::engine::maxwell3d {
-    Maxwell3D::Maxwell3D(const DeviceState &state) : Engine(state), macroInterpreter(*this) {
+    Maxwell3D::Maxwell3D(const DeviceState &state) : Engine(state), macroInterpreter(*this), context(*state.gpu) {
         ResetRegs();
     }
 
@@ -13,29 +13,29 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
         registers.rasterizerEnable = true;
 
-        for (auto &transform : registers.viewportTransform) {
-            transform.swizzles.x = Registers::ViewportTransform::Swizzle::PositiveX;
-            transform.swizzles.y = Registers::ViewportTransform::Swizzle::PositiveY;
-            transform.swizzles.z = Registers::ViewportTransform::Swizzle::PositiveZ;
-            transform.swizzles.w = Registers::ViewportTransform::Swizzle::PositiveW;
+        for (auto &transform : registers.viewportTransforms) {
+            transform.swizzles.x = type::ViewportTransform::Swizzle::PositiveX;
+            transform.swizzles.y = type::ViewportTransform::Swizzle::PositiveY;
+            transform.swizzles.z = type::ViewportTransform::Swizzle::PositiveZ;
+            transform.swizzles.w = type::ViewportTransform::Swizzle::PositiveW;
         }
 
-        for (auto &viewport : registers.viewport) {
+        for (auto &viewport : registers.viewports) {
             viewport.depthRangeFar = 1.0f;
             viewport.depthRangeNear = 0.0f;
         }
 
-        registers.polygonMode.front = Registers::PolygonMode::Fill;
-        registers.polygonMode.back = Registers::PolygonMode::Fill;
+        
registers.polygonMode.front = type::PolygonMode::Fill; + registers.polygonMode.back = type::PolygonMode::Fill; - registers.stencilFront.failOp = registers.stencilFront.zFailOp = registers.stencilFront.zPassOp = Registers::StencilOp::Keep; - registers.stencilFront.compare.op = Registers::CompareOp::Always; + registers.stencilFront.failOp = registers.stencilFront.zFailOp = registers.stencilFront.zPassOp = type::StencilOp::Keep; + registers.stencilFront.compare.op = type::CompareOp::Always; registers.stencilFront.compare.mask = 0xFFFFFFFF; registers.stencilFront.writeMask = 0xFFFFFFFF; registers.stencilTwoSideEnable = true; - registers.stencilBack.failOp = registers.stencilBack.zFailOp = registers.stencilBack.zPassOp = Registers::StencilOp::Keep; - registers.stencilBack.compareOp = Registers::CompareOp::Always; + registers.stencilBack.failOp = registers.stencilBack.zFailOp = registers.stencilBack.zPassOp = type::StencilOp::Keep; + registers.stencilBack.compareOp = type::CompareOp::Always; registers.stencilBackExtra.compareMask = 0xFFFFFFFF; registers.stencilBackExtra.writeMask = 0xFFFFFFFF; @@ -44,11 +44,11 @@ namespace skyline::soc::gm20b::engine::maxwell3d { for (auto &attribute : registers.vertexAttributeState) attribute.fixed = true; - registers.depthTestFunc = Registers::CompareOp::Always; + registers.depthTestFunc = type::CompareOp::Always; - registers.blend.colorOp = registers.blend.alphaOp = Registers::Blend::Op::Add; - registers.blend.colorSrcFactor = registers.blend.alphaSrcFactor = Registers::Blend::Factor::One; - registers.blend.colorDestFactor = registers.blend.alphaDestFactor = Registers::Blend::Factor::Zero; + registers.blend.colorOp = registers.blend.alphaOp = type::Blend::Op::Add; + registers.blend.colorSrcFactor = registers.blend.alphaSrcFactor = type::Blend::Factor::One; + registers.blend.colorDestFactor = registers.blend.alphaDestFactor = type::Blend::Factor::Zero; registers.lineWidthSmooth = 1.0f; registers.lineWidthAliased = 1.0f; @@ -57,16 
+57,16 @@ namespace skyline::soc::gm20b::engine::maxwell3d { registers.pointSpriteSize = 1.0f; registers.pointCoordReplace.enable = true; - registers.frontFace = Registers::FrontFace::CounterClockwise; - registers.cullFace = Registers::CullFace::Back; + registers.frontFace = type::FrontFace::CounterClockwise; + registers.cullFace = type::CullFace::Back; for (auto &mask : registers.colorMask) mask.r = mask.g = mask.b = mask.a = 1; for (auto &blend : registers.independentBlend) { - blend.colorOp = blend.alphaOp = Registers::Blend::Op::Add; - blend.colorSrcFactor = blend.alphaSrcFactor = Registers::Blend::Factor::One; - blend.colorDestFactor = blend.alphaDestFactor = Registers::Blend::Factor::Zero; + blend.colorOp = blend.alphaOp = type::Blend::Op::Add; + blend.colorSrcFactor = blend.alphaSrcFactor = type::Blend::Factor::One; + blend.colorDestFactor = blend.alphaDestFactor = type::Blend::Factor::Zero; } registers.viewportTransformEnable = true; @@ -104,9 +104,9 @@ namespace skyline::soc::gm20b::engine::maxwell3d { registers.raw[method] = argument; - if (shadowRegisters.mme.shadowRamControl == Registers::MmeShadowRamControl::MethodTrack || shadowRegisters.mme.shadowRamControl == Registers::MmeShadowRamControl::MethodTrackWithFilter) + if (shadowRegisters.mme.shadowRamControl == type::MmeShadowRamControl::MethodTrack || shadowRegisters.mme.shadowRamControl == type::MmeShadowRamControl::MethodTrackWithFilter) shadowRegisters.raw[method] = argument; - else if (shadowRegisters.mme.shadowRamControl == Registers::MmeShadowRamControl::MethodReplay) + else if (shadowRegisters.mme.shadowRamControl == type::MmeShadowRamControl::MethodReplay) argument = shadowRegisters.raw[method]; switch (method) { @@ -120,32 +120,113 @@ namespace skyline::soc::gm20b::engine::maxwell3d { registers.mme.instructionRamPointer %= macroCode.size(); break; + case MAXWELL3D_OFFSET(mme.startAddressRamLoad): if (registers.mme.startAddressRamPointer >= macroPositions.size()) throw exception("Maximum 
amount of macros reached!"); macroPositions[registers.mme.startAddressRamPointer++] = argument; break; + case MAXWELL3D_OFFSET(mme.shadowRamControl): - shadowRegisters.mme.shadowRamControl = static_cast(argument); + shadowRegisters.mme.shadowRamControl = static_cast(argument); break; + case MAXWELL3D_OFFSET(syncpointAction): state.logger->Debug("Increment syncpoint: {}", static_cast(registers.syncpointAction.id)); state.soc->host1x.syncpoints.at(registers.syncpointAction.id).Increment(); break; + + #define VIEWPORT_TRANSFORM_CALLBACKS(index) \ + case MAXWELL3D_ARRAY_STRUCT_OFFSET(viewportTransforms, index, scaleX): \ + case MAXWELL3D_ARRAY_STRUCT_OFFSET(viewportTransforms, index, translateX): \ + context.SetViewportX(index, registers.viewportTransforms[index].scaleX, registers.viewportTransforms[index].translateX); \ + break; \ + case MAXWELL3D_ARRAY_STRUCT_OFFSET(viewportTransforms, index, scaleY): \ + case MAXWELL3D_ARRAY_STRUCT_OFFSET(viewportTransforms, index, translateY): \ + context.SetViewportY(index, registers.viewportTransforms[index].scaleY, registers.viewportTransforms[index].translateY); \ + break; \ + case MAXWELL3D_ARRAY_STRUCT_OFFSET(viewportTransforms, index, scaleZ): \ + case MAXWELL3D_ARRAY_STRUCT_OFFSET(viewportTransforms, index, translateZ): \ + context.SetViewportZ(index, registers.viewportTransforms[index].scaleY, registers.viewportTransforms[index].translateY); \ + break + + VIEWPORT_TRANSFORM_CALLBACKS(0); + VIEWPORT_TRANSFORM_CALLBACKS(1); + VIEWPORT_TRANSFORM_CALLBACKS(2); + VIEWPORT_TRANSFORM_CALLBACKS(3); + VIEWPORT_TRANSFORM_CALLBACKS(4); + VIEWPORT_TRANSFORM_CALLBACKS(5); + VIEWPORT_TRANSFORM_CALLBACKS(6); + VIEWPORT_TRANSFORM_CALLBACKS(7); + VIEWPORT_TRANSFORM_CALLBACKS(8); + VIEWPORT_TRANSFORM_CALLBACKS(9); + VIEWPORT_TRANSFORM_CALLBACKS(10); + VIEWPORT_TRANSFORM_CALLBACKS(11); + VIEWPORT_TRANSFORM_CALLBACKS(12); + VIEWPORT_TRANSFORM_CALLBACKS(13); + VIEWPORT_TRANSFORM_CALLBACKS(14); + VIEWPORT_TRANSFORM_CALLBACKS(15); + + 
static_assert(type::ViewportCount == 16); + #undef VIEWPORT_TRANSFORM_CALLBACKS + + #define SCISSOR_CALLBACKS(index) \ + case MAXWELL3D_ARRAY_STRUCT_OFFSET(scissors, index, enable): \ + context.SetScissor(index, argument ? registers.scissors[index] : std::optional{}); \ + break; \ + case MAXWELL3D_ARRAY_STRUCT_OFFSET(scissors, index, horizontal): \ + context.SetScissorHorizontal(index, registers.scissors[index].horizontal); \ + break; \ + case MAXWELL3D_ARRAY_STRUCT_OFFSET(scissors, index, vertical): \ + context.SetScissorVertical(index, registers.scissors[index].vertical); \ + break + + SCISSOR_CALLBACKS(0); + SCISSOR_CALLBACKS(1); + SCISSOR_CALLBACKS(2); + SCISSOR_CALLBACKS(3); + SCISSOR_CALLBACKS(4); + SCISSOR_CALLBACKS(5); + SCISSOR_CALLBACKS(6); + SCISSOR_CALLBACKS(7); + SCISSOR_CALLBACKS(8); + SCISSOR_CALLBACKS(9); + SCISSOR_CALLBACKS(10); + SCISSOR_CALLBACKS(11); + SCISSOR_CALLBACKS(12); + SCISSOR_CALLBACKS(13); + SCISSOR_CALLBACKS(14); + SCISSOR_CALLBACKS(15); + + static_assert(type::ViewportCount == 16); + #undef SCISSOR_CALLBACKS + case MAXWELL3D_OFFSET(semaphore.info): switch (registers.semaphore.info.op) { - case Registers::SemaphoreInfo::Op::Release: + case type::SemaphoreInfo::Op::Release: WriteSemaphoreResult(registers.semaphore.payload); break; - case Registers::SemaphoreInfo::Op::Counter: - HandleSemaphoreCounterOperation(); + + case type::SemaphoreInfo::Op::Counter: { + switch (registers.semaphore.info.counterType) { + case type::SemaphoreInfo::CounterType::Zero: + WriteSemaphoreResult(0); + break; + + default: + state.logger->Warn("Unsupported semaphore counter type: 0x{:X}", static_cast(registers.semaphore.info.counterType)); + break; + } break; + } + default: state.logger->Warn("Unsupported semaphore operation: 0x{:X}", static_cast(registers.semaphore.info.op)); break; } break; + case MAXWELL3D_OFFSET(firmwareCall[4]): registers.raw[0xD00] = 1; break; @@ -154,17 +235,6 @@ namespace skyline::soc::gm20b::engine::maxwell3d { } } - void 
Maxwell3D::HandleSemaphoreCounterOperation() { - switch (registers.semaphore.info.counterType) { - case Registers::SemaphoreInfo::CounterType::Zero: - WriteSemaphoreResult(0); - break; - default: - state.logger->Warn("Unsupported semaphore counter type: 0x{:X}", static_cast(registers.semaphore.info.counterType)); - break; - } - } - void Maxwell3D::WriteSemaphoreResult(u64 result) { struct FourWordResult { u64 value; @@ -172,10 +242,11 @@ namespace skyline::soc::gm20b::engine::maxwell3d { }; switch (registers.semaphore.info.structureSize) { - case Registers::SemaphoreInfo::StructureSize::OneWord: + case type::SemaphoreInfo::StructureSize::OneWord: state.soc->gm20b.gmmu.Write(registers.semaphore.address.Pack(), static_cast(result)); break; - case Registers::SemaphoreInfo::StructureSize::FourWords: { + + case type::SemaphoreInfo::StructureSize::FourWords: { // Convert the current nanosecond time to GPU ticks constexpr u64 NsToTickNumerator{384}; constexpr u64 NsToTickDenominator{625}; diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h index 423ebb63..df6e89bc 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h @@ -3,10 +3,19 @@ #pragma once +#include #include "engine.h" #include "maxwell/macro_interpreter.h" #define MAXWELL3D_OFFSET(field) U32_OFFSET(Registers, field) +#define MAXWELL3D_STRUCT_OFFSET(field, member) U32_OFFSET(Registers, field) + offsetof(typeof(Registers::field), member) +#define MAXWELL3D_ARRAY_OFFSET(field, index) U32_OFFSET(Registers, field) + ((sizeof(typeof(Registers::field[0])) / sizeof(u32)) * index) +#define MAXWELL3D_ARRAY_STRUCT_OFFSET(field, index, member) MAXWELL3D_ARRAY_OFFSET(field, index) + U32_OFFSET(typeof(Registers::field[0]), member) +#define MAXWELL3D_ARRAY_STRUCT_STRUCT_OFFSET(field, index, member, submember) MAXWELL3D_ARRAY_STRUCT_OFFSET(field, index, member) + 
U32_OFFSET(typeof(Registers::field[0].member), submember) + +namespace skyline::gpu::context { + class GraphicsContext; +} namespace skyline::soc::gm20b::engine::maxwell3d { /** @@ -23,356 +32,25 @@ namespace skyline::soc::gm20b::engine::maxwell3d { MacroInterpreter macroInterpreter; - void HandleSemaphoreCounterOperation(); + gpu::context::GraphicsContext context; + /** + * @brief Writes back a semaphore result to the guest with an auto-generated timestamp (if required) + * @note If the semaphore is OneWord then the result will be downcasted to a 32-bit unsigned integer + */ void WriteSemaphoreResult(u64 result); public: static constexpr u32 RegisterCount{0xE00}; //!< The number of Maxwell 3D registers /** - * @url https://github.com/devkitPro/deko3d/blob/master/source/maxwell/engine_3d.def#L478 + * @url https://github.com/devkitPro/deko3d/blob/master/source/maxwell/engine_3d.def + * @note To ease the extension of this structure, padding may follow both _padN_ and _padN_M_ formats */ #pragma pack(push, 1) union Registers { std::array raw; - struct Address { - u32 high; - u32 low; - - u64 Pack() { - return (static_cast(high) << 32) | low; - } - }; - static_assert(sizeof(Address) == sizeof(u64)); - - enum class MmeShadowRamControl : u32 { - MethodTrack = 0, - MethodTrackWithFilter = 1, - MethodPassthrough = 2, - MethodReplay = 3, - }; - - struct ViewportTransform { - enum class Swizzle : u8 { - PositiveX = 0, - NegativeX = 1, - PositiveY = 2, - NegativeY = 3, - PositiveZ = 4, - NegativeZ = 5, - PositiveW = 6, - NegativeW = 7, - }; - - float scaleX; - float scaleY; - float scaleZ; - float translateX; - float translateY; - float translateZ; - - struct { - Swizzle x : 3; - u8 _pad0_ : 1; - Swizzle y : 3; - u8 _pad1_ : 1; - Swizzle z : 3; - u8 _pad2_ : 1; - Swizzle w : 3; - u32 _pad3_ : 17; - } swizzles; - - struct { - u8 x : 5; - u8 _pad0_ : 3; - u8 y : 5; - u32 _pad1_ : 19; - } subpixelPrecisionBias; - }; - static_assert(sizeof(ViewportTransform) == (0x8 * 
sizeof(u32))); - - struct Viewport { - struct { - u16 x; - u16 width; - }; - - struct { - u16 y; - u16 height; - }; - - float depthRangeNear; - float depthRangeFar; - }; - static_assert(sizeof(Viewport) == (0x4 * sizeof(u32))); - - enum class PolygonMode : u32 { - Point = 0x1B00, - Line = 0x1B01, - Fill = 0x1B02, - }; - - union VertexAttribute { - u32 raw; - - enum class Size : u8 { - Size_1x32 = 0x12, - Size_2x32 = 0x04, - Size_3x32 = 0x02, - Size_4x32 = 0x01, - Size_1x16 = 0x1B, - Size_2x16 = 0x0F, - Size_3x16 = 0x05, - Size_4x16 = 0x03, - Size_1x8 = 0x1D, - Size_2x8 = 0x18, - Size_3x8 = 0x13, - Size_4x8 = 0x0A, - Size_10_10_10_2 = 0x30, - Size_11_11_10 = 0x31, - }; - - enum class Type : u8 { - None = 0, - SNorm = 1, - UNorm = 2, - SInt = 3, - UInt = 4, - UScaled = 5, - SScaled = 6, - Float = 7, - }; - - struct { - u8 bufferId : 5; - u8 _pad0_ : 1; - bool fixed : 1; - u16 offset : 14; - Size size : 6; - Type type : 3; - u8 _pad1_ : 1; - bool bgra : 1; - }; - }; - static_assert(sizeof(VertexAttribute) == sizeof(u32)); - - enum class CompareOp : u32 { - Never = 1, - Less = 2, - Equal = 3, - LessOrEqual = 4, - Greater = 5, - NotEqual = 6, - GreaterOrEqual = 7, - Always = 8, - - NeverGL = 0x200, - LessGL = 0x201, - EqualGL = 0x202, - LessOrEqualGL = 0x203, - GreaterGL = 0x204, - NotEqualGL = 0x205, - GreaterOrEqualGL = 0x206, - AlwaysGL = 0x207, - }; - - struct Blend { - enum class Op : u32 { - Add = 1, - Subtract = 2, - ReverseSubtract = 3, - Minimum = 4, - Maximum = 5, - - AddGL = 0x8006, - SubtractGL = 0x8007, - ReverseSubtractGL = 0x8008, - MinimumGL = 0x800A, - MaximumGL = 0x800B, - }; - - enum class Factor : u32 { - Zero = 0x1, - One = 0x2, - SourceColor = 0x3, - OneMinusSourceColor = 0x4, - SourceAlpha = 0x5, - OneMinusSourceAlpha = 0x6, - DestAlpha = 0x7, - OneMinusDestAlpha = 0x8, - DestColor = 0x9, - OneMinusDestColor = 0xA, - SourceAlphaSaturate = 0xB, - Source1Color = 0x10, - OneMinusSource1Color = 0x11, - Source1Alpha = 0x12, - OneMinusSource1Alpha = 
0x13, - ConstantColor = 0x61, - OneMinusConstantColor = 0x62, - ConstantAlpha = 0x63, - OneMinusConstantAlpha = 0x64, - - ZeroGL = 0x4000, - OneGL = 0x4001, - SourceColorGL = 0x4300, - OneMinusSourceColorGL = 0x4301, - SourceAlphaGL = 0x4302, - OneMinusSourceAlphaGL = 0x4303, - DestAlphaGL = 0x4304, - OneMinusDestAlphaGL = 0x4305, - DestColorGL = 0x4306, - OneMinusDestColorGL = 0x4307, - SourceAlphaSaturateGL = 0x4308, - ConstantColorGL = 0xC001, - OneMinusConstantColorGL = 0xC002, - ConstantAlphaGL = 0xC003, - OneMinusConstantAlphaGL = 0xC004, - Source1ColorGL = 0xC900, - OneMinusSource1ColorGL = 0xC901, - Source1AlphaGL = 0xC902, - OneMinusSource1AlphaGL = 0xC903, - }; - - struct { - u32 seperateAlpha; - Op colorOp; - Factor colorSrcFactor; - Factor colorDestFactor; - Op alphaOp; - Factor alphaSrcFactor; - Factor alphaDestFactor; - u32 _pad_; - }; - }; - static_assert(sizeof(Blend) == (sizeof(u32) * 8)); - - enum class StencilOp : u32 { - Keep = 1, - Zero = 2, - Replace = 3, - IncrementAndClamp = 4, - DecrementAndClamp = 5, - Invert = 6, - IncrementAndWrap = 7, - DecrementAndWrap = 8, - }; - - enum class FrontFace : u32 { - Clockwise = 0x900, - CounterClockwise = 0x901, - }; - - enum class CullFace : u32 { - Front = 0x404, - Back = 0x405, - FrontAndBack = 0x408, - }; - - union ColorWriteMask { - u32 raw; - - struct { - u8 r : 4; - u8 g : 4; - u8 b : 4; - u8 a : 4; - }; - }; - static_assert(sizeof(ColorWriteMask) == sizeof(u32)); - - struct SemaphoreInfo { - enum class Op : u8 { - Release = 0, - Acquire = 1, - Counter = 2, - Trap = 3, - }; - - enum class ReductionOp : u8 { - Add = 0, - Min = 1, - Max = 2, - Inc = 3, - Dec = 4, - And = 5, - Or = 6, - Xor = 7, - }; - - enum class Unit : u8 { - VFetch = 1, - VP = 2, - Rast = 4, - StrmOut = 5, - GP = 6, - ZCull = 7, - Prop = 10, - Crop = 15, - }; - - enum class SyncCondition : u8 { - NotEqual = 0, - GreaterThan = 1, - }; - - enum class Format : u8 { - U32 = 0, - I32 = 1, - }; - - enum class CounterType : u8 { - Zero = 
0x0, - InputVertices = 0x1, - InputPrimitives = 0x3, - VertexShaderInvocations = 0x5, - GeometryShaderInvocations = 0x7, - GeometryShaderPrimitives = 0x9, - ZcullStats0 = 0xA, - TransformFeedbackPrimitivesWritten = 0xB, - ZcullStats1 = 0xC, - ZcullStats2 = 0xE, - ClipperInputPrimitives = 0xF, - ZcullStats3 = 0x10, - ClipperOutputPrimitives = 0x11, - PrimitivesGenerated = 0x12, - FragmentShaderInvocations = 0x13, - SamplesPassed = 0x15, - TransformFeedbackOffset = 0x1A, - TessControlShaderInvocations = 0x1B, - TessEvaluationShaderInvocations = 0x1D, - TessEvaluationShaderPrimitives = 0x1F, - }; - - enum class StructureSize : u8 { - FourWords = 0, - OneWord = 1, - }; - - Op op : 2; - bool flushDisable : 1; - bool reductionEnable : 1; - bool fenceEnable : 1; - u8 _pad0_ : 4; - ReductionOp reductionOp : 3; - Unit unit : 4; - SyncCondition syncCondition : 1; - Format format : 2; - u8 _pad1_ : 1; - bool awakenEnable : 1; - u8 _pad2_ : 2; - CounterType counterType : 5; - StructureSize structureSize : 1; - }; - static_assert(sizeof(SemaphoreInfo) == sizeof(u32)); - - enum class CoordOrigin : u8 { - LowerLeft = 0, - UpperLeft = 1, - }; - struct { u32 _pad0_[0x40]; // 0x0 u32 noOperation; // 0x40 @@ -384,7 +62,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { u32 instructionRamLoad; // 0x46 u32 startAddressRamPointer; // 0x47 u32 startAddressRamLoad; // 0x48 - MmeShadowRamControl shadowRamControl; // 0x49 + type::MmeShadowRamControl shadowRamControl; // 0x49 } mme; u32 _pad2_[0x68]; // 0x4A @@ -401,16 +79,20 @@ namespace skyline::soc::gm20b::engine::maxwell3d { u32 _pad3_[0x2C]; // 0xB3 u32 rasterizerEnable; // 0xDF u32 _pad4_[0x1A0]; // 0xE0 - std::array viewportTransform; // 0x280 - std::array viewport; // 0x300 + std::array viewportTransforms; // 0x280 + std::array viewports; // 0x300 u32 _pad5_[0x2B]; // 0x340 struct { - PolygonMode front; // 0x36B - PolygonMode back; // 0x36C + type::PolygonMode front; // 0x36B + type::PolygonMode back; // 0x36C } polygonMode; - 
u32 _pad6_[0x68]; // 0x36D + u32 _pad6_[0x13]; // 0x36D + + std::array scissors; // 0x380 + + u32 _pad6_1_[0x15]; // 0x3C0 struct { u32 compareRef; // 0x3D5 @@ -421,11 +103,11 @@ namespace skyline::soc::gm20b::engine::maxwell3d { u32 _pad7_[0x13]; // 0x3D8 u32 rtSeparateFragData; // 0x3EB u32 _pad8_[0x6C]; // 0x3EC - std::array vertexAttributeState; // 0x458 + std::array vertexAttributeState; // 0x458 u32 _pad9_[0x4B]; // 0x478 - CompareOp depthTestFunc; // 0x4C3 + type::CompareOp depthTestFunc; // 0x4C3 float alphaTestRef; // 0x4C4 - CompareOp alphaTestFunc; // 0x4C5 + type::CompareOp alphaTestFunc; // 0x4C5 u32 drawTFBStride; // 0x4C6 struct { @@ -439,13 +121,13 @@ namespace skyline::soc::gm20b::engine::maxwell3d { struct { u32 seperateAlpha; // 0x4CF - Blend::Op colorOp; // 0x4D0 - Blend::Factor colorSrcFactor; // 0x4D1 - Blend::Factor colorDestFactor; // 0x4D2 - Blend::Op alphaOp; // 0x4D3 - Blend::Factor alphaSrcFactor; // 0x4D4 + type::Blend::Op colorOp; // 0x4D0 + type::Blend::Factor colorSrcFactor; // 0x4D1 + type::Blend::Factor colorDestFactor; // 0x4D2 + type::Blend::Op alphaOp; // 0x4D3 + type::Blend::Factor alphaSrcFactor; // 0x4D4 u32 _pad_; // 0x4D5 - Blend::Factor alphaDestFactor; // 0x4D6 + type::Blend::Factor alphaDestFactor; // 0x4D6 u32 enableCommon; // 0x4D7 std::array enable; // 0x4D8 For each render target @@ -454,12 +136,12 @@ namespace skyline::soc::gm20b::engine::maxwell3d { u32 stencilEnable; // 0x4E0 struct { - StencilOp failOp; // 0x4E1 - StencilOp zFailOp; // 0x4E2 - StencilOp zPassOp; // 0x4E3 + type::StencilOp failOp; // 0x4E1 + type::StencilOp zFailOp; // 0x4E2 + type::StencilOp zPassOp; // 0x4E3 struct { - CompareOp op; // 0x4E4 + type::CompareOp op; // 0x4E4 i32 ref; // 0x4E5 u32 mask; // 0x4E6 } compare; @@ -495,7 +177,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { u32 _pad16_[0x7]; // 0x550 struct { - Address address; // 0x557 + type::Address address; // 0x557 u32 maximumIndex; // 0x559 } texSamplerPool; @@ -504,7 +186,7 
@@ namespace skyline::soc::gm20b::engine::maxwell3d { u32 lineSmoothEnable; // 0x55C struct { - Address address; // 0x55D + type::Address address; // 0x55D u32 maximumIndex; // 0x55F } texHeaderPool; @@ -513,40 +195,40 @@ namespace skyline::soc::gm20b::engine::maxwell3d { u32 stencilTwoSideEnable; // 0x565 struct { - StencilOp failOp; // 0x566 - StencilOp zFailOp; // 0x567 - StencilOp zPassOp; // 0x568 - CompareOp compareOp; // 0x569 + type::StencilOp failOp; // 0x566 + type::StencilOp zFailOp; // 0x567 + type::StencilOp zPassOp; // 0x568 + type::CompareOp compareOp; // 0x569 } stencilBack; u32 _pad19_[0x17]; // 0x56A struct { u8 _unk_ : 2; - CoordOrigin origin : 1; + type::CoordOrigin origin : 1; u16 enable : 10; u32 _pad_ : 19; } pointCoordReplace; // 0x581 u32 _pad20_[0xC4]; // 0x582 u32 cullFaceEnable; // 0x646 - FrontFace frontFace; // 0x647 - CullFace cullFace; // 0x648 + type::FrontFace frontFace; // 0x647 + type::CullFace cullFace; // 0x648 u32 pixelCentreImage; // 0x649 u32 _pad21_; // 0x64A u32 viewportTransformEnable; // 0x64B u32 _pad22_[0x34]; // 0x64A - std::array colorMask; // 0x680 For each render target + std::array colorMask; // 0x680 For each render target u32 _pad23_[0x38]; // 0x688 struct { - Address address; // 0x6C0 + type::Address address; // 0x6C0 u32 payload; // 0x6C2 - SemaphoreInfo info; // 0x6C3 + type::SemaphoreInfo info; // 0x6C3 } semaphore; u32 _pad24_[0xBC]; // 0x6C4 - std::array independentBlend; // 0x780 For each render target + std::array independentBlend; // 0x780 For each render target u32 _pad25_[0x100]; // 0x7C0 u32 firmwareCall[0x20]; // 0x8C0 }; @@ -555,9 +237,9 @@ namespace skyline::soc::gm20b::engine::maxwell3d { #pragma pack(pop) Registers registers{}; - Registers shadowRegisters{}; //!< The shadow registers, their function is controlled by the 'shadowRamControl' register + Registers shadowRegisters{}; //!< A shadow-copy of the registers, their function is controlled by the 'shadowRamControl' register - std::array 
macroCode{}; //!< This stores GPU macros, writes to it will wraparound on overflow + std::array macroCode{}; //!< Stores GPU macros, writes to it will wraparound on overflow Maxwell3D(const DeviceState &state);