From 48d0b41f165e3408414e339b6d18f7a9e6d2addb Mon Sep 17 00:00:00 2001 From: PixelyIon Date: Mon, 15 Nov 2021 23:55:32 +0530 Subject: [PATCH] Implement Maxwell3D Common/Independent Color Write Mask Maxwell3D supports both independent and common color write masks, like color blending, but for common color write masks, rather than having register state specifically for it, the state from RT 0 is extended to all RTs. It should be noted that color write masks are included in blending state for Vulkan while being entirely independent from each other for Maxwell; this forces us to use the `independentBlend` feature even when we are doing common blending unless the color write mask is common as well, but to simplify all this logic the feature was made required as it is supported by effectively all targeted devices. --- .../gpu/interconnect/graphics_context.h | 16 +++++++++++ .../skyline/soc/gm20b/engines/maxwell/types.h | 8 +++--- .../skyline/soc/gm20b/engines/maxwell_3d.cpp | 27 ++++++++++++++++++- .../skyline/soc/gm20b/engines/maxwell_3d.h | 3 ++- 4 files changed, 48 insertions(+), 6 deletions(-) diff --git a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h index 08d1536b..dc296725 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h @@ -721,6 +721,22 @@ namespace skyline::gpu::interconnect { independentRtBlendState[index].dstAlphaBlendFactor = ConvertBlendFactor(factor); } + void SetColorWriteMask(u32 index, maxwell3d::ColorWriteMask mask) { + vk::ColorComponentFlags colorWriteMask{}; + if (mask.red) + colorWriteMask |= vk::ColorComponentFlagBits::eR; + if (mask.green) + colorWriteMask |= vk::ColorComponentFlagBits::eG; + if (mask.blue) + colorWriteMask |= vk::ColorComponentFlagBits::eB; + if (mask.alpha) + colorWriteMask |= vk::ColorComponentFlagBits::eA; + + // While blending state might include the color write mask 
on Vulkan, they are separate on Maxwell and this results in even `commonRtBlendState` requiring the `independentBlend` feature in certain circumstances where blending state might be the same but with independent color write masks + independentRtBlendState[index].colorWriteMask = colorWriteMask; + commonRtBlendState[index].colorWriteMask = colorWriteMask; + } + void SetColorBlendConstant(u32 index, float constant) { blendState.blendConstants[index] = constant; } diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/types.h b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/types.h index be6a3c41..69bb0e67 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/types.h +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/types.h @@ -419,10 +419,10 @@ namespace skyline::soc::gm20b::engine::maxwell3d::type { u32 raw; struct { - u8 r : 4; - u8 g : 4; - u8 b : 4; - u8 a : 4; + u8 red : 4; + u8 green : 4; + u8 blue : 4; + u8 alpha : 4; }; }; static_assert(sizeof(ColorWriteMask) == sizeof(u32)); diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp index ffc5737b..0611d2be 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp @@ -12,7 +12,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { } __attribute__((always_inline)) void Maxwell3D::CallMethod(u32 method, u32 argument, bool lastCall) { - Logger::Debug("Called method in Maxwell 3D: 0x{:X} args: 0x{:X}", method, argument); + Logger::Debug("Called method in Maxwell 3D: 0x{:X} args: 0x{:X}", method, argument); // Methods that are greater than the register size are for macro control if (method >= RegisterCount) [[unlikely]] { @@ -179,6 +179,17 @@ namespace skyline::soc::gm20b::engine::maxwell3d { static_assert(type::ViewportCount == 16 && type::ViewportCount < BOOST_PP_LIMIT_REPEAT); #undef SCISSOR_CALLBACKS + 
MAXWELL3D_CASE(commonColorWriteMask, { + if (commonColorWriteMask) { + auto colorWriteMask{registers.colorWriteMask[0]}; + for (u32 index{}; index != type::RenderTargetCount; index++) + context.SetColorWriteMask(index, colorWriteMask); + } else { + for (u32 index{}; index != type::RenderTargetCount; index++) + context.SetColorWriteMask(index, registers.colorWriteMask[index]); + } + }) + MAXWELL3D_CASE(renderTargetControl, { context.UpdateRenderTargetControl(renderTargetControl); }) @@ -287,6 +298,20 @@ namespace skyline::soc::gm20b::engine::maxwell3d { context.SetCullFace(cullFace); }) + #define SET_COLOR_WRITE_MASK_CALLBACK(z, index, data) \ + MAXWELL3D_ARRAY_CASE(colorWriteMask, index, { \ + if (!*registers.commonColorWriteMask) { /* Independent masks: every RT uses its own register */ \ + context.SetColorWriteMask(index, colorWriteMask); \ + } else if (index == 0) { /* Common mask: the mask of RT 0 is extended to all RTs and writes to the other masks are ignored */ \ + for (u32 idx{}; idx != type::RenderTargetCount; idx++) \ + context.SetColorWriteMask(idx, colorWriteMask); \ + } \ + }) + + BOOST_PP_REPEAT(8, SET_COLOR_WRITE_MASK_CALLBACK, 2) + static_assert(type::RenderTargetCount == 8 && type::RenderTargetCount < BOOST_PP_LIMIT_REPEAT); + #undef SET_COLOR_WRITE_MASK_CALLBACK + MAXWELL3D_CASE(viewVolumeClipControl, { context.SetDepthClampEnabled(!viewVolumeClipControl.depthClampDisable); }) diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h index dc51e54a..39ff7589 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h @@ -104,6 +104,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { }; Register<0x3D9, TiledCacheSize> tiledCacheSize; + Register<0x3E4, u32> commonColorWriteMask; //!< If enabled, the color write masks for all RTs must be set to that of the first RT Register<0x3EB, u32> rtSeparateFragData; Register<0x458, std::array> vertexAttributeState; Register<0x487, type::RenderTargetControl> renderTargetControl; @@ -210,7 +211,7 @@ namespace 
skyline::soc::gm20b::engine::maxwell3d { Register<0x649, u32> pixelCentreImage; Register<0x64B, u32> viewportTransformEnable; Register<0x674, type::ClearBuffers> clearBuffers; - Register<0x680, std::array<type::ColorWriteMask, type::RenderTargetCount>> colorMask; + Register<0x680, std::array<type::ColorWriteMask, type::RenderTargetCount>> colorWriteMask; Register<0x61F, float> depthBiasClamp; Register<0x64F, type::ViewVolumeClipControl> viewVolumeClipControl;