Implement Maxwell samplers

This commit is contained in:
Billy Laws 2022-09-22 00:13:57 +01:00
parent f1600f5ad0
commit e1717ed811
8 changed files with 280 additions and 152 deletions

View File

@ -21,10 +21,12 @@ namespace skyline::gpu::interconnect::maxwell3d {
activeState{manager, registerBundle.activeStateRegisters}, activeState{manager, registerBundle.activeStateRegisters},
clearEngineRegisters{registerBundle.clearRegisters}, clearEngineRegisters{registerBundle.clearRegisters},
constantBuffers{manager, registerBundle.constantBufferSelectorRegisters}, constantBuffers{manager, registerBundle.constantBufferSelectorRegisters},
samplers{manager, registerBundle.samplerPoolRegisters},
directState{activeState.directState} { directState{activeState.directState} {
executor.AddFlushCallback([this] { executor.AddFlushCallback([this] {
activeState.MarkAllDirty(); activeState.MarkAllDirty();
constantBuffers.MarkAllDirty(); constantBuffers.MarkAllDirty();
samplers.MarkAllDirty();
}); });
} }

View File

@ -6,6 +6,7 @@
#include "common.h" #include "common.h"
#include "active_state.h" #include "active_state.h"
#include "constant_buffers.h" #include "constant_buffers.h"
#include "samplers.h"
namespace skyline::gpu::interconnect::maxwell3d { namespace skyline::gpu::interconnect::maxwell3d {
/** /**
@ -31,6 +32,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
ActiveState::EngineRegisters activeStateRegisters; ActiveState::EngineRegisters activeStateRegisters;
ClearEngineRegisters clearRegisters; ClearEngineRegisters clearRegisters;
ConstantBufferSelectorState::EngineRegisters constantBufferSelectorRegisters; ConstantBufferSelectorState::EngineRegisters constantBufferSelectorRegisters;
SamplerPoolState::EngineRegisters samplerPoolRegisters;
}; };
private: private:
@ -38,6 +40,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
ActiveState activeState; ActiveState activeState;
ClearEngineRegisters clearEngineRegisters; ClearEngineRegisters clearEngineRegisters;
ConstantBuffers constantBuffers; ConstantBuffers constantBuffers;
Samplers samplers;
DescriptorAllocator::ActiveDescriptorSet *activeDescriptorSet{}; DescriptorAllocator::ActiveDescriptorSet *activeDescriptorSet{};

View File

@ -0,0 +1,200 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <soc/gm20b/channel.h>
#include <soc/gm20b/gmmu.h>
#include "samplers.h"
namespace skyline::gpu::interconnect::maxwell3d {
/**
 * @brief Binds the supplied dirty handle to all three registers this state reads: the sampler binding mode, the sampler pool and the header pool
 * @note Presumably a write to any of the bound registers then marks the handle's state as dirty, confirm against DirtyManager::Bind
 */
void SamplerPoolState::EngineRegisters::DirtyBind(DirtyManager &manager, dirty::Handle handle) const {
manager.Bind(handle, samplerBinding, texSamplerPool, texHeaderPool);
}
// Forwards the dirty manager, dirty handle and register references into the bound `engine` subresource, `texSamplers` starts out empty until the first Flush
SamplerPoolState::SamplerPoolState(dirty::Handle dirtyHandle, DirtyManager &manager, const EngineRegisters &engine) : engine{manager, dirtyHandle, engine} {}
/**
 * @brief Re-resolves the guest sampler pool and stores a view over its descriptors in `texSamplers`
 * @note The view always starts at the sampler pool's offset, only the entry count depends on the binding mode: the header pool's maximum index is used when samplers are bound via the header binding, the sampler pool's own maximum index otherwise
 */
void SamplerPoolState::Flush(InterconnectContext &ctx) {
    const bool boundViaHeader{engine->samplerBinding.value == engine::SamplerBinding::Value::ViaHeaderBinding};
    u32 lastIndex{boundViaHeader ? engine->texHeaderPool.maximumIndex : engine->texSamplerPool.maximumIndex};

    // The lookup result is used as a pair of a span and an offset into it at which the pool begins
    auto poolMapping{ctx.channelCtx.asCtx->gmmu.LookupBlock(engine->texSamplerPool.offset)};
    auto poolBytes{poolMapping.first.subspan(poolMapping.second)};

    // Indices are inclusive, so the pool holds one more descriptor than the maximum index
    texSamplers = poolBytes.cast<TextureSamplerControl>().first(lastIndex + 1);
}
/**
 * @brief Drops the cached view of the guest sampler pool by resetting `texSamplers` to an empty span
 */
void SamplerPoolState::PurgeCaches() {
texSamplers = span<TextureSamplerControl>{};
}
// Constructs the dirty-tracked sampler pool state from the dirty manager and the sampler pool registers, the host sampler cache starts out empty
Samplers::Samplers(DirtyManager &manager, const SamplerPoolState::EngineRegisters &engine) : samplerPool{manager, engine} {}
/**
 * @brief Marks the sampler pool state as dirty
 * @note NOTE(review): the meaning of the `true` argument is defined by ManualDirtyState::MarkDirty which is not visible here, presumably it also requests a cache purge - confirm
 */
void Samplers::MarkAllDirty() {
samplerPool.MarkDirty(true);
}
/**
 * @brief Translates a Maxwell minification/magnification filter into its Vulkan equivalent
 * @note The filter is read from a 2-bit field of a guest-written descriptor, so encodings without an enumerator (0 and 3) can reach this function, those fall back to nearest filtering instead of flowing off the end of the function (which is undefined behaviour)
 */
static vk::Filter ConvertSamplerFilter(TextureSamplerControl::Filter filter) {
    switch (filter) {
        case TextureSamplerControl::Filter::Nearest:
            return vk::Filter::eNearest;
        case TextureSamplerControl::Filter::Linear:
            return vk::Filter::eLinear;
    }

    Logger::Warn("Unknown sampler filter encoding, falling back to nearest filtering");
    return vk::Filter::eNearest;
}
/**
 * @brief Translates a Maxwell mipmap filter into a Vulkan mipmap mode
 * @note Vulkan has no way to disable mipmapping through the mipmap mode, `None` is therefore mapped to nearest here and has to be completed by clamping the LOD range at sampler creation
 * @note The filter is read from a 2-bit field of a guest-written descriptor, so encoding 0 (which has no enumerator) can reach this function, it falls back to nearest instead of flowing off the end of the function (which is undefined behaviour)
 */
static vk::SamplerMipmapMode ConvertSamplerMipFilter(TextureSamplerControl::MipFilter filter) {
    switch (filter) {
        // See https://github.com/yuzu-emu/yuzu/blob/5af06d14337a61d9ed1093079d13f68cbb1f5451/src/video_core/renderer_vulkan/maxwell_to_vk.cpp#L35
        case TextureSamplerControl::MipFilter::None:
            return vk::SamplerMipmapMode::eNearest;
        case TextureSamplerControl::MipFilter::Nearest:
            return vk::SamplerMipmapMode::eNearest;
        case TextureSamplerControl::MipFilter::Linear:
            return vk::SamplerMipmapMode::eLinear;
    }

    Logger::Warn("Unknown sampler mip filter encoding, falling back to nearest mipmap mode");
    return vk::SamplerMipmapMode::eNearest;
}
/**
 * @brief Translates a Maxwell texture address (wrap) mode into the closest Vulkan address mode
 * @note Several guest modes have no exact Vulkan counterpart and are approximated, see the comments on the individual cases
 */
static vk::SamplerAddressMode ConvertSamplerAddressMode(TextureSamplerControl::AddressMode mode) {
    using GuestMode = TextureSamplerControl::AddressMode;
    using HostMode = vk::SamplerAddressMode;

    switch (mode) {
        case GuestMode::Repeat:
            return HostMode::eRepeat;

        case GuestMode::MirroredRepeat:
            return HostMode::eMirroredRepeat;

        case GuestMode::ClampToBorder:
            return HostMode::eClampToBorder;

        case GuestMode::ClampToEdge:
        case GuestMode::Clamp: // Vulkan doesn't support 'GL_CLAMP' so clamping to the edge is an approximation
            return HostMode::eClampToEdge;

        case GuestMode::MirrorClampToEdge:
        case GuestMode::MirrorClampToBorder: // Only supported mirror clamps are to edges so this is an approximation
        case GuestMode::MirrorClamp: // Same as above
            return HostMode::eMirrorClampToEdge;
    }
}
/**
 * @brief Translates a Maxwell depth comparison operator into the Vulkan comparison operator of the same name
 */
static vk::CompareOp ConvertSamplerCompareOp(TextureSamplerControl::CompareOp compareOp) {
    using GuestOp = TextureSamplerControl::CompareOp;
    using HostOp = vk::CompareOp;

    switch (compareOp) {
        case GuestOp::Never:          return HostOp::eNever;
        case GuestOp::Less:           return HostOp::eLess;
        case GuestOp::Equal:          return HostOp::eEqual;
        case GuestOp::LessOrEqual:    return HostOp::eLessOrEqual;
        case GuestOp::Greater:        return HostOp::eGreater;
        case GuestOp::NotEqual:       return HostOp::eNotEqual;
        case GuestOp::GreaterOrEqual: return HostOp::eGreaterOrEqual;
        case GuestOp::Always:         return HostOp::eAlways;
    }
}
/**
 * @brief Translates a Maxwell sampler reduction filter into a Vulkan sampler reduction mode
 * @note The filter is read from a 2-bit field of a guest-written descriptor, so encoding 3 (which has no enumerator) can reach this function, it falls back to the weighted average instead of flowing off the end of the function (which is undefined behaviour)
 */
static vk::SamplerReductionMode ConvertSamplerReductionFilter(TextureSamplerControl::SamplerReduction reduction) {
    switch (reduction) {
        case TextureSamplerControl::SamplerReduction::WeightedAverage:
            return vk::SamplerReductionMode::eWeightedAverage;
        case TextureSamplerControl::SamplerReduction::Min:
            return vk::SamplerReductionMode::eMin;
        case TextureSamplerControl::SamplerReduction::Max:
            return vk::SamplerReductionMode::eMax;
    }

    Logger::Warn("Unknown sampler reduction filter encoding, falling back to weighted average");
    return vk::SamplerReductionMode::eWeightedAverage;
}
/**
 * @brief Picks the Vulkan border color for a guest border color on hosts that support custom border colors
 * @return One of the fixed Vulkan border colors when the guest color matches it exactly, `eFloatCustomEXT` otherwise
 * @note The float comparisons are intentionally exact, any color that isn't bit-for-bit one of the fixed colors is expressed as a custom color
 */
static vk::BorderColor ConvertBorderColorWithCustom(float red, float green, float blue, float alpha) {
    const bool isWhite{red == 1.0f && green == 1.0f && blue == 1.0f};
    const bool isBlack{red == 0.0f && green == 0.0f && blue == 0.0f};

    if (alpha == 1.0f) {
        if (isWhite)
            return vk::BorderColor::eFloatOpaqueWhite;
        else if (isBlack)
            return vk::BorderColor::eFloatOpaqueBlack;
    } else if (alpha == 0.0f && isBlack) {
        // Transparent black is (0, 0, 0, 0), a white color with zero alpha isn't a fixed Vulkan color and must stay custom
        return vk::BorderColor::eFloatTransparentBlack;
    }

    return vk::BorderColor::eFloatCustomEXT;
}
/**
 * @brief Picks the Vulkan border color for a guest border color on hosts without custom border color support
 * @return The fixed Vulkan border color that matches the guest color exactly or, failing that, the one that approximates it
 * @note Exact matches are checked first with intentionally exact float comparisons, everything else goes through the brightness/alpha heuristic below
 */
static vk::BorderColor ConvertBorderColorFixed(float red, float green, float blue, float alpha) {
    const bool isWhite{red == 1.0f && green == 1.0f && blue == 1.0f};
    const bool isBlack{red == 0.0f && green == 0.0f && blue == 0.0f};

    if (alpha == 1.0f) {
        if (isWhite)
            return vk::BorderColor::eFloatOpaqueWhite;
        else if (isBlack)
            return vk::BorderColor::eFloatOpaqueBlack;
    } else if (alpha == 0.0f && isBlack) {
        // Transparent black is (0, 0, 0, 0), other colors with zero alpha are left to the approximation below
        return vk::BorderColor::eFloatTransparentBlack;
    }

    // Approximations of a custom color using fixed colors
    if (red + green + blue > 1.0f)
        return vk::BorderColor::eFloatOpaqueWhite;
    else if (alpha > 0.0f)
        return vk::BorderColor::eFloatOpaqueBlack;
    else
        return vk::BorderColor::eFloatTransparentBlack;
}
/**
 * @brief Retrieves the host sampler for the guest sampler descriptor at the supplied index of the sampler pool, creating and caching it on first use
 * @param index The index of the descriptor inside the sampler pool, it is not bounds checked here
 * @return A shared pointer to the host sampler, samplers are cached by the contents of their guest descriptor so identical descriptors share one host sampler
 */
std::shared_ptr<vk::raii::Sampler> Samplers::GetSampler(InterconnectContext &ctx, u32 index) {
    auto pool{samplerPool.UpdateGet(ctx).texSamplers};
    TextureSamplerControl &tsc{pool[index]};

    // Looking the descriptor up with operator[] leaves an empty slot behind on a miss, that slot is what gets filled in at the end
    auto &cachedSampler{texSamplerCache[tsc]};
    if (cachedSampler)
        return cachedSampler;

    // Address mode conversion that additionally degrades mirror clamp to edge when the host cannot do it
    auto toHostAddressMode{[&](TextureSamplerControl::AddressMode guestMode) {
        auto hostMode{ConvertSamplerAddressMode(guestMode)};
        if (hostMode == vk::SamplerAddressMode::eMirrorClampToEdge && !ctx.gpu.traits.supportsSamplerMirrorClampToEdge) [[unlikely]] {
            Logger::Warn("Cannot use Mirror Clamp To Edge as Sampler Address Mode without host GPU support");
            return vk::SamplerAddressMode::eClampToEdge; // We use a normal clamp to edge to approximate it
        }
        return hostMode;
    }};

    float anisotropy{tsc.MaxAnisotropy()};
    // With mipmapping disabled the LOD range is pinned to [0, 0.25] rather than taken from the descriptor
    const bool mipmapsDisabled{tsc.mipFilter == TextureSamplerControl::MipFilter::None};

    vk::StructureChain<vk::SamplerCreateInfo, vk::SamplerReductionModeCreateInfoEXT, vk::SamplerCustomBorderColorCreateInfoEXT> samplerInfo{
        vk::SamplerCreateInfo{
            .magFilter = ConvertSamplerFilter(tsc.magFilter),
            .minFilter = ConvertSamplerFilter(tsc.minFilter),
            .mipmapMode = ConvertSamplerMipFilter(tsc.mipFilter),
            .addressModeU = toHostAddressMode(tsc.addressModeU),
            .addressModeV = toHostAddressMode(tsc.addressModeV),
            .addressModeW = toHostAddressMode(tsc.addressModeP),
            .mipLodBias = tsc.MipLodBias(),
            .anisotropyEnable = ctx.gpu.traits.supportsAnisotropicFiltering && anisotropy > 1.0f,
            .maxAnisotropy = anisotropy,
            .compareEnable = tsc.depthCompareEnable,
            .compareOp = ConvertSamplerCompareOp(tsc.depthCompareOp),
            .minLod = mipmapsDisabled ? 0.0f : tsc.MinLodClamp(),
            .maxLod = mipmapsDisabled ? 0.25f : tsc.MaxLodClamp(),
            .unnormalizedCoordinates = false,
        },
        vk::SamplerReductionModeCreateInfoEXT{
            .reductionMode = ConvertSamplerReductionFilter(tsc.reductionFilter),
        },
        vk::SamplerCustomBorderColorCreateInfoEXT{
            .customBorderColor.float32 = {{tsc.borderColorR, tsc.borderColorG, tsc.borderColorB, tsc.borderColorA}},
            .format = vk::Format::eUndefined,
        },
    };

    // Extension structures are only left in the chain when the host supports them and they're actually needed
    if (!ctx.gpu.traits.supportsSamplerReductionMode)
        samplerInfo.unlink<vk::SamplerReductionModeCreateInfoEXT>();

    auto &createInfo{samplerInfo.get<vk::SamplerCreateInfo>()};
    bool usesCustomBorderColor{false};
    if (ctx.gpu.traits.supportsCustomBorderColor) {
        createInfo.borderColor = ConvertBorderColorWithCustom(tsc.borderColorR, tsc.borderColorG, tsc.borderColorB, tsc.borderColorA);
        usesCustomBorderColor = createInfo.borderColor == vk::BorderColor::eFloatCustomEXT;
    } else {
        createInfo.borderColor = ConvertBorderColorFixed(tsc.borderColorR, tsc.borderColorG, tsc.borderColorB, tsc.borderColorA);
    }

    if (!usesCustomBorderColor)
        samplerInfo.unlink<vk::SamplerCustomBorderColorCreateInfoEXT>();

    cachedSampler = std::make_shared<vk::raii::Sampler>(ctx.gpu.vkDevice, createInfo);
    return cachedSampler;
}
}

View File

@ -0,0 +1,47 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <tsl/robin_map.h>
#include "common.h"
#include "tsc.h"
namespace skyline::gpu::interconnect::maxwell3d {
/**
 * @brief Dirty-tracked state holding a view over the guest's pool of texture sampler descriptors
 */
class SamplerPoolState : dirty::CachedManualDirty {
public:
/**
 * @brief References to the engine registers that the sampler pool view is derived from
 */
struct EngineRegisters {
const engine::SamplerBinding &samplerBinding; //!< Selects which pool's maximum index determines the amount of sampler descriptors
const engine::TexSamplerPool &texSamplerPool; //!< Location and maximum index of the sampler pool
const engine::TexHeaderPool &texHeaderPool; //!< Location and maximum index of the header pool, only its maximum index is read by this state
void DirtyBind(DirtyManager &manager, dirty::Handle handle) const;
};
private:
dirty::BoundSubresource<EngineRegisters> engine;
public:
span<TextureSamplerControl> texSamplers; //!< View over the sampler descriptors in the pool, written by Flush and emptied by PurgeCaches
SamplerPoolState(dirty::Handle dirtyHandle, DirtyManager &manager, const EngineRegisters &engine);
/**
 * @brief Recomputes `texSamplers` from the current register values
 */
void Flush(InterconnectContext &ctx);
/**
 * @brief Resets `texSamplers` to an empty span
 */
void PurgeCaches();
};
/**
 * @brief Provides host samplers for the sampler descriptors in the guest sampler pool
 */
class Samplers {
private:
dirty::ManualDirtyState<SamplerPoolState> samplerPool;
tsl::robin_map<TextureSamplerControl, std::shared_ptr<vk::raii::Sampler>, util::ObjectHash<TextureSamplerControl>> texSamplerCache; //!< Host samplers keyed by the contents of the guest descriptor they were created from
public:
Samplers(DirtyManager &manager, const SamplerPoolState::EngineRegisters &engine);
/**
 * @brief Marks the sampler pool state as dirty
 */
void MarkAllDirty();
/**
 * @brief Returns the host sampler for the descriptor at the supplied index in the sampler pool, creating and caching it if one doesn't exist yet
 */
std::shared_ptr<vk::raii::Sampler> GetSampler(InterconnectContext &ctx, u32 index);
};
}

View File

@ -1,140 +0,0 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
// Copyright © 2018-2020 fincs (https://github.com/devkitPro/deko3d)
#pragma once
#include <common/base.h>
namespace skyline::gpu::interconnect {
#pragma pack(push, 1)
/**
 * @brief The Texture Sampler Control is a descriptor used to configure the texture sampler in Maxwell GPUs
 * @note The layout mirrors the 0x20 byte hardware descriptor, fields must not be reordered or resized
 * @url https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_texture.xml#L367
 * @url https://github.com/devkitPro/deko3d/blob/00c12d1f4809014f1cc22719dd2e3476735eec64/source/maxwell/texture_sampler_control_block.h
 */
struct TextureSamplerControl {
    enum class AddressMode : u32 {
        Repeat = 0,
        MirroredRepeat = 1,
        ClampToEdge = 2,
        ClampToBorder = 3,
        Clamp = 4,
        MirrorClampToEdge = 5,
        MirrorClampToBorder = 6,
        MirrorClamp = 7,
    };

    enum class CompareOp : u32 {
        Never = 0,
        Less = 1,
        Equal = 2,
        LessOrEqual = 3,
        Greater = 4,
        NotEqual = 5,
        GreaterOrEqual = 6,
        Always = 7,
    };

    enum class Filter : u32 {
        Nearest = 1,
        Linear = 2,
    };

    enum class MipFilter : u32 {
        None = 1,
        Nearest = 2,
        Linear = 3,
    };

    enum class SamplerReduction : u32 {
        WeightedAverage = 0,
        Min = 1,
        Max = 2,
    };

    // 0x00
    AddressMode addressModeU : 3;
    AddressMode addressModeV : 3;
    AddressMode addressModeP : 3;
    u32 depthCompareEnable : 1;
    CompareOp depthCompareOp : 3;
    u32 srgbConversion : 1;
    u32 fontFilterWidth : 3;
    u32 fontFilterHeight : 3;
    u32 maxAnisotropy : 3;
    u32 _pad0_ : 9;

    // 0x04
    Filter magFilter : 2;
    u32 _pad1_ : 2;
    Filter minFilter : 2;
    MipFilter mipFilter : 2;
    u32 cubemapAnisotropy : 1;
    u32 cubemapInterfaceFiltering : 1;
    SamplerReduction reductionFilter : 2;
    i32 mipLodBias : 13;
    u32 floatCoordNormalization : 1;
    u32 trilinearOptimization : 5;
    u32 _pad2_ : 1;

    // 0x08
    u32 minLodClamp : 12;
    u32 maxLodClamp : 12;
    u32 srgbBorderColorR : 8;

    // 0x0C
    u32 _pad3_ : 12;
    u32 srgbBorderColorG : 8;
    u32 srgbBorderColorB : 8;
    u32 _pad4_ : 4;

    // 0x10
    float borderColorR;

    // 0x14
    float borderColorG;

    // 0x18
    float borderColorB;

    // 0x1C
    float borderColorA;

  private:
    /**
     * @brief Converts a fixed point number to a floating point number
     * @tparam FractionalBits The amount of low bits of the fixed point number that hold its fractional part
     */
    template<typename T, size_t FractionalBits = 8>
    static constexpr float ConvertFixedToFloat(T fixed) {
        return static_cast<float>(fixed) / static_cast<float>(1 << FractionalBits);
    }

  public:
    bool operator==(const TextureSamplerControl &) const = default;

    /**
     * @return The maximum anisotropy as a float in the range 1..16, decoded from the 3-bit `maxAnisotropy` field
     */
    float MaxAnisotropy() const {
        constexpr size_t AnisotropyCount{8}; //!< The amount of unique anisotropy values that can be represented (2^3 — 3-bit value)
        static constexpr std::array<float, AnisotropyCount> anisotropyLut{
            1.0f, 3.14f, 5.28f, 7.42f, 9.57f, 11.71f, 13.85f, 16.0f
        }; //!< A linear mapping of value range (0..7) to anisotropy range (1..16) calculated using `(index * 15 / 7) + 1`

        return anisotropyLut[maxAnisotropy];
    }

    /**
     * @return The mip LOD bias, decoded from a signed fixed point value with 8 fractional bits
     */
    float MipLodBias() const {
        return ConvertFixedToFloat(mipLodBias);
    }

    /**
     * @return The minimum LOD clamp, decoded from an unsigned fixed point value with 8 fractional bits
     */
    float MinLodClamp() const {
        return ConvertFixedToFloat(minLodClamp);
    }

    /**
     * @return The maximum LOD clamp, decoded from an unsigned fixed point value with 8 fractional bits
     */
    float MaxLodClamp() const {
        return ConvertFixedToFloat(maxLodClamp);
    }
};
static_assert(sizeof(TextureSamplerControl) == 0x20);
#pragma pack(pop)
}

View File

@ -545,6 +545,29 @@ namespace skyline::soc::gm20b::engine::maxwell3d::type {
}; };
static_assert(sizeof(MultisampleControl) == sizeof(u32)); static_assert(sizeof(MultisampleControl) == sizeof(u32));
/**
 * @brief Register selecting how the sampler (TSC) index of a texture is determined
 */
struct SamplerBinding {
enum class Value : u8 {
Independently = 0, //!< Presumably the TSC index from the bindless texture handle is used - confirm against the register documentation
ViaHeaderBinding = 1 //!< Presumably the TIC (header) index is used as the TSC index - confirm against the register documentation
};
Value value : 1;
u32 _pad_ : 31; // Pads the single-bit value out to the full 32-bit register
};
static_assert(sizeof(SamplerBinding) == sizeof(u32));
/**
 * @brief Registers describing the pool of texture sampler descriptors
 */
struct TexSamplerPool {
Address offset; //!< Address at which the pool starts
u32 maximumIndex; //!< The highest valid index into the pool, the pool therefore holds `maximumIndex + 1` entries
};
static_assert(sizeof(TexSamplerPool) == sizeof(u32) * 3);
/**
 * @brief Registers describing the pool of texture header descriptors
 */
struct TexHeaderPool {
Address offset; //!< Address at which the pool starts
u32 maximumIndex; //!< The highest valid index into the pool, the pool therefore holds `maximumIndex + 1` entries
};
static_assert(sizeof(TexHeaderPool) == sizeof(u32) * 3);
enum class CompareFunc : u32 { enum class CompareFunc : u32 {
D3DNever = 1, D3DNever = 1,
D3DLess = 2, D3DLess = 2,

View File

@ -47,7 +47,8 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
return { return {
.activeStateRegisters = MakeActiveStateRegisters(registers), .activeStateRegisters = MakeActiveStateRegisters(registers),
.clearRegisters = {registers.scissors[0], registers.viewportClips[0], *registers.clearRect, *registers.colorClearValue, *registers.zClearValue, *registers.stencilClearValue, *registers.surfaceClip, *registers.clearSurfaceControl}, .clearRegisters = {registers.scissors[0], registers.viewportClips[0], *registers.clearRect, *registers.colorClearValue, *registers.zClearValue, *registers.stencilClearValue, *registers.surfaceClip, *registers.clearSurfaceControl},
.constantBufferSelectorRegisters = {*registers.constantBufferSelector} .constantBufferSelectorRegisters = {*registers.constantBufferSelector},
.samplerPoolRegisters = {*registers.samplerBinding, *registers.texSamplerPool, *registers.texHeaderPool}
}; };
} }
#undef REGTYPE #undef REGTYPE

View File

@ -189,7 +189,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
Register<0x48A, type::ZtSize> ztSize; Register<0x48A, type::ZtSize> ztSize;
Register<0x48D, bool> linkedTscHandle; //!< If enabled, the TSC index in a bindless texture handle is ignored and the TIC index is used as the TSC index, otherwise the TSC index from the bindless texture handle is used Register<0x48D, type::SamplerBinding> samplerBinding; //!< If enabled, the TSC index in a bindless texture handle is ignored and the TIC index is used as the TSC index, otherwise the TSC index from the bindless texture handle is used
Register<0x490, std::array<u32, 8>> postVtgShaderAttributeSkipMask; Register<0x490, std::array<u32, 8>> postVtgShaderAttributeSkipMask;
@ -235,20 +235,12 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
Register<0x54F, type::MultisampleControl> multisampleControl; Register<0x54F, type::MultisampleControl> multisampleControl;
struct SamplerPool { Register<0x557, type::TexSamplerPool> texSamplerPool;
Address address; // 0x557
u32 maximumIndex; // 0x559
};
Register<0x557, SamplerPool> samplerPool;
Register<0x55B, float> slopeScaleDepthBias; Register<0x55B, float> slopeScaleDepthBias;
Register<0x55C, u32> aliasedLineWidthEnable; Register<0x55C, u32> aliasedLineWidthEnable;
struct TexturePool { Register<0x55D, type::TexHeaderPool> texHeaderPool;
Address address; // 0x55D
u32 maximumIndex; // 0x55F
};
Register<0x55D, TexturePool> texturePool;
Register<0x565, u32> twoSidedStencilTestEnable; //!< Determines if the back-facing stencil state uses the front facing stencil state or independent stencil state Register<0x565, u32> twoSidedStencilTestEnable; //!< Determines if the back-facing stencil state uses the front facing stencil state or independent stencil state