Add Vulkan stride dynamic state and robustness support

Fixes the waterfall in SMO by specifying vertex buffer bounds.
This commit is contained in:
Billy Laws 2022-11-18 21:30:34 +00:00
parent 23a7f70a8e
commit 8f0a6e78c5
9 changed files with 53 additions and 15 deletions

View File

@ -233,7 +233,9 @@ namespace skyline::gpu {
vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT, vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT,
vk::PhysicalDeviceImagelessFramebufferFeatures, vk::PhysicalDeviceImagelessFramebufferFeatures,
vk::PhysicalDeviceTransformFeedbackFeaturesEXT, vk::PhysicalDeviceTransformFeedbackFeaturesEXT,
vk::PhysicalDeviceIndexTypeUint8FeaturesEXT>()}; vk::PhysicalDeviceIndexTypeUint8FeaturesEXT,
vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT,
vk::PhysicalDeviceRobustness2FeaturesEXT>()};
decltype(deviceFeatures2) enabledFeatures2{}; // We only want to enable features we required due to potential overhead from unused features decltype(deviceFeatures2) enabledFeatures2{}; // We only want to enable features we required due to potential overhead from unused features
#define FEAT_REQ(structName, feature) \ #define FEAT_REQ(structName, feature) \

View File

@ -31,9 +31,9 @@ namespace skyline::gpu::interconnect::maxwell3d {
if (megaBufferBinding = view->TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionNumber); if (megaBufferBinding = view->TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionNumber);
megaBufferBinding) megaBufferBinding)
builder.SetVertexBuffer(index, megaBufferBinding); builder.SetVertexBuffer(index, megaBufferBinding, ctx.gpu.traits.supportsExtendedDynamicState, engine->vertexStream.format.stride);
else else
builder.SetVertexBuffer(index, *view); builder.SetVertexBuffer(index, *view, ctx.gpu.traits.supportsExtendedDynamicState, engine->vertexStream.format.stride);
return; return;
} else { } else {
@ -41,9 +41,11 @@ namespace skyline::gpu::interconnect::maxwell3d {
} }
} }
// TODO: null descriptor
megaBufferBinding = {}; megaBufferBinding = {};
builder.SetVertexBuffer(index, {ctx.gpu.megaBufferAllocator.Allocate(ctx.executor.cycle, 0).buffer}); if (ctx.gpu.traits.supportsNullDescriptor)
builder.SetVertexBuffer(index, BufferBinding{});
else
builder.SetVertexBuffer(index, {ctx.gpu.megaBufferAllocator.Allocate(ctx.executor.cycle, 0).buffer});
} }
bool VertexBufferState::Refresh(InterconnectContext &ctx, StateUpdateBuilder &builder) { bool VertexBufferState::Refresh(InterconnectContext &ctx, StateUpdateBuilder &builder) {

View File

@ -247,7 +247,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
if (oldPipeline != pipeline) if (oldPipeline != pipeline)
// If the pipeline has changed, we need to update the pipeline state // If the pipeline has changed, we need to update the pipeline state
builder.SetPipeline(pipeline->compiledPipeline.pipeline); builder.SetPipeline(pipeline->compiledPipeline.pipeline, vk::PipelineBindPoint::eGraphics);
if (descUpdateInfo) { if (descUpdateInfo) {
if (ctx.gpu.traits.supportsPushDescriptors) { if (ctx.gpu.traits.supportsPushDescriptors) {

View File

@ -19,7 +19,8 @@ namespace skyline::gpu::interconnect::maxwell3d {
} }
void PackedPipelineState::SetVertexBinding(u32 index, engine::VertexStream stream, engine::VertexStreamInstance instance) { void PackedPipelineState::SetVertexBinding(u32 index, engine::VertexStream stream, engine::VertexStreamInstance instance) {
vertexBindings[index].stride = stream.format.stride; if (!dynamicStateActive)
vertexStrides[index] = stream.format.stride;
vertexBindings[index].inputRate = static_cast<u8>(instance.isInstanced ? vk::VertexInputRate::eInstance : vk::VertexInputRate::eVertex); vertexBindings[index].inputRate = static_cast<u8>(instance.isInstanced ? vk::VertexInputRate::eInstance : vk::VertexInputRate::eVertex);
vertexBindings[index].enable = stream.format.enable; vertexBindings[index].enable = stream.format.enable;
vertexBindings[index].divisor = stream.frequency; vertexBindings[index].divisor = stream.frequency;

View File

@ -58,6 +58,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
u8 alphaFunc : 3; //!< Use {Set,Get}AlphaFunc u8 alphaFunc : 3; //!< Use {Set,Get}AlphaFunc
bool alphaTestEnable : 1; bool alphaTestEnable : 1;
bool depthClampEnable : 1; // Use SetDepthClampEnable bool depthClampEnable : 1; // Use SetDepthClampEnable
bool dynamicStateActive : 1;
}; };
u32 patchSize; u32 patchSize;
@ -69,10 +70,8 @@ namespace skyline::gpu::interconnect::maxwell3d {
std::array<u32, 8> postVtgShaderAttributeSkipMask; std::array<u32, 8> postVtgShaderAttributeSkipMask;
struct VertexBinding { struct VertexBinding {
u16 stride : 12;
u8 inputRate : 1; u8 inputRate : 1;
bool enable : 1; bool enable : 1;
u8 _pad_ : 2;
u32 divisor; u32 divisor;
vk::VertexInputRate GetInputRate() const { vk::VertexInputRate GetInputRate() const {
@ -95,6 +94,8 @@ namespace skyline::gpu::interconnect::maxwell3d {
std::array<AttachmentBlendState, engine::ColorTargetCount> attachmentBlendStates; std::array<AttachmentBlendState, engine::ColorTargetCount> attachmentBlendStates;
std::array<u16, engine::VertexStreamCount> vertexStrides; //!< Use {Set, Get}VertexBinding
struct TransformFeedbackVarying { struct TransformFeedbackVarying {
u16 stride; u16 stride;
u8 offsetWords; u8 offsetWords;
@ -149,6 +150,8 @@ namespace skyline::gpu::interconnect::maxwell3d {
// Only hash transform feedback state if it's enabled // Only hash transform feedback state if it's enabled
if (other.transformFeedbackEnable && transformFeedbackEnable) if (other.transformFeedbackEnable && transformFeedbackEnable)
return std::memcmp(this, &other, sizeof(PackedPipelineState)) == 0; return std::memcmp(this, &other, sizeof(PackedPipelineState)) == 0;
else if (dynamicStateActive)
return std::memcmp(this, &other, offsetof(PackedPipelineState, vertexStrides)) == 0;
else else
return std::memcmp(this, &other, offsetof(PackedPipelineState, transformFeedbackVaryings)) == 0; return std::memcmp(this, &other, offsetof(PackedPipelineState, transformFeedbackVaryings)) == 0;
} }
@ -159,6 +162,9 @@ namespace skyline::gpu::interconnect::maxwell3d {
// Only hash transform feedback state if it's enabled // Only hash transform feedback state if it's enabled
if (state.transformFeedbackEnable) if (state.transformFeedbackEnable)
return XXH64(&state, sizeof(PackedPipelineState), 0); return XXH64(&state, sizeof(PackedPipelineState), 0);
else if (state.dynamicStateActive)
return XXH64(&state, offsetof(PackedPipelineState, vertexStrides), 0);
return XXH64(&state, offsetof(PackedPipelineState, transformFeedbackVaryings), 0); return XXH64(&state, offsetof(PackedPipelineState, transformFeedbackVaryings), 0);
} }

View File

@ -442,7 +442,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
const auto &binding{packedState.vertexBindings[i]}; const auto &binding{packedState.vertexBindings[i]};
bindingDescs.push_back({ bindingDescs.push_back({
.binding = i, .binding = i,
.stride = binding.stride, .stride = packedState.vertexStrides[i],
.inputRate = binding.GetInputRate(), .inputRate = binding.GetInputRate(),
}); });
@ -535,7 +535,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
.pAttachments = attachmentBlendStates.data() .pAttachments = attachmentBlendStates.data()
}; };
constexpr std::array<vk::DynamicState, 9> dynamicStates{ constexpr std::array<vk::DynamicState, 10> dynamicStates{
vk::DynamicState::eViewport, vk::DynamicState::eViewport,
vk::DynamicState::eScissor, vk::DynamicState::eScissor,
vk::DynamicState::eLineWidth, vk::DynamicState::eLineWidth,
@ -544,11 +544,16 @@ namespace skyline::gpu::interconnect::maxwell3d {
vk::DynamicState::eDepthBounds, vk::DynamicState::eDepthBounds,
vk::DynamicState::eStencilCompareMask, vk::DynamicState::eStencilCompareMask,
vk::DynamicState::eStencilWriteMask, vk::DynamicState::eStencilWriteMask,
vk::DynamicState::eStencilReference vk::DynamicState::eStencilReference,
// VK_EXT_extended_dynamic_state starts here
vk::DynamicState::eVertexInputBindingStrideEXT
}; };
static constexpr u32 BaseDynamicStateCount{9};
static constexpr u32 ExtendedDynamicStateCount{BaseDynamicStateCount + 1};
vk::PipelineDynamicStateCreateInfo dynamicState{ vk::PipelineDynamicStateCreateInfo dynamicState{
.dynamicStateCount = static_cast<u32>(dynamicStates.size()), .dynamicStateCount = ctx.gpu.traits.supportsExtendedDynamicState ? ExtendedDynamicStateCount : BaseDynamicStateCount,
.pDynamicStates = dynamicStates.data() .pDynamicStates = dynamicStates.data()
}; };

View File

@ -488,6 +488,8 @@ namespace skyline::gpu::interconnect::maxwell3d {
ctSelect{engine.ctSelect} {} ctSelect{engine.ctSelect} {}
void PipelineState::Flush(InterconnectContext &ctx, Textures &textures, ConstantBufferSet &constantBuffers, StateUpdateBuilder &builder) { void PipelineState::Flush(InterconnectContext &ctx, Textures &textures, ConstantBufferSet &constantBuffers, StateUpdateBuilder &builder) {
packedState.dynamicStateActive = ctx.gpu.traits.supportsExtendedDynamicState;
std::array<ShaderBinary, engine::PipelineCount> shaderBinaries; std::array<ShaderBinary, engine::PipelineCount> shaderBinaries;
for (size_t i{}; i < engine::PipelineCount; i++) { for (size_t i{}; i < engine::PipelineCount; i++) {
const auto &stage{pipelineStages[i].UpdateGet(ctx)}; const auto &stage{pipelineStages[i].UpdateGet(ctx)};

View File

@ -6,7 +6,7 @@
namespace skyline::gpu { namespace skyline::gpu {
TraitManager::TraitManager(const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector<vk::ExtensionProperties> &deviceExtensions, std::vector<std::array<char, VK_MAX_EXTENSION_NAME_SIZE>> &enabledExtensions, const DeviceProperties2 &deviceProperties2, const vk::raii::PhysicalDevice &physicalDevice) : quirks(deviceProperties2.get<vk::PhysicalDeviceProperties2>().properties, deviceProperties2.get<vk::PhysicalDeviceDriverProperties>()) { TraitManager::TraitManager(const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector<vk::ExtensionProperties> &deviceExtensions, std::vector<std::array<char, VK_MAX_EXTENSION_NAME_SIZE>> &enabledExtensions, const DeviceProperties2 &deviceProperties2, const vk::raii::PhysicalDevice &physicalDevice) : quirks(deviceProperties2.get<vk::PhysicalDeviceProperties2>().properties, deviceProperties2.get<vk::PhysicalDeviceDriverProperties>()) {
bool hasCustomBorderColorExt{}, hasShaderAtomicInt64Ext{}, hasShaderFloat16Int8Ext{}, hasShaderDemoteToHelperExt{}, hasVertexAttributeDivisorExt{}, hasProvokingVertexExt{}, hasPrimitiveTopologyListRestartExt{}, hasImagelessFramebuffersExt{}, hasTransformFeedbackExt{}, hasUint8IndicesExt{}; bool hasCustomBorderColorExt{}, hasShaderAtomicInt64Ext{}, hasShaderFloat16Int8Ext{}, hasShaderDemoteToHelperExt{}, hasVertexAttributeDivisorExt{}, hasProvokingVertexExt{}, hasPrimitiveTopologyListRestartExt{}, hasImagelessFramebuffersExt{}, hasTransformFeedbackExt{}, hasUint8IndicesExt{}, hasExtendedDynamicStateExt{}, hasRobustness2Ext{};
bool supportsUniformBufferStandardLayout{}; // We require VK_KHR_uniform_buffer_standard_layout but assume it is implicitly supported even when not present bool supportsUniformBufferStandardLayout{}; // We require VK_KHR_uniform_buffer_standard_layout but assume it is implicitly supported even when not present
for (auto &extension : deviceExtensions) { for (auto &extension : deviceExtensions) {
@ -57,6 +57,8 @@ namespace skyline::gpu {
EXT_SET("VK_KHR_uniform_buffer_standard_layout", supportsUniformBufferStandardLayout); EXT_SET("VK_KHR_uniform_buffer_standard_layout", supportsUniformBufferStandardLayout);
EXT_SET("VK_EXT_primitive_topology_list_restart", hasPrimitiveTopologyListRestartExt); EXT_SET("VK_EXT_primitive_topology_list_restart", hasPrimitiveTopologyListRestartExt);
EXT_SET("VK_EXT_transform_feedback", hasTransformFeedbackExt); EXT_SET("VK_EXT_transform_feedback", hasTransformFeedbackExt);
EXT_SET("VK_EXT_extended_dynamic_state", hasExtendedDynamicStateExt);
EXT_SET("VK_EXT_robustness2", hasRobustness2Ext);
} }
#undef EXT_SET #undef EXT_SET
@ -83,6 +85,20 @@ namespace skyline::gpu {
else else
enabledFeatures2.unlink<vk::PhysicalDeviceIndexTypeUint8FeaturesEXT>(); enabledFeatures2.unlink<vk::PhysicalDeviceIndexTypeUint8FeaturesEXT>();
if (hasExtendedDynamicStateExt)
FEAT_SET(vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT, extendedDynamicState, supportsExtendedDynamicState)
else
enabledFeatures2.unlink<vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT>();
if (hasRobustness2Ext) {
FEAT_SET(vk::PhysicalDeviceRobustness2FeaturesEXT, nullDescriptor, supportsNullDescriptor)
FEAT_SET(vk::PhysicalDeviceRobustness2FeaturesEXT, robustBufferAccess2, std::ignore)
FEAT_SET(vk::PhysicalDeviceRobustness2FeaturesEXT, robustImageAccess2, std::ignore)
} else {
enabledFeatures2.unlink<vk::PhysicalDeviceRobustness2FeaturesEXT>();
}
if (hasCustomBorderColorExt) { if (hasCustomBorderColorExt) {
bool hasCustomBorderColorFeature{}; bool hasCustomBorderColorFeature{};
FEAT_SET(vk::PhysicalDeviceCustomBorderColorFeaturesEXT, customBorderColors, hasCustomBorderColorFeature) FEAT_SET(vk::PhysicalDeviceCustomBorderColorFeaturesEXT, customBorderColors, hasCustomBorderColorFeature)

View File

@ -46,6 +46,8 @@ namespace skyline::gpu {
bool supportsSubgroupVote{}; //!< If subgroup votes are supported in shaders with SPV_KHR_subgroup_vote bool supportsSubgroupVote{}; //!< If subgroup votes are supported in shaders with SPV_KHR_subgroup_vote
bool supportsWideLines{}; //!< If the device supports the 'wideLines' Vulkan feature bool supportsWideLines{}; //!< If the device supports the 'wideLines' Vulkan feature
bool supportsDepthClamp{}; //!< If the device supports the 'depthClamp' Vulkan feature bool supportsDepthClamp{}; //!< If the device supports the 'depthClamp' Vulkan feature
bool supportsExtendedDynamicState{}; //!< If the device supports the 'VK_EXT_extended_dynamic_state' Vulkan extension
bool supportsNullDescriptor{}; //!< If the device supports the null descriptor feature in the 'VK_EXT_robustness2' Vulkan extension
u32 subgroupSize{}; //!< Size of a subgroup on the host GPU u32 subgroupSize{}; //!< Size of a subgroup on the host GPU
std::bitset<7> bcnSupport{}; //!< Bitmask of BCn texture formats supported, it is ordered as BC1, BC2, BC3, BC4, BC5, BC6H and BC7 std::bitset<7> bcnSupport{}; //!< Bitmask of BCn texture formats supported, it is ordered as BC1, BC2, BC3, BC4, BC5, BC6H and BC7
@ -98,7 +100,9 @@ namespace skyline::gpu {
vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT, vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT,
vk::PhysicalDeviceImagelessFramebufferFeatures, vk::PhysicalDeviceImagelessFramebufferFeatures,
vk::PhysicalDeviceTransformFeedbackFeaturesEXT, vk::PhysicalDeviceTransformFeedbackFeaturesEXT,
vk::PhysicalDeviceIndexTypeUint8FeaturesEXT>; vk::PhysicalDeviceIndexTypeUint8FeaturesEXT,
vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT,
vk::PhysicalDeviceRobustness2FeaturesEXT>;
TraitManager(const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector<vk::ExtensionProperties> &deviceExtensions, std::vector<std::array<char, VK_MAX_EXTENSION_NAME_SIZE>> &enabledExtensions, const DeviceProperties2 &deviceProperties2, const vk::raii::PhysicalDevice& physicalDevice); TraitManager(const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector<vk::ExtensionProperties> &deviceExtensions, std::vector<std::array<char, VK_MAX_EXTENSION_NAME_SIZE>> &enabledExtensions, const DeviceProperties2 &deviceProperties2, const vk::raii::PhysicalDevice& physicalDevice);