From bb14af4f7ab6f712ecfb5d1b681155091c0d4a4b Mon Sep 17 00:00:00 2001 From: PixelyIon Date: Thu, 6 Jan 2022 00:59:54 +0530 Subject: [PATCH] Implement Maxwell3D Sampled Textures The descriptor sets should now contain a combined image and sampler handle for each sampled texture in the guest shader, resolved from the texture handle at the supplied offset into the texture constant buffer. Note: Games tend to rely on inline constant buffer updates for writing the texture constant buffer; since inline updates are not implemented yet, the handle will be read as 0, which is incorrect. --- .../cpp/skyline/gpu/descriptor_allocator.cpp | 6 ++- .../gpu/interconnect/command_executor.cpp | 4 ++ .../gpu/interconnect/command_executor.h | 5 ++ .../gpu/interconnect/graphics_context.h | 48 ++++++++++++++++++- .../main/cpp/skyline/gpu/texture/texture.cpp | 2 +- 5 files changed, 62 insertions(+), 3 deletions(-) diff --git a/app/src/main/cpp/skyline/gpu/descriptor_allocator.cpp b/app/src/main/cpp/skyline/gpu/descriptor_allocator.cpp index 8d1bd9cb..caf7eec9 100644 --- a/app/src/main/cpp/skyline/gpu/descriptor_allocator.cpp +++ b/app/src/main/cpp/skyline/gpu/descriptor_allocator.cpp @@ -10,12 +10,16 @@ namespace skyline::gpu { void DescriptorAllocator::AllocateDescriptorPool() { namespace maxwell3d = soc::gm20b::engine::maxwell3d::type; // We use Maxwell3D as reference for base descriptor counts - using DescriptorSizes = std::array; + using DescriptorSizes = std::array; constexpr DescriptorSizes BaseDescriptorSizes{ vk::DescriptorPoolSize{ .descriptorCount = maxwell3d::PipelineStageConstantBufferCount, .type = vk::DescriptorType::eUniformBuffer, }, + vk::DescriptorPoolSize{ + .descriptorCount = maxwell3d::PipelineStageCount * 20, + .type = vk::DescriptorType::eCombinedImageSampler, + }, }; DescriptorSizes descriptorSizes{BaseDescriptorSizes}; diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp index 190f3100..49c159bd 100644 --- 
a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp @@ -45,6 +45,10 @@ namespace skyline::gpu::interconnect { } } + void CommandExecutor::AttachDependency(std::shared_ptr dependency) { + cycle->AttachObject(dependency); + } + void CommandExecutor::AddSubpass(std::function &, GPU &, vk::RenderPass, u32)> &&function, vk::Rect2D renderArea, span inputAttachments, span colorAttachments, TextureView *depthStencilAttachment) { for (const auto &attachments : {inputAttachments, colorAttachments}) for (const auto &attachment : attachments) diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h index 6fef8650..3bdd3aba 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h @@ -46,6 +46,11 @@ namespace skyline::gpu::interconnect { */ void AttachBuffer(BufferView *view); + /** + * @brief Attach the lifetime of the fence cycle dependency to the command buffer + */ + void AttachDependency(std::shared_ptr dependency); + /** * @brief Adds a command that needs to be executed inside a subpass configured with certain attachments * @note Any texture supplied to this **must** be locked by the calling thread, it should also undergo no persistent layout transitions till execution diff --git a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h index 826562ba..86fe5c2b 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h @@ -679,13 +679,14 @@ namespace skyline::gpu::interconnect { constexpr static size_t MaxShaderBytecodeSize{1 * 1024 * 1024}; //!< The largest shader binary that we support (1 MiB) - constexpr static size_t PipelineUniqueDescriptorTypeCount{1}; //!< The amount of unique 
descriptor types that may be bound to a pipeline + constexpr static size_t PipelineUniqueDescriptorTypeCount{2}; //!< The amount of unique descriptor types that may be bound to a pipeline constexpr static size_t MaxPipelineDescriptorWriteCount{maxwell3d::PipelineStageCount * PipelineUniqueDescriptorTypeCount}; //!< The maxium amount of descriptors writes that are used to bind a pipeline constexpr static size_t MaxPipelineDescriptorCount{100}; //!< The maxium amount of descriptors we support being bound to a pipeline boost::container::static_vector descriptorSetWrites; boost::container::static_vector layoutBindings; boost::container::static_vector bufferInfo; + boost::container::static_vector imageInfo; /** * @brief All state concerning the shader programs and their bindings @@ -799,6 +800,11 @@ namespace skyline::gpu::interconnect { } } + descriptorSetWrites.clear(); + layoutBindings.clear(); + bufferInfo.clear(); + imageInfo.clear(); + runtimeInfo.previous_stage_stores.mask.set(); // First stage should always have all bits set ShaderCompiler::Backend::Bindings bindings{}; @@ -852,6 +858,46 @@ namespace skyline::gpu::interconnect { } } + if (!program.info.texture_descriptors.empty()) { + descriptorSetWrites.push_back(vk::WriteDescriptorSet{ + .dstBinding = bindingIndex, + .descriptorCount = static_cast(program.info.texture_descriptors.size()), + .descriptorType = vk::DescriptorType::eCombinedImageSampler, + .pImageInfo = imageInfo.data() + imageInfo.size(), + }); + + u32 descriptorIndex{}; + for (auto &texture : program.info.texture_descriptors) { + layoutBindings.push_back(vk::DescriptorSetLayoutBinding{ + .binding = bindingIndex++, + .descriptorType = vk::DescriptorType::eCombinedImageSampler, + .descriptorCount = 1, + .stageFlags = pipelineStage.vkStage, + }); + + auto &constantBuffer{pipelineStage.constantBuffers[texture.cbuf_index]}; + union TextureHandle { + u32 raw; + struct { + u32 textureIndex : 20; + u32 samplerIndex : 12; + }; + } 
handle{constantBuffer.Read(texture.cbuf_offset + (descriptorIndex++ << texture.size_shift))}; + + auto sampler{GetSampler(handle.samplerIndex)}; + auto textureView{GetPoolTextureView(handle.textureIndex)}; + + std::scoped_lock lock(*textureView); + imageInfo.push_back(vk::DescriptorImageInfo{ + .sampler = **sampler, + .imageView = textureView->GetView(), + .imageLayout = textureView->texture->layout, + }); + executor.AttachTexture(textureView.get()); + executor.AttachDependency(std::move(sampler)); + } + } + shaderStagesInfo[count++] = vk::PipelineShaderStageCreateInfo{ .stage = pipelineStage.vkStage, .module = **pipelineStage.vkModule, diff --git a/app/src/main/cpp/skyline/gpu/texture/texture.cpp b/app/src/main/cpp/skyline/gpu/texture/texture.cpp index 93648706..dd04ab83 100644 --- a/app/src/main/cpp/skyline/gpu/texture/texture.cpp +++ b/app/src/main/cpp/skyline/gpu/texture/texture.cpp @@ -259,7 +259,7 @@ namespace skyline::gpu { mipLevels(1), layerCount(guest->layerCount), sampleCount(vk::SampleCountFlagBits::e1) { - vk::ImageUsageFlags usage{vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst}; + vk::ImageUsageFlags usage{vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled}; if (format->vkAspect & vk::ImageAspectFlagBits::eColor) usage |= vk::ImageUsageFlagBits::eColorAttachment; if (format->vkAspect & (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil))