From 395f665a1340dc87f1a2364f74a3178c8b25500e Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Sun, 31 Jul 2022 15:05:20 +0100 Subject: [PATCH] Implement a system for helper shaders together with a simple blit shader It is desirable for us to use a shader for blits to allow easily emulating out of bounds blits and blits between different swizzled colour formats. The helper shader infrastructure is designed to be generic so it can be reused by any other helper shaders that we may need in the future. --- app/CMakeLists.txt | 2 + app/src/main/cpp/skyline/gpu.cpp | 1 + app/src/main/cpp/skyline/gpu.h | 3 + .../skyline/gpu/shaders/helper_shaders.cpp | 268 ++++++++++++++++++ .../cpp/skyline/gpu/shaders/helper_shaders.h | 86 ++++++ app/src/main/cpp/skyline/os.cpp | 2 +- app/src/main/cpp/skyline/os.h | 2 +- app/src/main/shaders/blit.frag | 21 ++ app/src/main/shaders/blit.vert | 22 ++ 9 files changed, 405 insertions(+), 2 deletions(-) create mode 100644 app/src/main/cpp/skyline/gpu/shaders/helper_shaders.cpp create mode 100644 app/src/main/cpp/skyline/gpu/shaders/helper_shaders.h create mode 100644 app/src/main/shaders/blit.frag create mode 100644 app/src/main/shaders/blit.vert diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt index f201a37a..b0e2f70c 100644 --- a/app/CMakeLists.txt +++ b/app/CMakeLists.txt @@ -175,9 +175,11 @@ add_library(skyline SHARED ${source_DIR}/skyline/gpu/cache/graphics_pipeline_cache.cpp ${source_DIR}/skyline/gpu/cache/renderpass_cache.cpp ${source_DIR}/skyline/gpu/cache/framebuffer_cache.cpp + ${source_DIR}/skyline/gpu/interconnect/fermi_2d.cpp ${source_DIR}/skyline/gpu/interconnect/command_executor.cpp ${source_DIR}/skyline/gpu/interconnect/command_nodes.cpp ${source_DIR}/skyline/gpu/interconnect/conversion/quads.cpp + ${source_DIR}/skyline/gpu/shaders/helper_shaders.cpp ${source_DIR}/skyline/soc/smmu.cpp ${source_DIR}/skyline/soc/host1x/syncpoint.cpp ${source_DIR}/skyline/soc/host1x/command_fifo.cpp diff --git a/app/src/main/cpp/skyline/gpu.cpp 
b/app/src/main/cpp/skyline/gpu.cpp index 5a216ef9..9ea60fed 100644 --- a/app/src/main/cpp/skyline/gpu.cpp +++ b/app/src/main/cpp/skyline/gpu.cpp @@ -375,6 +375,7 @@ namespace skyline::gpu { megaBufferAllocator(*this), descriptor(*this), shader(state, *this), + helperShaders(*this, state.os->assetFileSystem), graphicsPipelineCache(*this), renderPassCache(*this), framebufferCache(*this) {} diff --git a/app/src/main/cpp/skyline/gpu.h b/app/src/main/cpp/skyline/gpu.h index 2b3083f1..33959cd4 100644 --- a/app/src/main/cpp/skyline/gpu.h +++ b/app/src/main/cpp/skyline/gpu.h @@ -12,6 +12,7 @@ #include "gpu/megabuffer.h" #include "gpu/descriptor_allocator.h" #include "gpu/shader_manager.h" +#include "gpu/shaders/helper_shaders.h" #include "gpu/cache/graphics_pipeline_cache.h" #include "gpu/cache/renderpass_cache.h" #include "gpu/cache/framebuffer_cache.h" @@ -50,6 +51,8 @@ namespace skyline::gpu { DescriptorAllocator descriptor; ShaderManager shader; + HelperShaders helperShaders; + cache::GraphicsPipelineCache graphicsPipelineCache; cache::RenderPassCache renderPassCache; cache::FramebufferCache framebufferCache; diff --git a/app/src/main/cpp/skyline/gpu/shaders/helper_shaders.cpp b/app/src/main/cpp/skyline/gpu/shaders/helper_shaders.cpp new file mode 100644 index 00000000..1960480e --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/shaders/helper_shaders.cpp @@ -0,0 +1,268 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#include +#include +#include +#include +#include +#include "helper_shaders.h" + +namespace skyline::gpu { + static vk::raii::ShaderModule CreateShaderModule(GPU &gpu, vfs::Backing &shaderBacking) { + std::vector shaderBuf(shaderBacking.size / 4); + + if (shaderBacking.Read(span(shaderBuf)) != shaderBacking.size) + throw exception("Failed to read shader"); + + return gpu.vkDevice.createShaderModule( + { + .pCode = shaderBuf.data(), + .codeSize = shaderBacking.size, + } + ); + } + 
+ SimpleColourRTShader::SimpleColourRTShader(GPU &gpu, std::shared_ptr vertexShader, std::shared_ptr fragmentShader) + : vertexShaderModule{CreateShaderModule(gpu, *vertexShader)}, + fragmentShaderModule{CreateShaderModule(gpu, *fragmentShader)}, + shaderStages{{ + vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eVertex, + .pName = "main", + .module = *vertexShaderModule + }, + vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eFragment, + .pName = "main", + .module = *fragmentShaderModule + }} + } {} + + cache::GraphicsPipelineCache::CompiledPipeline SimpleColourRTShader::GetPipeline(GPU &gpu, + TextureView *colorAttachment, + span layoutBindings, span pushConstantRanges) { + constexpr static vk::PipelineInputAssemblyStateCreateInfo inputAssemblyState{ + .topology = vk::PrimitiveTopology::eTriangleList, + .primitiveRestartEnable = false + }; + + constexpr static vk::PipelineTessellationStateCreateInfo tesselationState{ + .patchControlPoints = 0, + }; + + const static vk::StructureChain rasterizationState{ + { + .depthClampEnable = false, + .rasterizerDiscardEnable = false, + .polygonMode = vk::PolygonMode::eFill, + .lineWidth = 1.0f, + .cullMode = vk::CullModeFlagBits::eNone, + .frontFace = vk::FrontFace::eCounterClockwise, + .depthBiasEnable = false + }, { + .provokingVertexMode = vk::ProvokingVertexModeEXT::eFirstVertex + } + }; + + constexpr static vk::PipelineMultisampleStateCreateInfo multisampleState{ + .rasterizationSamples = vk::SampleCountFlagBits::e1, + .sampleShadingEnable = false, + .minSampleShading = 1.0f, + .alphaToCoverageEnable = false, + .alphaToOneEnable = false + }; + + constexpr static vk::PipelineDepthStencilStateCreateInfo depthStencilState{ + .depthTestEnable = false, + .depthWriteEnable = false, + .depthBoundsTestEnable = false, + .stencilTestEnable = false, + }; + + constexpr static vk::PipelineColorBlendAttachmentState attachmentState{ + .blendEnable = false, + .colorWriteMask = 
vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA + }; + + constexpr static vk::PipelineColorBlendStateCreateInfo blendState{ + .logicOpEnable = false, + .attachmentCount = 1, + .pAttachments = &attachmentState + }; + + vk::StructureChain vertexState{ + { + .vertexAttributeDescriptionCount = 0, + .vertexBindingDescriptionCount = 0 + }, {} + }; + + vertexState.unlink(); + + auto colourAttachmentDimensions{colorAttachment->texture->dimensions}; + + vk::Viewport viewport{ + .height = static_cast(colourAttachmentDimensions.height), + .width = static_cast(colourAttachmentDimensions.width), + .x = 0.0f, + .y = 0.0f, + .minDepth = 0.0f, + .maxDepth = 1.0f + }; + + vk::Rect2D scissor{ + .extent = colourAttachmentDimensions + }; + + vk::PipelineViewportStateCreateInfo viewportState{ + .pViewports = &viewport, + .viewportCount = 1, + .pScissors = &scissor, + .scissorCount = 1 + }; + + return gpu.graphicsPipelineCache.GetCompiledPipeline(cache::GraphicsPipelineCache::PipelineState{ + .shaderStages = shaderStages, + .vertexState = vertexState, + .inputAssemblyState = inputAssemblyState, + .tessellationState = tesselationState, + .viewportState = viewportState, + .rasterizationState = rasterizationState, + .multisampleState = multisampleState, + .depthStencilState = depthStencilState, + .colorBlendState = blendState, + .colorAttachments = span{colorAttachment}, + .depthStencilAttachment = nullptr, + }, layoutBindings, pushConstantRanges); + } + + namespace glsl { + struct Vec2 { + float x, y; + }; + } + + namespace blit { + struct VertexPushConstantLayout { + glsl::Vec2 dstOriginClipSpace; + glsl::Vec2 dstDimensionsClipSpace; + }; + + struct FragmentPushConstantLayout { + glsl::Vec2 srcOriginUV; + glsl::Vec2 dstSrcScaleFactor; + float srcHeightRecip; + }; + + constexpr static std::array PushConstantRanges{ + vk::PushConstantRange{ + .stageFlags = vk::ShaderStageFlagBits::eVertex, + .size = 
sizeof(VertexPushConstantLayout), + .offset = 0 + }, vk::PushConstantRange{ + .stageFlags = vk::ShaderStageFlagBits::eFragment, + .size = sizeof(FragmentPushConstantLayout), + .offset = sizeof(VertexPushConstantLayout) + } + }; + + constexpr static vk::DescriptorSetLayoutBinding SamplerLayoutBinding{ + .binding = 0, + .descriptorType = vk::DescriptorType::eCombinedImageSampler, + .descriptorCount = 1, + .stageFlags = vk::ShaderStageFlagBits::eFragment + }; + }; + + BlitHelperShader::BlitHelperShader(GPU &gpu, std::shared_ptr shaderFileSystem) + : SimpleColourRTShader{gpu, shaderFileSystem->OpenFile("shaders/blit.vert.spv"), shaderFileSystem->OpenFile("shaders/blit.frag.spv")}, + bilinearSampler{gpu.vkDevice.createSampler( + vk::SamplerCreateInfo{ + .addressModeU = vk::SamplerAddressMode::eRepeat, + .addressModeV = vk::SamplerAddressMode::eRepeat, + .addressModeW = vk::SamplerAddressMode::eRepeat, + .anisotropyEnable = false, + .compareEnable = false, + .magFilter = vk::Filter::eLinear, + .minFilter = vk::Filter::eLinear + }) + }, + nearestSampler{gpu.vkDevice.createSampler( + vk::SamplerCreateInfo{ + .addressModeU = vk::SamplerAddressMode::eRepeat, + .addressModeV = vk::SamplerAddressMode::eRepeat, + .addressModeW = vk::SamplerAddressMode::eRepeat, + .anisotropyEnable = false, + .compareEnable = false, + .magFilter = vk::Filter::eNearest, + .minFilter = vk::Filter::eNearest + }) + } {} + + void BlitHelperShader::Blit(GPU &gpu, BlitRect srcRect, BlitRect dstRect, + vk::Extent2D srcImageDimensions, vk::Extent2D dstImageDimensions, + float dstSrcScaleFactorX, float dstSrcScaleFactorY, + bool bilinearFilter, + TextureView *srcImageView, TextureView *dstImageView, + std::function &, GPU &, vk::RenderPass, u32)> &&)> &&recordCb) { + struct DrawState { + blit::VertexPushConstantLayout vertexPushConstants; + blit::FragmentPushConstantLayout fragmentPushConstants; + DescriptorAllocator::ActiveDescriptorSet descriptorSet; + cache::GraphicsPipelineCache::CompiledPipeline 
pipeline; + + DrawState(GPU &gpu, + blit::VertexPushConstantLayout vertexPushConstants, blit::FragmentPushConstantLayout fragmentPushConstants, + cache::GraphicsPipelineCache::CompiledPipeline pipeline) + : vertexPushConstants{vertexPushConstants}, fragmentPushConstants{fragmentPushConstants}, + descriptorSet{gpu.descriptor.AllocateSet(pipeline.descriptorSetLayout)}, + pipeline{pipeline} {} + }; + + auto drawState{std::make_shared( + gpu, + blit::VertexPushConstantLayout{ + .dstOriginClipSpace = {(2.0f * dstRect.x) / dstImageDimensions.width - 1.0f, (2.0f * dstRect.y) / dstImageDimensions.height - 1.0f}, + .dstDimensionsClipSpace = {(2.0f * dstRect.width) / dstImageDimensions.width, (2.0f * dstRect.height) / dstImageDimensions.height} + }, blit::FragmentPushConstantLayout{ + .srcOriginUV = {srcRect.x / srcImageDimensions.width, srcRect.y / srcImageDimensions.height}, + .dstSrcScaleFactor = {dstSrcScaleFactorX * (srcRect.width / srcImageDimensions.width), dstSrcScaleFactorY * (srcRect.height / srcImageDimensions.height)}, + .srcHeightRecip = 1.0f / srcImageDimensions.height + }, + GetPipeline(gpu, dstImageView, {blit::SamplerLayoutBinding}, blit::PushConstantRanges)) + }; + + vk::DescriptorImageInfo imageInfo{ + .imageLayout = vk::ImageLayout::eGeneral, + .imageView = srcImageView->GetView(), + .sampler = bilinearFilter ? 
*bilinearSampler : *nearestSampler + }; + + std::array writes{vk::WriteDescriptorSet{ + .dstBinding = 0, + .descriptorType = vk::DescriptorType::eCombinedImageSampler, + .descriptorCount = 1, + .dstSet = *drawState->descriptorSet, + .pImageInfo = &imageInfo + }}; + + gpu.vkDevice.updateDescriptorSets(writes, nullptr); + + recordCb([drawState = std::move(drawState)](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &cycle, GPU &gpu, vk::RenderPass, u32) { + cycle->AttachObject(drawState); + commandBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, drawState->pipeline.pipeline); + commandBuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, drawState->pipeline.pipelineLayout, 0, *drawState->descriptorSet, nullptr); + commandBuffer.pushConstants(drawState->pipeline.pipelineLayout, vk::ShaderStageFlagBits::eVertex, 0, + vk::ArrayProxy{drawState->vertexPushConstants}); + commandBuffer.pushConstants(drawState->pipeline.pipelineLayout, vk::ShaderStageFlagBits::eFragment, sizeof(blit::VertexPushConstantLayout), + vk::ArrayProxy{drawState->fragmentPushConstants}); + commandBuffer.draw(6, 1, 0, 0); + }); + + } + + HelperShaders::HelperShaders(GPU &gpu, std::shared_ptr shaderFileSystem) + : blitHelperShader(gpu, std::move(shaderFileSystem)) {} + +} \ No newline at end of file diff --git a/app/src/main/cpp/skyline/gpu/shaders/helper_shaders.h b/app/src/main/cpp/skyline/gpu/shaders/helper_shaders.h new file mode 100644 index 00000000..c0bf1e5b --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/shaders/helper_shaders.h @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#pragma once + +#include +#include +#include + +namespace skyline::vfs { + class FileSystem; +} + +namespace skyline::gpu { + class TextureView; + class GPU; + + /** + * @brief A base class that can be inherited by helper shaders that render to a single color rendertarget to simplify pipeline creation + */ 
+ class SimpleColourRTShader { + protected: + vk::raii::ShaderModule vertexShaderModule; + vk::raii::ShaderModule fragmentShaderModule; + + std::array shaderStages; //!< Shader stages for the vertex and fragment shader modules + + SimpleColourRTShader(GPU &gpu, std::shared_ptr vertexShader, std::shared_ptr fragmentShader); + + /** + * @brief Returns a potentially cached pipeline built according to the supplied input state + */ + cache::GraphicsPipelineCache::CompiledPipeline GetPipeline(GPU &gpu, + TextureView *colorAttachment, + span layoutBindings, span pushConstantRanges); + }; + + /** + * @brief Simple helper shader for blitting a texture to a rendertarget with subpixel-precision + */ + class BlitHelperShader : SimpleColourRTShader { + private: + vk::raii::Sampler bilinearSampler; + vk::raii::Sampler nearestSampler; + + public: + BlitHelperShader(GPU &gpu, std::shared_ptr shaderFileSystem); + + /** + * @brief Floating point equivalent to vk::Rect2D to allow for subpixel-precision blits + */ + struct BlitRect { + float width; + float height; + float x; + float y; + }; + + /** + * @brief Records a sequenced GPU blit operation + * @param srcRect A subrect of the source input texture that will be blitted from + * @param dstRect A subrect of the destination input texture that the source subrect will be blitted into + * @param dstSrcScaleFactorX Scale factor in the X direction from the destination image to the source image + * @param dstSrcScaleFactorY ^ but Y + * @param bilinearFilter Type of filter to use for sampling the source texture, false will use nearest-neighbour and true will use bilinear filtering + * @param recordCb Callback used to record the blit commands for sequenced execution on the GPU + */ + void Blit(GPU &gpu, BlitRect srcRect, BlitRect dstRect, + vk::Extent2D srcImageDimensions, vk::Extent2D dstImageDimensions, + float dstSrcScaleFactorX, float dstSrcScaleFactorY, + bool bilinearFilter, + TextureView *srcImageView, TextureView *dstImageView, + 
std::function &, GPU &, vk::RenderPass, u32)> &&)> &&recordCb); + }; + + /** + * @brief Holds all helper shaders to avoid redundantly recreating them on each usage + */ + struct HelperShaders { + BlitHelperShader blitHelperShader; + + HelperShaders(GPU &gpu, std::shared_ptr shaderFileSystem); + }; + + +} diff --git a/app/src/main/cpp/skyline/os.cpp b/app/src/main/cpp/skyline/os.cpp index 3948e1c6..3c85d766 100644 --- a/app/src/main/cpp/skyline/os.cpp +++ b/app/src/main/cpp/skyline/os.cpp @@ -24,9 +24,9 @@ namespace skyline::kernel { : nativeLibraryPath(std::move(nativeLibraryPath)), publicAppFilesPath(std::move(publicAppFilesPath)), privateAppFilesPath(std::move(privateAppFilesPath)), - state(this, jvmManager, settings), deviceTimeZone(std::move(deviceTimeZone)), assetFileSystem(std::move(assetFileSystem)), + state(this, jvmManager, settings), serviceManager(state) {} void OS::Execute(int romFd, loader::RomFormat romType) { diff --git a/app/src/main/cpp/skyline/os.h b/app/src/main/cpp/skyline/os.h index 041a5767..b46684d4 100644 --- a/app/src/main/cpp/skyline/os.h +++ b/app/src/main/cpp/skyline/os.h @@ -17,9 +17,9 @@ namespace skyline::kernel { std::string nativeLibraryPath; //!< The full path to the app's native library directory std::string publicAppFilesPath; //!< The full path to the app's public files directory std::string privateAppFilesPath; //!< The full path to the app's private files directory - DeviceState state; std::string deviceTimeZone; //!< The timezone name (e.g. 
Europe/London) std::shared_ptr assetFileSystem; //!< A filesystem to be used for accessing emulator assets (like tzdata) + DeviceState state; service::ServiceManager serviceManager; /** diff --git a/app/src/main/shaders/blit.frag b/app/src/main/shaders/blit.frag new file mode 100644 index 00000000..6ee85767 --- /dev/null +++ b/app/src/main/shaders/blit.frag @@ -0,0 +1,21 @@ +#version 460 + +layout (binding = 0, set = 0) uniform sampler2D src; +layout (location = 0) in vec2 dstUV; +layout (location = 0) out vec4 colour; + +layout (push_constant) uniform constants { + layout (offset = 16) + vec2 srcOriginUV; + vec2 dstSrcScaleFactor; + float srcHeightRecip; +} PC; + +void main() +{ + vec2 srcUV = dstUV * PC.dstSrcScaleFactor + PC.srcOriginUV; + // Account for out of bounds blits by moving to the next line of the source texture for the copy + srcUV.y += floor(srcUV.x) * PC.srcHeightRecip; + srcUV.x = srcUV.x - floor(srcUV.x); + colour.rgba = texture(src, srcUV); +} diff --git a/app/src/main/shaders/blit.vert b/app/src/main/shaders/blit.vert new file mode 100644 index 00000000..fce741cb --- /dev/null +++ b/app/src/main/shaders/blit.vert @@ -0,0 +1,22 @@ +#version 460 + +layout (location = 0) out vec2 dstPosition; + +layout (push_constant) uniform constants { + vec2 dstOriginClipSpace; + vec2 dstDimensionsClipSpace; +} PC; + +void main() { + const vec2 lut[6] = vec2[6]( + vec2(1, 0), + vec2(1, 1), + vec2(0, 1), + vec2(0, 1), + vec2(0, 0), + vec2(1, 0) + ); + + dstPosition = lut[gl_VertexIndex]; + gl_Position = vec4(PC.dstOriginClipSpace + PC.dstDimensionsClipSpace * lut[gl_VertexIndex], 0, 1); +}