From de300bfdbe75c464c058de042b966eadeca8de00 Mon Sep 17 00:00:00 2001 From: PixelyIon Date: Thu, 19 May 2022 17:13:13 +0530 Subject: [PATCH] Refactor Texture Swizzling The API for texture swizzling is now more concrete and abstracted out from `GuestTexture`, this allows for neater usage in certain areas such as MaxwellDMA while having a `GuestTexture` wrapper as well allowing for neater usage in those cases. The code itself has also been cleaned up slightly with all usage of `u32`s being upgraded to `size_t` as this is simply more efficient due to the compiler not needing to emulate wraparound behavior for integer types smaller than the processor word size. --- app/CMakeLists.txt | 1 + .../main/cpp/skyline/gpu/texture/layout.cpp | 196 ++++++++++++++++++ app/src/main/cpp/skyline/gpu/texture/layout.h | 171 +++------------ .../main/cpp/skyline/gpu/texture/texture.cpp | 2 +- 4 files changed, 223 insertions(+), 147 deletions(-) create mode 100644 app/src/main/cpp/skyline/gpu/texture/layout.cpp diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt index b1503182..4087647e 100644 --- a/app/CMakeLists.txt +++ b/app/CMakeLists.txt @@ -166,6 +166,7 @@ add_library(skyline SHARED ${source_DIR}/skyline/gpu/command_scheduler.cpp ${source_DIR}/skyline/gpu/descriptor_allocator.cpp ${source_DIR}/skyline/gpu/texture/texture.cpp + ${source_DIR}/skyline/gpu/texture/layout.cpp ${source_DIR}/skyline/gpu/buffer.cpp ${source_DIR}/skyline/gpu/presentation_engine.cpp ${source_DIR}/skyline/gpu/shader_manager.cpp diff --git a/app/src/main/cpp/skyline/gpu/texture/layout.cpp b/app/src/main/cpp/skyline/gpu/texture/layout.cpp new file mode 100644 index 00000000..910e7a7a --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/texture/layout.cpp @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#include "layout.h" + +namespace skyline::gpu::texture { + // Reference on Block-linear tiling: 
https://gist.github.com/PixelyIon/d9c35050af0ef5690566ca9f0965bc32 + constexpr u8 SectorWidth{16}; // The width of a sector in bytes + constexpr u8 SectorHeight{2}; // The height of a sector in lines + constexpr u8 GobWidth{64}; // The width of a GOB in bytes + constexpr u8 GobHeight{8}; // The height of a GOB in lines + + size_t GetBlockLinearLayerSize(Dimensions dimensions, size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb, size_t gobBlockHeight, size_t gobBlockDepth) { + size_t robLineWidth{util::DivideCeil(dimensions.width, formatBlockWidth)}; //!< The width of the ROB in terms of format blocks + size_t robLineBytes{util::AlignUp(robLineWidth * formatBpb, GobWidth)}; //!< The amount of bytes in a single block + + size_t robHeight{GobHeight * gobBlockHeight}; //!< The height of a single ROB (Row of Blocks) in lines + size_t surfaceHeightLines{util::DivideCeil(dimensions.height, formatBlockHeight)}; //!< The height of the surface in lines + size_t surfaceHeightRobs{util::DivideCeil(surfaceHeightLines, robHeight)}; //!< The height of the surface in ROBs (Row Of Blocks, incl. 
padding ROB) + + size_t robDepth{util::AlignUp(dimensions.depth, gobBlockDepth)}; //!< The depth of the surface in slices, aligned to include padding Z-axis GOBs + + return robLineBytes * robHeight * surfaceHeightRobs * robDepth; + } + + /** + * @brief Copies pixel data between a linear and blocklinear texture + */ + template + void CopyBlockLinearInternal(Dimensions dimensions, + size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb, + size_t gobBlockHeight, size_t gobBlockDepth, + u8 *blockLinear, u8 *linear) { + size_t robWidthUnalignedBytes{(dimensions.width / formatBlockWidth) * formatBpb}; + size_t robWidthBytes{util::AlignUp(robWidthUnalignedBytes, GobWidth)}; + size_t robWidthBlocks{robWidthUnalignedBytes / GobWidth}; + + size_t blockHeight{gobBlockHeight}; + size_t robHeight{GobHeight * blockHeight}; + size_t surfaceHeightLines{dimensions.height / formatBlockHeight}; + size_t surfaceHeightRobs{surfaceHeightLines / robHeight}; //!< The height of the surface in ROBs excluding padding ROBs + + size_t blockDepth{std::min(dimensions.depth, gobBlockDepth)}; + size_t blockPaddingZ{SectorWidth * SectorHeight * blockHeight * (gobBlockDepth - blockDepth)}; + + bool hasPaddingBlock{robWidthUnalignedBytes != robWidthBytes}; + size_t blockPaddingOffset{hasPaddingBlock ? 
(GobWidth - (robWidthBytes - robWidthUnalignedBytes)) : 0}; + + size_t robBytes{robWidthUnalignedBytes * robHeight}; + size_t gobYOffset{robWidthUnalignedBytes * GobHeight}; + size_t gobZOffset{robWidthUnalignedBytes * surfaceHeightLines}; + + u8 *sector{blockLinear}; + + auto deswizzleRob{[&](u8 *linearRob, auto isLastRob, size_t blockPaddingY = 0, size_t blockExtentY = 0) { + auto deswizzleBlock{[&](u8 *linearBlock, auto copySector) __attribute__((always_inline)) { + for (size_t gobZ{}; gobZ < blockDepth; gobZ++) { // Every Block contains `blockDepth` Z-axis GOBs (Slices) + u8 *linearGob{linearBlock}; + for (size_t gobY{}; gobY < blockHeight; gobY++) { // Every Block contains `blockHeight` Y-axis GOBs + #pragma clang loop unroll_count(32) + for (size_t index{}; index < SectorWidth * SectorHeight; index++) { // Every Y-axis GOB contains `sectorWidth * sectorHeight` sectors + size_t xT{((index << 3) & 0b10000) | ((index << 1) & 0b100000)}; // Morton-Swizzle on the X-axis + size_t yT{((index >> 1) & 0b110) | (index & 0b1)}; // Morton-Swizzle on the Y-axis + + if constexpr (!isLastRob) { + copySector(linearGob + (yT * robWidthUnalignedBytes) + xT, xT); + } else { + if (gobY != blockHeight - 1 || yT < blockExtentY) + copySector(linearGob + (yT * robWidthUnalignedBytes) + xT, xT); + else + sector += SectorWidth; + } + } + + linearGob += gobYOffset; // Increment the linear GOB to the next Y-axis GOB + } + + linearBlock += gobZOffset; // Increment the linear block to the next Z-axis GOB + } + + sector += blockPaddingZ; // Skip over any padding Z-axis GOBs + }}; + + for (size_t block{}; block < robWidthBlocks; block++) { // Every ROB contains `surfaceWidthBlocks` blocks (excl. 
padding block) + deswizzleBlock(linearRob, [&](u8 *linearSector, size_t) __attribute__((always_inline)) { + if constexpr (BlockLinearToLinear) + std::memcpy(linearSector, sector, SectorWidth); + else + std::memcpy(sector, linearSector, SectorWidth); + sector += SectorWidth; // `sectorWidth` bytes are of sequential image data + }); + + if constexpr (isLastRob) + sector += blockPaddingY; // Skip over any padding at the end of this block + linearRob += GobWidth; // Increment the linear block to the next block (As Block Width = 1 GOB Width) + } + + if (hasPaddingBlock) + deswizzleBlock(linearRob, [&](u8 *linearSector, size_t xT) __attribute__((always_inline)) { + #pragma clang loop unroll_count(4) + for (size_t pixelOffset{}; pixelOffset < SectorWidth; pixelOffset += formatBpb) { + if (xT < blockPaddingOffset) + if constexpr (BlockLinearToLinear) + std::memcpy(linearSector + pixelOffset, sector, formatBpb); + else + std::memcpy(sector, linearSector + pixelOffset, formatBpb); + + sector += formatBpb; + xT += formatBpb; + } + }); + }}; + + u8 *linearRob{linear}; + for (size_t rob{}; rob < surfaceHeightRobs; rob++) { // Every Surface contains `surfaceHeightRobs` ROBs (excl. padding ROB) + deswizzleRob(linearRob, std::false_type{}); + linearRob += robBytes; // Increment the linear ROB to the next ROB + } + + if (surfaceHeightLines % robHeight != 0) { + blockHeight = (util::AlignUp(surfaceHeightLines, GobHeight) - (surfaceHeightRobs * robHeight)) / GobHeight; // Calculate the amount of Y GOBs which aren't padding + + size_t alignedSurfaceLines{util::DivideCeil(dimensions.height, formatBlockHeight)}; + deswizzleRob( + linearRob, + std::true_type{}, + (gobBlockHeight - blockHeight) * (SectorWidth * SectorWidth * SectorHeight), // Calculate padding at the end of a block to skip + util::IsAligned(alignedSurfaceLines, GobHeight) ? 
GobHeight : alignedSurfaceLines - util::AlignDown(alignedSurfaceLines, GobHeight) // Calculate the line relative to the start of the last GOB that is the cut-off point for the image + ); + } + } + + void CopyBlockLinearToLinear(Dimensions dimensions, size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb, size_t gobBlockHeight, size_t gobBlockDepth, u8 *blockLinear, u8 *linear) { + CopyBlockLinearInternal( + dimensions, + formatBlockWidth, formatBlockHeight, formatBpb, + gobBlockHeight, gobBlockDepth, + blockLinear, linear + ); + } + + void CopyBlockLinearToLinear(const GuestTexture &guest, u8 *blockLinear, u8 *linear) { + CopyBlockLinearInternal( + guest.dimensions, + guest.format->blockWidth, guest.format->blockHeight, guest.format->bpb, + guest.tileConfig.blockHeight, guest.tileConfig.blockDepth, + blockLinear, linear + ); + } + + void CopyLinearToBlockLinear(Dimensions dimensions, size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb, size_t gobBlockHeight, size_t gobBlockDepth, u8 *linear, u8 *blockLinear) { + CopyBlockLinearInternal( + dimensions, + formatBlockWidth, formatBlockHeight, formatBpb, + gobBlockHeight, gobBlockDepth, + blockLinear, linear + ); + } + + void CopyLinearToBlockLinear(const GuestTexture &guest, u8 *linear, u8 *blockLinear) { + CopyBlockLinearInternal( + guest.dimensions, + guest.format->blockWidth, guest.format->blockHeight, guest.format->bpb, + guest.tileConfig.blockHeight, guest.tileConfig.blockDepth, + blockLinear, linear + ); + } + + void CopyPitchLinearToLinear(const GuestTexture &guest, u8 *guestInput, u8 *linearOutput) { + auto sizeLine{guest.format->GetSize(guest.dimensions.width, 1)}; //!< The size of a single line of pixel data + auto sizeStride{guest.tileConfig.pitch}; //!< The size of a single stride of pixel data + + auto inputLine{guestInput}; + auto outputLine{linearOutput}; + + for (size_t line{}; line < guest.dimensions.height; line++) { + std::memcpy(outputLine, inputLine, sizeLine); + 
inputLine += sizeStride; + outputLine += sizeLine; + } + } + + void CopyLinearToPitchLinear(const GuestTexture &guest, u8 *linearInput, u8 *guestOutput) { + auto sizeLine{guest.format->GetSize(guest.dimensions.width, 1)}; //!< The size of a single line of pixel data + auto sizeStride{guest.tileConfig.pitch}; //!< The size of a single stride of pixel data + + auto inputLine{linearInput}; + auto outputLine{guestOutput}; + + for (size_t line{}; line < guest.dimensions.height; line++) { + std::memcpy(outputLine, inputLine, sizeLine); + inputLine += sizeLine; + outputLine += sizeStride; + } + } +} diff --git a/app/src/main/cpp/skyline/gpu/texture/layout.h b/app/src/main/cpp/skyline/gpu/texture/layout.h index 47bd9245..e4368816 100644 --- a/app/src/main/cpp/skyline/gpu/texture/layout.h +++ b/app/src/main/cpp/skyline/gpu/texture/layout.h @@ -6,169 +6,48 @@ #include "texture.h" namespace skyline::gpu::texture { - // Reference on Block-linear tiling: https://gist.github.com/PixelyIon/d9c35050af0ef5690566ca9f0965bc32 - constexpr u8 SectorWidth{16}; // The width of a sector in bytes - constexpr u8 SectorHeight{2}; // The height of a sector in lines - constexpr u8 GobWidth{64}; // The width of a GOB in bytes - constexpr u8 GobHeight{8}; // The height of a GOB in lines - - inline size_t GetBlockLinearLayerSize(const GuestTexture &guest) { - u32 blockBytes{util::AlignUp((guest.dimensions.width / guest.format->blockWidth) * guest.format->bpb, GobWidth)}; //!< The amount of bytes in a single block - - u32 robHeight{GobHeight * static_cast(guest.tileConfig.blockHeight)}; //!< The height of a single ROB (Row of Blocks) in lines - u32 surfaceHeightLines{util::DivideCeil(guest.dimensions.height, u32{guest.format->blockHeight})}; //!< The height of the surface in lines - u32 surfaceHeightRobs{util::DivideCeil(surfaceHeightLines, robHeight)}; //!< The height of the surface in ROBs (Row Of Blocks, incl. 
padding ROB) - - u32 robDepth{util::AlignUp(guest.dimensions.depth, guest.tileConfig.blockDepth)}; //!< The depth of the surface in slices, aligned to include padding Z-axis GOBs - - return blockBytes * robHeight * surfaceHeightRobs * robDepth; - } + /** + * @return The size of a layer of the specified non-mipmapped block-linear surface in bytes + */ + size_t GetBlockLinearLayerSize(Dimensions dimensions, + size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb, + size_t gobBlockHeight, size_t gobBlockDepth); /** - * @brief Copies pixel data between a linear and blocklinear texture + * @brief Copies the contents of a blocklinear texture to a linear output buffer */ - template - void CopyBlockLinearInternal(const GuestTexture &guest, u8 *blockLinear, u8 *linear, CopyFunction copyFunction) { - u32 formatBpb{guest.format->bpb}; - u32 robWidthUnalignedBytes{(guest.dimensions.width / guest.format->blockWidth) * formatBpb}; - u32 robWidthBytes{util::AlignUp(robWidthUnalignedBytes, GobWidth)}; - u32 robWidthBlocks{robWidthUnalignedBytes / GobWidth}; - - u32 blockHeight{guest.tileConfig.blockHeight}; - u32 robHeight{GobHeight * blockHeight}; - u32 surfaceHeightLines{guest.dimensions.height / guest.format->blockHeight}; - u32 surfaceHeightRobs{surfaceHeightLines / robHeight}; //!< The height of the surface in ROBs excluding padding ROBs - - u32 blockDepth{std::min(guest.dimensions.depth, static_cast(guest.tileConfig.blockDepth))}; - u32 blockPaddingZ{SectorWidth * SectorHeight * blockHeight * (guest.tileConfig.blockDepth - blockDepth)}; - - bool hasPaddingBlock{robWidthUnalignedBytes != robWidthBytes}; - u32 blockPaddingOffset{hasPaddingBlock ? 
(GobWidth - (robWidthBytes - robWidthUnalignedBytes)) : 0}; - - u32 robBytes{robWidthUnalignedBytes * robHeight}; - u32 gobYOffset{robWidthUnalignedBytes * GobHeight}; - u32 gobZOffset{robWidthUnalignedBytes * surfaceHeightLines}; - - u8 *sector{blockLinear}; - - auto deswizzleRob{[&](u8 *linearRob, auto isLastRob, u32 blockPaddingY = 0, u32 blockExtentY = 0) { - auto deswizzleBlock{[&](u8 *linearBlock, auto copySector) __attribute__((always_inline)) { - for (u32 gobZ{}; gobZ < blockDepth; gobZ++) { // Every Block contains `blockDepth` Z-axis GOBs (Slices) - u8 *linearGob{linearBlock}; - for (u32 gobY{}; gobY < blockHeight; gobY++) { // Every Block contains `blockHeight` Y-axis GOBs - #pragma clang loop unroll_count(32) - for (u32 index{}; index < SectorWidth * SectorHeight; index++) { // Every Y-axis GOB contains `sectorWidth * sectorHeight` sectors - u32 xT{((index << 3) & 0b10000) | ((index << 1) & 0b100000)}; // Morton-Swizzle on the X-axis - u32 yT{((index >> 1) & 0b110) | (index & 0b1)}; // Morton-Swizzle on the Y-axis - - if constexpr (!isLastRob) { - copySector(linearGob + (yT * robWidthUnalignedBytes) + xT, xT); - } else { - if (gobY != blockHeight - 1 || yT < blockExtentY) - copySector(linearGob + (yT * robWidthUnalignedBytes) + xT, xT); - else - sector += SectorWidth; - } - } - - linearGob += gobYOffset; // Increment the linear GOB to the next Y-axis GOB - } - - linearBlock += gobZOffset; // Increment the linear block to the next Z-axis GOB - } - - sector += blockPaddingZ; // Skip over any padding Z-axis GOBs - }}; - - for (u32 block{}; block < robWidthBlocks; block++) { // Every ROB contains `surfaceWidthBlocks` blocks (excl. 
padding block) - deswizzleBlock(linearRob, [&](u8 *linearSector, u32) __attribute__((always_inline)) { - copyFunction(linearSector, sector, SectorWidth); - sector += SectorWidth; // `sectorWidth` bytes are of sequential image data - }); - - if constexpr (isLastRob) - sector += blockPaddingY; // Skip over any padding at the end of this block - linearRob += GobWidth; // Increment the linear block to the next block (As Block Width = 1 GOB Width) - } - - if (hasPaddingBlock) - deswizzleBlock(linearRob, [&](u8 *linearSector, u32 xT) __attribute__((always_inline)) { - #pragma clang loop unroll_count(4) - for (u32 pixelOffset{}; pixelOffset < SectorWidth; pixelOffset += formatBpb) { - if (xT < blockPaddingOffset) - copyFunction(linearSector + pixelOffset, sector, formatBpb); - sector += formatBpb; - xT += formatBpb; - } - }); - }}; - - u8 *linearRob{linear}; - for (u32 rob{}; rob < surfaceHeightRobs; rob++) { // Every Surface contains `surfaceHeightRobs` ROBs (excl. padding ROB) - deswizzleRob(linearRob, std::false_type{}); - linearRob += robBytes; // Increment the linear ROB to the next ROB - } - - if (surfaceHeightLines % robHeight != 0) { - blockHeight = (util::AlignUp(surfaceHeightLines, GobHeight) - (surfaceHeightRobs * robHeight)) / GobHeight; // Calculate the amount of Y GOBs which aren't padding - - u32 alignedSurfaceLines{util::DivideCeil(guest.dimensions.height, u32{guest.format->blockHeight})}; - deswizzleRob( - linearRob, - std::true_type{}, - (guest.tileConfig.blockHeight - blockHeight) * (SectorWidth * SectorWidth * SectorHeight), // Calculate padding at the end of a block to skip - util::IsAligned(alignedSurfaceLines, GobHeight) ? 
GobHeight : alignedSurfaceLines - util::AlignDown(alignedSurfaceLines, GobHeight) // Calculate the line relative to the start of the last GOB that is the cut-off point for the image - ); - } - } + void CopyBlockLinearToLinear(Dimensions dimensions, + size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb, + size_t gobBlockHeight, size_t gobBlockDepth, + u8 *blockLinear, u8 *linear); /** * @brief Copies the contents of a blocklinear guest texture to a linear output buffer */ - inline void CopyBlockLinearToLinear(const GuestTexture &guest, u8 *guestInput, u8 *linearOutput) { - CopyBlockLinearInternal(guest, guestInput, linearOutput, std::memcpy); - } + void CopyBlockLinearToLinear(const GuestTexture &guest, u8 *blockLinear, u8 *linear); + + /** + * @brief Copies the contents of a linear input buffer to a blocklinear texture + */ + void CopyLinearToBlockLinear(Dimensions dimensions, + size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb, + size_t gobBlockHeight, size_t gobBlockDepth, + u8 *linear, u8 *blockLinear); /** * @brief Copies the contents of a blocklinear guest texture to a linear output buffer */ - inline void CopyLinearToBlockLinear(const GuestTexture &guest, u8 *linearInput, u8 *guestOutput) { - CopyBlockLinearInternal(guest, guestOutput, linearInput, [](u8 *src, u8 *dst, size_t size) { - std::memcpy(dst, src, size); - }); - } + void CopyLinearToBlockLinear(const GuestTexture &guest, u8 *linear, u8 *blockLinear); /** * @brief Copies the contents of a pitch-linear guest texture to a linear output buffer + * @note This does not support 3D textures */ - inline void CopyPitchLinearToLinear(const GuestTexture &guest, u8 *guestInput, u8 *linearOutput) { - auto sizeLine{guest.format->GetSize(guest.dimensions.width, 1)}; //!< The size of a single line of pixel data - auto sizeStride{guest.tileConfig.pitch}; //!< The size of a single stride of pixel data - - auto inputLine{guestInput}; - auto outputLine{linearOutput}; - - for (u32 
line{}; line < guest.dimensions.height; line++) { - std::memcpy(outputLine, inputLine, sizeLine); - inputLine += sizeStride; - outputLine += sizeLine; - } - } + void CopyPitchLinearToLinear(const GuestTexture &guest, u8 *guestInput, u8 *linearOutput); /** * @brief Copies the contents of a linear buffer to a pitch-linear guest texture + * @note This does not support 3D textures */ - inline void CopyLinearToPitchLinear(const GuestTexture &guest, u8 *linearInput, u8 *guestOutput) { - auto sizeLine{guest.format->GetSize(guest.dimensions.width, 1)}; //!< The size of a single line of pixel data - auto sizeStride{guest.tileConfig.pitch}; //!< The size of a single stride of pixel data - - auto inputLine{linearInput}; - auto outputLine{guestOutput}; - - for (u32 line{}; line < guest.dimensions.height; line++) { - std::memcpy(outputLine, inputLine, sizeLine); - inputLine += sizeLine; - outputLine += sizeStride; - } - } + void CopyLinearToPitchLinear(const GuestTexture &guest, u8 *linearInput, u8 *guestOutput); } diff --git a/app/src/main/cpp/skyline/gpu/texture/texture.cpp b/app/src/main/cpp/skyline/gpu/texture/texture.cpp index 352f11a0..95d6ed68 100644 --- a/app/src/main/cpp/skyline/gpu/texture/texture.cpp +++ b/app/src/main/cpp/skyline/gpu/texture/texture.cpp @@ -22,7 +22,7 @@ namespace skyline::gpu { return layerStride = dimensions.height * tileConfig.pitch; case texture::TileMode::Block: - return layerStride = static_cast(texture::GetBlockLinearLayerSize(*this)); + return layerStride = static_cast(texture::GetBlockLinearLayerSize(dimensions, format->blockWidth, format->blockHeight, format->bpb, tileConfig.blockHeight, tileConfig.blockDepth)); } }