Implement Array Texture Swizzling

Textures can have more than one layer, which we previously didn't handle: all layers past the initial one would be filled with random data or 0s, leading to incorrect rendering. Array texture swizzling has now been implemented, which fixes any titles that utilize array textures, such as "Super Mario Odyssey" or "Hatsune Miku: Project DIVA MegaMix".
This commit is contained in:
PixelyIon 2022-05-12 18:09:41 +05:30
parent 2a99e1784d
commit f2cc25ee9f
5 changed files with 53 additions and 32 deletions

View File

@ -113,6 +113,7 @@ namespace skyline::gpu::interconnect {
bool disabled{true}; //!< If this RT has been disabled and will be an unbound attachment instead bool disabled{true}; //!< If this RT has been disabled and will be an unbound attachment instead
IOVA iova{}; IOVA iova{};
u32 widthBytes{}; //!< The width in bytes for linear textures u32 widthBytes{}; //!< The width in bytes for linear textures
u32 layerStride{}; //!< The stride of a single layer in bytes
bool is3d{}; //!< If the RT is 3D, this controls if the RT is 3D or layered bool is3d{}; //!< If the RT is 3D, this controls if the RT is 3D or layered
GuestTexture guest{}; GuestTexture guest{};
std::shared_ptr<TextureView> view{}; std::shared_ptr<TextureView> view{};
@ -370,7 +371,7 @@ namespace skyline::gpu::interconnect {
} }
void SetRenderTargetLayerStride(RenderTarget &renderTarget, u32 layerStrideLsr2) { void SetRenderTargetLayerStride(RenderTarget &renderTarget, u32 layerStrideLsr2) {
renderTarget.guest.layerStride = layerStrideLsr2 << 2; renderTarget.layerStride = layerStrideLsr2 << 2;
renderTarget.view.reset(); renderTarget.view.reset();
} }
@ -397,8 +398,13 @@ namespace skyline::gpu::interconnect {
else if (renderTarget.view) else if (renderTarget.view)
return &*renderTarget.view; return &*renderTarget.view;
if (renderTarget.guest.baseArrayLayer > 0 || renderTarget.guest.layerCount > 1)
renderTarget.guest.layerStride = renderTarget.layerStride; // Games can supply a layer stride that may include intentional padding which can contain additional mip layers
else
renderTarget.guest.layerStride = 0; // We want to explicitly reset the stride to 0 for non-array textures
if (renderTarget.guest.mappings.empty()) { if (renderTarget.guest.mappings.empty()) {
size_t layerStride{renderTarget.guest.GetLayerSize()}; size_t layerStride{renderTarget.guest.GetLayerStride()};
size_t size{layerStride * (renderTarget.guest.layerCount - renderTarget.guest.baseArrayLayer)}; size_t size{layerStride * (renderTarget.guest.layerCount - renderTarget.guest.baseArrayLayer)};
auto mappings{channelCtx.asCtx->gmmu.TranslateRange(renderTarget.iova, size)}; auto mappings{channelCtx.asCtx->gmmu.TranslateRange(renderTarget.iova, size)};
renderTarget.guest.mappings.assign(mappings.begin(), mappings.end()); renderTarget.guest.mappings.assign(mappings.begin(), mappings.end());
@ -2323,7 +2329,7 @@ namespace skyline::gpu::interconnect {
throw exception("Unsupported TIC Header Type: {}", static_cast<u32>(textureControl.headerType)); throw exception("Unsupported TIC Header Type: {}", static_cast<u32>(textureControl.headerType));
} }
auto mappings{channelCtx.asCtx->gmmu.TranslateRange(textureControl.Iova(), guest.GetLayerSize() * (guest.layerCount - guest.baseArrayLayer))}; auto mappings{channelCtx.asCtx->gmmu.TranslateRange(textureControl.Iova(), guest.GetLayerStride() * (guest.layerCount - guest.baseArrayLayer))};
guest.mappings.assign(mappings.begin(), mappings.end()); guest.mappings.assign(mappings.begin(), mappings.end());
} else if (auto textureView{poolTexture.view.lock()}; textureView != nullptr) { } else if (auto textureView{poolTexture.view.lock()}; textureView != nullptr) {
// If the entry already exists and the view is still valid then we return it directly // If the entry already exists and the view is still valid then we return it directly

View File

@ -10,7 +10,10 @@
#include "adreno_aliasing.h" #include "adreno_aliasing.h"
namespace skyline::gpu { namespace skyline::gpu {
u32 GuestTexture::GetLayerSize() { u32 GuestTexture::GetLayerStride() {
if (layerStride)
return layerStride;
switch (tileConfig.mode) { switch (tileConfig.mode) {
case texture::TileMode::Linear: case texture::TileMode::Linear:
return layerStride = static_cast<u32>(format->GetSize(dimensions)); return layerStride = static_cast<u32>(format->GetSize(dimensions));
@ -134,7 +137,7 @@ namespace skyline::gpu {
throw exception("Guest and host dimensions being different is not supported currently"); throw exception("Guest and host dimensions being different is not supported currently");
auto pointer{mirror.data()}; auto pointer{mirror.data()};
auto size{format->GetSize(dimensions) * layerCount}; auto size{layerStride * layerCount};
WaitOnBacking(); WaitOnBacking();
@ -158,12 +161,16 @@ namespace skyline::gpu {
} }
}()}; }()};
if (guest->tileConfig.mode == texture::TileMode::Block) for (size_t layer{}; layer < layerCount; ++layer) {
texture::CopyBlockLinearToLinear(*guest, pointer, bufferData); if (guest->tileConfig.mode == texture::TileMode::Block)
else if (guest->tileConfig.mode == texture::TileMode::Pitch) texture::CopyBlockLinearToLinear(*guest, pointer, bufferData);
texture::CopyPitchLinearToLinear(*guest, pointer, bufferData); else if (guest->tileConfig.mode == texture::TileMode::Pitch)
else if (guest->tileConfig.mode == texture::TileMode::Linear) texture::CopyPitchLinearToLinear(*guest, pointer, bufferData);
std::memcpy(bufferData, pointer, size); else if (guest->tileConfig.mode == texture::TileMode::Linear)
std::memcpy(bufferData, pointer, size);
pointer += guest->GetLayerStride();
bufferData += layerStride;
}
if (stagingBuffer && cycle.lock() != pCycle) if (stagingBuffer && cycle.lock() != pCycle)
WaitOnFence(); WaitOnFence();
@ -263,12 +270,16 @@ namespace skyline::gpu {
void Texture::CopyToGuest(u8 *hostBuffer) { void Texture::CopyToGuest(u8 *hostBuffer) {
auto guestOutput{mirror.data()}; auto guestOutput{mirror.data()};
if (guest->tileConfig.mode == texture::TileMode::Block) for (size_t layer{}; layer < layerCount; ++layer) {
texture::CopyLinearToBlockLinear(*guest, hostBuffer, guestOutput); if (guest->tileConfig.mode == texture::TileMode::Block)
else if (guest->tileConfig.mode == texture::TileMode::Pitch) texture::CopyLinearToBlockLinear(*guest, hostBuffer, guestOutput);
texture::CopyLinearToPitchLinear(*guest, hostBuffer, guestOutput); else if (guest->tileConfig.mode == texture::TileMode::Pitch)
else if (guest->tileConfig.mode == texture::TileMode::Linear) texture::CopyLinearToPitchLinear(*guest, hostBuffer, guestOutput);
std::memcpy(hostBuffer, guestOutput, format->GetSize(dimensions)); else if (guest->tileConfig.mode == texture::TileMode::Linear)
std::memcpy(hostBuffer, guestOutput, layerStride);
guestOutput += guest->layerStride;
hostBuffer += layerStride;
}
} }
Texture::TextureBufferCopy::TextureBufferCopy(std::shared_ptr<Texture> texture, std::shared_ptr<memory::StagingBuffer> stagingBuffer) : texture(std::move(texture)), stagingBuffer(std::move(stagingBuffer)) {} Texture::TextureBufferCopy::TextureBufferCopy(std::shared_ptr<Texture> texture, std::shared_ptr<memory::StagingBuffer> stagingBuffer) : texture(std::move(texture)), stagingBuffer(std::move(stagingBuffer)) {}
@ -289,6 +300,7 @@ namespace skyline::gpu {
usage(usage), usage(usage),
mipLevels(mipLevels), mipLevels(mipLevels),
layerCount(layerCount), layerCount(layerCount),
layerStride(static_cast<u32>(format->GetSize(dimensions))),
sampleCount(sampleCount) {} sampleCount(sampleCount) {}
Texture::Texture(GPU &pGpu, GuestTexture pGuest) Texture::Texture(GPU &pGpu, GuestTexture pGuest)
@ -300,6 +312,7 @@ namespace skyline::gpu {
tiling(vk::ImageTiling::eOptimal), // Force Optimal due to not adhering to host subresource layout during Linear synchronization tiling(vk::ImageTiling::eOptimal), // Force Optimal due to not adhering to host subresource layout during Linear synchronization
mipLevels(1), mipLevels(1),
layerCount(guest->layerCount), layerCount(guest->layerCount),
layerStride(static_cast<u32>(format->GetSize(dimensions))),
sampleCount(vk::SampleCountFlagBits::e1), sampleCount(vk::SampleCountFlagBits::e1),
flags(gpu.traits.quirks.vkImageMutableFormatCostly ? vk::ImageCreateFlags{} : vk::ImageCreateFlagBits::eMutableFormat), flags(gpu.traits.quirks.vkImageMutableFormatCostly ? vk::ImageCreateFlags{} : vk::ImageCreateFlagBits::eMutableFormat),
usage(vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled) { usage(vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled) {
@ -485,7 +498,7 @@ namespace skyline::gpu {
WaitOnFence(); WaitOnFence();
if (tiling == vk::ImageTiling::eOptimal || !std::holds_alternative<memory::Image>(backing)) { if (tiling == vk::ImageTiling::eOptimal || !std::holds_alternative<memory::Image>(backing)) {
auto size{format->GetSize(dimensions) * layerCount}; auto size{layerStride * layerCount};
auto stagingBuffer{gpu.memory.AllocateStagingBuffer(size)}; auto stagingBuffer{gpu.memory.AllocateStagingBuffer(size)};
auto lCycle{gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) { auto lCycle{gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) {
@ -519,7 +532,7 @@ namespace skyline::gpu {
WaitOnFence(); WaitOnFence();
if (tiling == vk::ImageTiling::eOptimal || !std::holds_alternative<memory::Image>(backing)) { if (tiling == vk::ImageTiling::eOptimal || !std::holds_alternative<memory::Image>(backing)) {
auto size{format->GetSize(dimensions) * layerCount}; auto size{layerStride * layerCount};
auto stagingBuffer{gpu.memory.AllocateStagingBuffer(size)}; auto stagingBuffer{gpu.memory.AllocateStagingBuffer(size)};
CopyIntoStagingBuffer(commandBuffer, stagingBuffer); CopyIntoStagingBuffer(commandBuffer, stagingBuffer);

View File

@ -238,15 +238,15 @@ namespace skyline::gpu {
texture::Format format{}; texture::Format format{};
texture::TileConfig tileConfig{}; texture::TileConfig tileConfig{};
texture::TextureType type{}; texture::TextureType type{};
u16 baseArrayLayer{}; u32 baseArrayLayer{};
u16 layerCount{}; u32 layerCount{};
u32 layerStride{}; //!< An optional hint regarding the size of a single layer, it will be set to 0 when not available, GetLayerSize() should be used to retrieve this value u32 layerStride{}; //!< An optional hint regarding the size of a single layer, it **should** be set to 0 when not available and should never be a non-0 value that doesn't reflect the correct layer stride
vk::ComponentMapping swizzle{}; //!< Component swizzle derived from format requirements and the guest supplied swizzle vk::ComponentMapping swizzle{}; //!< Component swizzle derived from format requirements and the guest supplied swizzle
vk::ImageAspectFlags aspect{}; vk::ImageAspectFlags aspect{};
GuestTexture() {} GuestTexture() {}
GuestTexture(Mappings mappings, texture::Dimensions dimensions, texture::Format format, texture::TileConfig tileConfig, texture::TextureType type, u16 baseArrayLayer = 0, u16 layerCount = 1, u32 layerStride = 0) GuestTexture(Mappings mappings, texture::Dimensions dimensions, texture::Format format, texture::TileConfig tileConfig, texture::TextureType type, u32 baseArrayLayer = 0, u32 layerCount = 1, u32 layerStride = 0)
: mappings(mappings), : mappings(mappings),
dimensions(dimensions), dimensions(dimensions),
format(format), format(format),
@ -257,7 +257,7 @@ namespace skyline::gpu {
layerStride(layerStride), layerStride(layerStride),
aspect(format->vkAspect) {} aspect(format->vkAspect) {}
GuestTexture(span <u8> mapping, texture::Dimensions dimensions, texture::Format format, texture::TileConfig tileConfig, texture::TextureType type, u16 baseArrayLayer = 0, u16 layerCount = 1, u32 layerStride = 0) GuestTexture(span<u8> mapping, texture::Dimensions dimensions, texture::Format format, texture::TileConfig tileConfig, texture::TextureType type, u32 baseArrayLayer = 0, u32 layerCount = 1, u32 layerStride = 0)
: mappings(1, mapping), : mappings(1, mapping),
dimensions(dimensions), dimensions(dimensions),
format(format), format(format),
@ -269,10 +269,11 @@ namespace skyline::gpu {
aspect(format->vkAspect) {} aspect(format->vkAspect) {}
/** /**
* @note This should be used over accessing the `layerStride` member directly when desiring the actual layer stride for calculations as it will automatically handle it not being filled in
* @note Requires `dimensions`, `format` and `tileConfig` to be filled in * @note Requires `dimensions`, `format` and `tileConfig` to be filled in
* @return The size of a single layer with alignment in bytes * @return The size of a single layer with layout alignment in bytes
*/ */
u32 GetLayerSize(); u32 GetLayerStride();
}; };
class TextureManager; class TextureManager;
@ -417,6 +418,7 @@ namespace skyline::gpu {
vk::ImageUsageFlags usage; vk::ImageUsageFlags usage;
u32 mipLevels; u32 mipLevels;
u32 layerCount; //!< The amount of array layers in the image, utilized for efficient binding (Not to be confused with the depth or faces in a cubemap) u32 layerCount; //!< The amount of array layers in the image, utilized for efficient binding (Not to be confused with the depth or faces in a cubemap)
u32 layerStride; //!< The stride of a single array layer given linear tiling
vk::SampleCountFlagBits sampleCount; vk::SampleCountFlagBits sampleCount;
/** /**

View File

@ -345,7 +345,7 @@ namespace skyline::service::hosbinder {
gpu::texture::Dimensions dimensions(surface.width, surface.height); gpu::texture::Dimensions dimensions(surface.width, surface.height);
gpu::GuestTexture guestTexture(span<u8>{}, dimensions, format, tileConfig, gpu::texture::TextureType::e2D); gpu::GuestTexture guestTexture(span<u8>{}, dimensions, format, tileConfig, gpu::texture::TextureType::e2D);
guestTexture.mappings[0] = span<u8>(nvMapHandleObj->GetPointer() + surface.offset, guestTexture.GetLayerSize()); guestTexture.mappings[0] = span<u8>(nvMapHandleObj->GetPointer() + surface.offset, guestTexture.GetLayerStride());
buffer.texture = state.gpu->texture.FindOrCreate(guestTexture)->texture; buffer.texture = state.gpu->texture.FindOrCreate(guestTexture)->texture;
} }

View File

@ -86,7 +86,7 @@ namespace skyline::soc::gm20b::engine {
gpu::texture::TileConfig{ .mode = gpu::texture::TileMode::Linear }, gpu::texture::TileConfig{ .mode = gpu::texture::TileMode::Linear },
gpu::texture::TextureType::e2D}; gpu::texture::TextureType::e2D};
if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetIn, srcTexture.GetLayerSize())}; mappings.size() == 1) { if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetIn, srcTexture.GetLayerStride())}; mappings.size() == 1) {
srcTexture.mappings[0] = mappings[0]; srcTexture.mappings[0] = mappings[0];
} else { } else {
Logger::Warn("DMA for split textures is unimplemented!"); Logger::Warn("DMA for split textures is unimplemented!");
@ -100,8 +100,8 @@ namespace skyline::soc::gm20b::engine {
gpu::texture::TileConfig{ .mode = gpu::texture::TileMode::Block, .blockHeight = registers.dstSurface->blockSize.Height(), .blockDepth = 1 }, gpu::texture::TileConfig{ .mode = gpu::texture::TileMode::Block, .blockHeight = registers.dstSurface->blockSize.Height(), .blockDepth = 1 },
gpu::texture::TextureType::e2D}; gpu::texture::TextureType::e2D};
u64 dstLayerAddress{*registers.offsetOut + dstTexture.GetLayerSize() * registers.dstSurface->layer}; u64 dstLayerAddress{*registers.offsetOut + dstTexture.GetLayerStride() * registers.dstSurface->layer};
if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(dstLayerAddress, dstTexture.GetLayerSize())}; mappings.size() == 1) { if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(dstLayerAddress, dstTexture.GetLayerStride())}; mappings.size() == 1) {
dstTexture.mappings[0] = mappings[0]; dstTexture.mappings[0] = mappings[0];
} else { } else {
Logger::Warn("DMA for split textures is unimplemented!"); Logger::Warn("DMA for split textures is unimplemented!");
@ -145,7 +145,7 @@ namespace skyline::soc::gm20b::engine {
gpu::texture::TileConfig{ .mode = gpu::texture::TileMode::Block, .blockHeight = registers.srcSurface->blockSize.Height(), .blockDepth = 1 }, gpu::texture::TileConfig{ .mode = gpu::texture::TileMode::Block, .blockHeight = registers.srcSurface->blockSize.Height(), .blockDepth = 1 },
gpu::texture::TextureType::e2D}; gpu::texture::TextureType::e2D};
if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetIn, srcTexture.GetLayerSize())}; mappings.size() == 1) { if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetIn, srcTexture.GetLayerStride())}; mappings.size() == 1) {
srcTexture.mappings[0] = mappings[0]; srcTexture.mappings[0] = mappings[0];
} else { } else {
Logger::Warn("DMA for split textures is unimplemented!"); Logger::Warn("DMA for split textures is unimplemented!");
@ -158,7 +158,7 @@ namespace skyline::soc::gm20b::engine {
gpu::texture::TileConfig{ .mode = gpu::texture::TileMode::Linear }, gpu::texture::TileConfig{ .mode = gpu::texture::TileMode::Linear },
gpu::texture::TextureType::e2D}; gpu::texture::TextureType::e2D};
if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetOut, dstTexture.GetLayerSize())}; mappings.size() == 1) { if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetOut, dstTexture.GetLayerStride())}; mappings.size() == 1) {
dstTexture.mappings[0] = mappings[0]; dstTexture.mappings[0] = mappings[0];
} else { } else {
Logger::Warn("DMA for split textures is unimplemented!"); Logger::Warn("DMA for split textures is unimplemented!");