Replace Maxwell DMA GuestTexture usage with new swizzling API

Maxwell DMA requires swizzled copies to/from textures. Previously it had to construct an arbitrary `GuestTexture` to perform them, but with the introduction of the cleaner swizzling API that workaround has become redundant. This commit removes it and replaces it with direct calls to the API, supplying all the necessary values explicitly.
This commit is contained in:
PixelyIon 2022-05-28 21:21:51 +05:30
parent de300bfdbe
commit da7e6a7df7
2 changed files with 48 additions and 71 deletions

View File

@ -182,22 +182,4 @@ namespace skyline::gpu::format {
#undef FORMAT_NORM_INT_FLOAT #undef FORMAT_NORM_INT_FLOAT
// @fmt:on // @fmt:on
/**
 * @brief Maps a bytes-per-pixel count onto a generic unsigned-integer texture format of the same size
 * @note Unsupported sizes log an error and fall back to the 1-byte format (R8Uint)
 */
inline const gpu::texture::FormatBase &GetFormatForBpp(u32 bytesPerPixel) {
    if (bytesPerPixel == 1)
        return R8Uint;
    if (bytesPerPixel == 2)
        return R8G8Uint;
    if (bytesPerPixel == 4)
        return R8G8B8A8Uint;
    if (bytesPerPixel == 8)
        return R16G16B16A16Uint;
    if (bytesPerPixel == 16)
        return R32G32B32A32Uint;

    Logger::Error("Couldn't convert bytes per pixel: {}", bytesPerPixel);
    return R8Uint;
}
} }

View File

@ -39,14 +39,14 @@ namespace skyline::soc::gm20b::engine {
executor.Execute(); executor.Execute();
if (registers.launchDma->multiLineEnable) { if (registers.launchDma->multiLineEnable) {
if (registers.launchDma->srcMemoryLayout == Registers::LaunchDma::MemoryLayout::Pitch && if (registers.launchDma->srcMemoryLayout == Registers::LaunchDma::MemoryLayout::Pitch &&
registers.launchDma->dstMemoryLayout == Registers::LaunchDma::MemoryLayout::BlockLinear) registers.launchDma->dstMemoryLayout == Registers::LaunchDma::MemoryLayout::BlockLinear)
CopyPitchToBlockLinear(); CopyPitchToBlockLinear();
else if (registers.launchDma->srcMemoryLayout == Registers::LaunchDma::MemoryLayout::BlockLinear && else if (registers.launchDma->srcMemoryLayout == Registers::LaunchDma::MemoryLayout::BlockLinear &&
registers.launchDma->dstMemoryLayout == Registers::LaunchDma::MemoryLayout::Pitch) registers.launchDma->dstMemoryLayout == Registers::LaunchDma::MemoryLayout::Pitch)
CopyBlockLinearToPitch(); CopyBlockLinearToPitch();
else else
Logger::Warn("Unimplemented multi-line copy type: {} -> {}!", Logger::Warn("Unimplemented multi-line copy type: {} -> {}!",
static_cast<u8>(registers.launchDma->srcMemoryLayout), static_cast<u8>(registers.launchDma->dstMemoryLayout)); static_cast<u8>(registers.launchDma->srcMemoryLayout), static_cast<u8>(registers.launchDma->dstMemoryLayout));
} else { } else {
// 1D buffer copy // 1D buffer copy
// TODO: implement swizzled 1D copies based on VMM 'kind' // TODO: implement swizzled 1D copies based on VMM 'kind'
@ -77,117 +77,112 @@ namespace skyline::soc::gm20b::engine {
void MaxwellDma::CopyPitchToBlockLinear() { void MaxwellDma::CopyPitchToBlockLinear() {
if (registers.dstSurface->blockSize.Depth() > 1 || registers.dstSurface->depth > 1) { if (registers.dstSurface->blockSize.Depth() > 1 || registers.dstSurface->depth > 1) {
Logger::Warn("3D DMA engine copies are unimplemented!"); Logger::Warn("3D DMA engine copies are unimplemented");
return; return;
} }
if (registers.dstSurface->blockSize.Width() != 1) { if (registers.dstSurface->blockSize.Width() != 1) {
Logger::Warn("DMA engine copies with block widths other than 1 are unimplemented!"); Logger::Warn("DMA engine copies with block widths other than 1 are unimplemented");
return; return;
} }
u32 bytesPerPixel{static_cast<u32>(registers.remapComponents->ComponentSize() * registers.remapComponents->NumSrcComponents())}; u32 bytesPerPixel{static_cast<u32>(registers.remapComponents->ComponentSize() * registers.remapComponents->NumSrcComponents())};
if (bytesPerPixel * *registers.lineLengthIn != *registers.pitchIn) { if (bytesPerPixel * *registers.lineLengthIn != *registers.pitchIn) {
Logger::Warn("Non-linear DMA source textures are not implemented!"); Logger::Warn("Non-linear DMA source textures are not implemented");
return; return;
} }
if (registers.dstSurface->origin.x || registers.dstSurface->origin.y) { if (registers.dstSurface->origin.x || registers.dstSurface->origin.y) {
Logger::Warn("Non-zero origin DMA copies are not implemented!"); Logger::Warn("Non-zero origin DMA copies are not implemented");
return; return;
} }
if (*registers.lineLengthIn != registers.dstSurface->width) if (*registers.lineLengthIn != registers.dstSurface->width)
Logger::Warn("DMA copy width mismatch: src: {} dst: {}", *registers.lineLengthIn, registers.dstSurface->width); Logger::Warn("DMA copy width mismatch: src: {} dst: {}", *registers.lineLengthIn, registers.dstSurface->width);
gpu::GuestTexture srcTexture{span<u8>{}, gpu::texture::Dimensions srcDimensions{*registers.lineLengthIn, *registers.lineCount, 1};
gpu::texture::Dimensions{*registers.lineLengthIn, *registers.lineCount, 1}, size_t srcStride{srcDimensions.width * srcDimensions.height * bytesPerPixel};
gpu::format::GetFormatForBpp(bytesPerPixel),
gpu::texture::TileConfig{ .mode = gpu::texture::TileMode::Linear },
gpu::texture::TextureType::e2D};
if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetIn, srcTexture.GetLayerStride())}; mappings.size() == 1) { auto srcMappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetIn, srcStride)};
srcTexture.mappings[0] = mappings[0]; if (srcMappings.size() != 1) {
} else { Logger::Warn("DMA for split textures is unimplemented");
Logger::Warn("DMA for split textures is unimplemented!");
return; return;
} }
// This represents a single layer view into a potentially multi-layer texture gpu::texture::Dimensions dstDimensions{registers.dstSurface->width, registers.dstSurface->height, registers.dstSurface->depth};
gpu::GuestTexture dstTexture{span<u8>{}, dstDimensions.width = *registers.lineLengthIn; // We do not support copying subrects so we need the width to match on the source and destination
gpu::texture::Dimensions{*registers.lineLengthIn, registers.dstSurface->height, 1}, size_t dstBlockHeight{registers.dstSurface->blockSize.Height()}, dstBlockDepth{registers.dstSurface->blockSize.Depth()};
gpu::format::GetFormatForBpp(bytesPerPixel), size_t dstLayerStride{gpu::texture::GetBlockLinearLayerSize(dstDimensions, 1, 1, bytesPerPixel, dstBlockHeight, dstBlockDepth)};
gpu::texture::TileConfig{ .mode = gpu::texture::TileMode::Block, .blockHeight = registers.dstSurface->blockSize.Height(), .blockDepth = 1 },
gpu::texture::TextureType::e2D};
u64 dstLayerAddress{*registers.offsetOut + dstTexture.GetLayerStride() * registers.dstSurface->layer}; size_t dstLayerAddress{*registers.offsetOut + (registers.dstSurface->layer * dstLayerStride)};
if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(dstLayerAddress, dstTexture.GetLayerStride())}; mappings.size() == 1) { auto dstMappings{channelCtx.asCtx->gmmu.TranslateRange(dstLayerAddress, dstLayerStride)};
dstTexture.mappings[0] = mappings[0]; if (dstMappings.size() != 1) {
} else { Logger::Warn("DMA for split textures is unimplemented");
Logger::Warn("DMA for split textures is unimplemented!");
return; return;
} }
Logger::Debug("{}x{}@0x{:X} -> {}x{}@0x{:X}", srcTexture.dimensions.width, srcTexture.dimensions.height, u64{*registers.offsetIn}, dstTexture.dimensions.width, dstTexture.dimensions.height, dstLayerAddress); Logger::Debug("{}x{}@0x{:X} -> {}x{}@0x{:X}", srcDimensions.width, srcDimensions.height, u64{*registers.offsetIn}, dstDimensions.width, dstDimensions.height, dstLayerAddress);
gpu::texture::CopyLinearToBlockLinear(dstTexture, srcTexture.mappings.front().data(), dstTexture.mappings.front().data()); gpu::texture::CopyLinearToBlockLinear(
dstDimensions,
1, 1, bytesPerPixel,
dstBlockHeight, dstBlockDepth,
srcMappings.front().data(), dstMappings.front().data()
);
} }
void MaxwellDma::CopyBlockLinearToPitch() { void MaxwellDma::CopyBlockLinearToPitch() {
if (registers.srcSurface->blockSize.Depth() > 1 || registers.srcSurface->depth > 1) { if (registers.srcSurface->blockSize.Depth() > 1 || registers.srcSurface->depth > 1) {
Logger::Warn("3D DMA engine copies are unimplemented!"); Logger::Warn("3D DMA engine copies are unimplemented");
return; return;
} }
if (registers.srcSurface->blockSize.Width() != 1) { if (registers.srcSurface->blockSize.Width() != 1) {
Logger::Warn("DMA engine copies with block widths other than 1 are unimplemented!"); Logger::Warn("DMA engine copies with block widths other than 1 are unimplemented");
return; return;
} }
u32 bytesPerPixel{static_cast<u32>(registers.remapComponents->ComponentSize() * registers.remapComponents->NumSrcComponents())}; u32 bytesPerPixel{static_cast<u32>(registers.remapComponents->ComponentSize() * registers.remapComponents->NumSrcComponents())};
if (bytesPerPixel * *registers.lineLengthIn != *registers.pitchOut) { if (bytesPerPixel * *registers.lineLengthIn != *registers.pitchOut) {
Logger::Warn("Non-linear DMA destination textures are not implemented!"); Logger::Warn("Non-linear DMA destination textures are not implemented");
return; return;
} }
if (registers.srcSurface->origin.x || registers.srcSurface->origin.y) { if (registers.srcSurface->origin.x || registers.srcSurface->origin.y) {
Logger::Warn("Non-zero origin DMA copies are not implemented!"); Logger::Warn("Non-zero origin DMA copies are not implemented");
return; return;
} }
if (*registers.lineLengthIn != registers.srcSurface->width) if (*registers.lineLengthIn != registers.srcSurface->width)
Logger::Warn("DMA copy width mismatch: src: {} dst: {}", *registers.lineLengthIn, registers.dstSurface->width); Logger::Warn("DMA copy width mismatch: src: {} dst: {}", *registers.lineLengthIn, registers.dstSurface->width);
gpu::GuestTexture srcTexture{span<u8>{}, gpu::texture::Dimensions srcDimensions{registers.srcSurface->width, registers.srcSurface->height, registers.srcSurface->depth};
gpu::texture::Dimensions{registers.srcSurface->width, registers.srcSurface->height, 1}, srcDimensions.width = *registers.lineLengthIn; // We do not support copying subrects so we need the width to match on the source and destination
gpu::format::GetFormatForBpp(bytesPerPixel), size_t srcBlockHeight{registers.srcSurface->blockSize.Height()}, srcBlockDepth{registers.srcSurface->blockSize.Depth()};
gpu::texture::TileConfig{ .mode = gpu::texture::TileMode::Block, .blockHeight = registers.srcSurface->blockSize.Height(), .blockDepth = 1 }, size_t srcStride{gpu::texture::GetBlockLinearLayerSize(srcDimensions, 1, 1, bytesPerPixel, srcBlockHeight, srcBlockDepth)};
gpu::texture::TextureType::e2D};
if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetIn, srcTexture.GetLayerStride())}; mappings.size() == 1) { auto srcMappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetIn, srcStride)};
srcTexture.mappings[0] = mappings[0]; if (srcMappings.size() != 1) {
} else { Logger::Warn("DMA for split textures is unimplemented");
Logger::Warn("DMA for split textures is unimplemented!");
return; return;
} }
gpu::GuestTexture dstTexture{span<u8>{}, gpu::texture::Dimensions dstDimensions{*registers.lineLengthIn, *registers.lineCount, 1};
gpu::texture::Dimensions{*registers.lineLengthIn, *registers.lineCount, 1}, size_t dstStride{dstDimensions.width * dstDimensions.height * bytesPerPixel};
gpu::format::GetFormatForBpp(bytesPerPixel),
gpu::texture::TileConfig{ .mode = gpu::texture::TileMode::Linear },
gpu::texture::TextureType::e2D};
if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetOut, dstTexture.GetLayerStride())}; mappings.size() == 1) { auto dstMappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetOut, dstStride)};
dstTexture.mappings[0] = mappings[0]; if (dstMappings.size() != 1) {
} else { Logger::Warn("DMA for split textures is unimplemented");
Logger::Warn("DMA for split textures is unimplemented!");
return; return;
} }
Logger::Debug("{}x{}@0x{:X} -> {}x{}@0x{:X}", srcTexture.dimensions.width, srcTexture.dimensions.height, u64{*registers.offsetIn}, dstTexture.dimensions.width, dstTexture.dimensions.height, u64{*registers.offsetOut}); Logger::Debug("{}x{}@0x{:X} -> {}x{}@0x{:X}", srcDimensions.width, srcDimensions.height, u64{*registers.offsetIn}, dstDimensions.width, dstDimensions.height, u64{*registers.offsetOut});
gpu::texture::CopyBlockLinearToLinear(srcTexture, srcTexture.mappings.front().data(), dstTexture.mappings.front().data()); gpu::texture::CopyBlockLinearToLinear(
srcDimensions,
1, 1, bytesPerPixel,
srcBlockHeight, srcBlockDepth,
srcMappings.front().data(), dstMappings.front().data());
} }
void MaxwellDma::CallMethodBatchNonInc(u32 method, span<u32> arguments) { void MaxwellDma::CallMethodBatchNonInc(u32 method, span<u32> arguments) {