// Patch summary (reviewer notes; the text below is a git diff of three files whose line breaks were collapsed):
//
// 1. gpu/interconnect/inline2memory.cpp - Inline2Memory::Upload
//    - The length passed to gmmu.TranslateRange changes from src.size_bytes() to src.size().
//    - The `.cast()` applied to `src` before `.subspan(offset, mapping.size())` is removed.
//    - NOTE(review): the element type of `span src` is not visible here (template arguments appear to have been
//      stripped from this text, e.g. `span src`, `.cast()`, `static_cast(`, `std::vector tempBuffer`, bare `#include`);
//      presumably the parameter now carries byte-sized elements - confirm against the upstream commit.
//
// 2. gpu/interconnect/inline2memory.h
//    - The Upload(IOVA dst, span src) declaration is replaced to match the definition change above.
//
// 3. soc/gm20b/engines/inline2memory.cpp
//    - One `#include` line is added (its target is missing from this text).
//    - The "range: ..." debug log is moved out of the Pitch branch so it is emitted before the layout check.
//    - Pitch layout: the `state.lineCount == 1` restriction is dropped. The new code loops `line` over
//      state.lineCount, advancing `pitchOffset` by state.pitchOut per line, and calls interconnect.Upload with
//      destination state.offsetOut + pitchOffset and the subspan (state.lineLengthIn * line, state.lineLengthIn)
//      of the cast source buffer.
//    - Other layouts: the "Non-linear I2M uploads are not supported!" warning is removed. After the existing
//      channelCtx.executor.Submit(), the new code:
//        * builds srcDimensions {lineLengthIn, lineCount, dstDepth} and dstDimensions {dstWidth, dstHeight, dstDepth};
//        * computes dstSize with GetBlockLinearLayerSize and translates [offsetOut, offsetOut + dstSize) via the GMMU;
//        * defines `inlineCopy`, which calls CopyLinearToBlockLinearSubrect (with originBytesX / originSamplesY)
//          when source and destination width or height differ, otherwise CopyLinearToBlockLinear;
//        * if the translation yields anything other than exactly one mapping, copies into a temporary
//          std::vector of dstSize elements and hands it to interconnect.Upload; otherwise copies straight
//          into dstMappings.front().data().
//    - NOTE(review): in this hunk channelSequenceNumber is incremented only on the Pitch path - verify that is intended.
diff --git a/app/src/main/cpp/skyline/gpu/interconnect/inline2memory.cpp b/app/src/main/cpp/skyline/gpu/interconnect/inline2memory.cpp index e0fd4976..b4e4f442 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/inline2memory.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/inline2memory.cpp @@ -44,12 +44,12 @@ namespace skyline::gpu::interconnect { }); } - void Inline2Memory::Upload(IOVA dst, span src) { - auto dstMappings{channelCtx.asCtx->gmmu.TranslateRange(dst, src.size_bytes())}; + void Inline2Memory::Upload(IOVA dst, span src) { + auto dstMappings{channelCtx.asCtx->gmmu.TranslateRange(dst, src.size())}; size_t offset{}; for (auto mapping : dstMappings) { - UploadSingleMapping(mapping, src.cast().subspan(offset, mapping.size())); + UploadSingleMapping(mapping, src.subspan(offset, mapping.size())); offset += mapping.size(); } } diff --git a/app/src/main/cpp/skyline/gpu/interconnect/inline2memory.h b/app/src/main/cpp/skyline/gpu/interconnect/inline2memory.h index 91c71b8c..47af2708 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/inline2memory.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/inline2memory.h @@ -33,6 +33,6 @@ namespace skyline::gpu::interconnect { public: Inline2Memory(GPU &gpu, soc::gm20b::ChannelContext &channelCtx); - void Upload(IOVA dst, span src); + void Upload(IOVA dst, span src); }; } diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.cpp index 22aa5e8a..36f550c1 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.cpp @@ -1,6 +1,7 @@ // SPDX-License-Identifier: MPL-2.0 // Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/) +#include #include #include "inline2memory.h" @@ -19,13 +20,52 @@ namespace skyline::soc::gm20b::engine { if (state.launchDma.completion == RegisterState::DmaCompletionType::ReleaseSemaphore) throw 
exception("Semaphore release on I2M completion is not supported!"); - if (state.launchDma.layout == RegisterState::DmaDstMemoryLayout::Pitch && state.lineCount == 1) { - Logger::Debug("range: 0x{:X} -> 0x{:X}", u64{state.offsetOut}, u64{state.offsetOut} + buffer.size() * 0x4); + Logger::Debug("range: 0x{:X} -> 0x{:X}", u64{state.offsetOut}, u64{state.offsetOut} + buffer.size() * 0x4); + if (state.launchDma.layout == RegisterState::DmaDstMemoryLayout::Pitch) { channelCtx.channelSequenceNumber++; - interconnect.Upload(u64{state.offsetOut}, span{buffer}); + + auto srcBuffer{span{buffer}.cast()}; + for (u32 line{}, pitchOffset{}; line < state.lineCount; ++line, pitchOffset += state.pitchOut) + interconnect.Upload(u64{state.offsetOut + pitchOffset}, srcBuffer.subspan(state.lineLengthIn * line, state.lineLengthIn)); + } else { channelCtx.executor.Submit(); - Logger::Warn("Non-linear I2M uploads are not supported!"); + + gpu::texture::Dimensions srcDimensions{state.lineLengthIn, state.lineCount, state.dstDepth}; + + gpu::texture::Dimensions dstDimensions{state.dstWidth, state.dstHeight, state.dstDepth}; + size_t dstSize{GetBlockLinearLayerSize(dstDimensions, 1, 1, 1, 1 << (u8)state.dstBlockSize.height, 1 << (u8)state.dstBlockSize.depth)}; + + auto dstMappings{channelCtx.asCtx->gmmu.TranslateRange(state.offsetOut, dstSize)}; + + auto inlineCopy{[&](u8 *dst){ + // The I2M engine only supports a formatBpb of 1 + if ((srcDimensions.width != dstDimensions.width) || (srcDimensions.height != dstDimensions.height)) + gpu::texture::CopyLinearToBlockLinearSubrect(srcDimensions, dstDimensions, + 1, 1, 1, + 1 << static_cast(state.dstBlockSize.height), 1 << static_cast(state.dstBlockSize.depth), + span{buffer}.cast().data(), dst, + state.originBytesX, state.originSamplesY + ); + else + gpu::texture::CopyLinearToBlockLinear(dstDimensions, + 1, 1, 1, + 1 << static_cast(state.dstBlockSize.height), 1 << static_cast(state.dstBlockSize.depth), + span{buffer}.cast().data(), dst + ); + }}; 
+ if (dstMappings.size() != 1) { + // We create a temporary buffer to hold the blockLinear texture if mappings are split + // NOTE: We don't reserve memory here since such copies on this engine are rarely used + std::vector tempBuffer(dstSize); + + inlineCopy(tempBuffer.data()); + + interconnect.Upload(u64{state.offsetOut}, span{tempBuffer}); + } else { + inlineCopy(dstMappings.front().data()); + } } }