From d094cc142da0c7d24108097e2e8b00d43addb637 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Sat, 14 Aug 2021 20:42:11 +0100 Subject: [PATCH] NEEDS CLEANUP: Reimplement GPU VMM and rewrite nvdrv VM impl --- app/CMakeLists.txt | 2 +- .../main/cpp/skyline/common/address_space.h | 155 ++++++++ .../main/cpp/skyline/common/address_space.inc | 354 ++++++++++++++++++ .../main/cpp/skyline/common/circular_queue.h | 6 + app/src/main/cpp/skyline/common/signal.cpp | 2 - app/src/main/cpp/skyline/common/trace.h | 3 +- .../main/cpp/skyline/services/common/result.h | 1 + .../{macro_def.h => macro_def.inc} | 0 .../{macro_undef.h => macro_undef.inc} | 0 .../services/nvdrv/devices/nvhost/as_gpu.cpp | 241 +++++++++--- .../services/nvdrv/devices/nvhost/as_gpu.h | 58 ++- .../services/nvdrv/devices/nvhost/ctrl.cpp | 4 +- .../nvdrv/devices/nvhost/ctrl_gpu.cpp | 4 +- .../nvdrv/devices/nvhost/gpu_channel.cpp | 4 +- .../skyline/services/nvdrv/devices/nvmap.cpp | 4 +- app/src/main/cpp/skyline/soc.h | 4 +- app/src/main/cpp/skyline/soc/gm20b.cpp | 20 + app/src/main/cpp/skyline/soc/gm20b.h | 9 +- .../engines/maxwell/macro_interpreter.cpp | 2 +- .../skyline/soc/gm20b/engines/maxwell_3d.cpp | 4 +- app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp | 7 +- app/src/main/cpp/skyline/soc/gmmu.cpp | 214 ----------- app/src/main/cpp/skyline/soc/gmmu.h | 140 ------- 23 files changed, 794 insertions(+), 444 deletions(-) create mode 100644 app/src/main/cpp/skyline/common/address_space.h create mode 100644 app/src/main/cpp/skyline/common/address_space.inc rename app/src/main/cpp/skyline/services/nvdrv/devices/deserialisation/{macro_def.h => macro_def.inc} (100%) rename app/src/main/cpp/skyline/services/nvdrv/devices/deserialisation/{macro_undef.h => macro_undef.inc} (100%) create mode 100644 app/src/main/cpp/skyline/soc/gm20b.cpp delete mode 100644 app/src/main/cpp/skyline/soc/gmmu.cpp delete mode 100644 app/src/main/cpp/skyline/soc/gmmu.h diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt index cfadde47..474a4899 100644 --- a/app/CMakeLists.txt +++ b/app/CMakeLists.txt @@ -96,7 +96,7 @@ add_library(skyline SHARED ${source_DIR}/skyline/gpu/command_scheduler.cpp ${source_DIR}/skyline/gpu/texture/texture.cpp ${source_DIR}/skyline/gpu/presentation_engine.cpp - ${source_DIR}/skyline/soc/gmmu.cpp + ${source_DIR}/skyline/soc/gm20b.cpp ${source_DIR}/skyline/soc/host1x/syncpoint.cpp ${source_DIR}/skyline/soc/gm20b/gpfifo.cpp ${source_DIR}/skyline/soc/gm20b/engines/maxwell_3d.cpp diff --git a/app/src/main/cpp/skyline/common/address_space.h b/app/src/main/cpp/skyline/common/address_space.h new file mode 100644 index 00000000..d8768221 --- /dev/null +++ b/app/src/main/cpp/skyline/common/address_space.h @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#pragma once + +#include +#include + +namespace skyline { + template + concept AddressSpaceValid = std::is_unsigned_v && sizeof(VaType) * 8 >= AddressSpaceBits; + + /** + * @brief FlatAddressSpaceMap provides a generic VA->PA mapping implementation using a sorted vector + */ + template requires AddressSpaceValid + class FlatAddressSpaceMap { + private: + /** + * @brief Represents a block of memory in the AS + */ + struct Block { + VaType virt{UnmappedVa}; //!< VA of the block + PaType phys{UnmappedPa}; //!< PA of the block, will increase 1-1 with VA until a new block is encountered + bool flag{}; //!< General purpose flag for use by derived classes + + Block() = default; + + Block(VaType virt, PaType phys, 
bool flag) : virt(virt), phys(phys), flag(flag) {} + + constexpr bool Valid() { + return virt != UnmappedVa; + } + + constexpr bool Mapped() { + return phys != UnmappedPa; + } + + constexpr bool Unmapped() { + return phys == UnmappedPa; + } + + bool operator<(const VaType &pVirt) const { + return virt < pVirt; + } + }; + + protected: + std::mutex blockMutex; + std::vector blocks{Block{}}; + + /** + * @brief Maps a PA range into the given AS region, optionally setting the flag + * @note blockMutex MUST be locked when calling this + */ + void MapLocked(VaType virt, PaType phys, VaType size, bool flag = {}); + + /** + * @brief Unmaps the given range and merges it with other unmapped regions + * @note blockMutex MUST be locked when calling this + */ + void UnmapLocked(VaType virt, VaType size); + + public: + static constexpr VaType VaMaximum{(1ULL << (AddressSpaceBits - 1)) + ((1ULL << (AddressSpaceBits - 1)) - 1)}; //!< The maximum VA that this AS can technically reach + + VaType vaLimit{VaMaximum}; //!< A soft limit on the maximum VA of the AS + + FlatAddressSpaceMap(VaType pVaLimit); + + FlatAddressSpaceMap() = default; + + /** + * @brief Same as MapLocked but acquires blockMutex itself + */ + void Map(VaType virt, PaType phys, VaType size, bool flag = {}); + + /** + * @brief Same as UnmapLocked but acquires blockMutex itself + */ + void Unmap(VaType virt, VaType size); + }; + + /** + * @brief FlatMemoryManager specialises FlatAddressSpaceMap to focus on pointers as PAs, adding read/write functions + */ + template requires AddressSpaceValid + class FlatMemoryManager : public FlatAddressSpaceMap { + public: + /** + * @return A placeholder address for sparse mapped regions; it has no meaning and is never dereferenced + */ + static u8 *SparsePlaceholderAddress() { + return reinterpret_cast(0xCAFEBABE); + } + + void Read(u8 *destination, VaType virt, VaType size); + + template + void Read(span destination, VaType virt) { + Read(reinterpret_cast(destination.data()), virt, destination.size_bytes()); + } + + template + T Read(VaType virt) { + T obj; + Read(reinterpret_cast(&obj), virt, sizeof(T)); + return obj; + } + + void Write(VaType virt, u8 *source, VaType size); + + template + void Write(VaType virt, span source) { + Write(virt, reinterpret_cast(source.data()), source.size_bytes()); + } + + template + void Write(VaType virt, T source) { + Write(virt, reinterpret_cast(&source), sizeof(T)); + } + }; + + + /** + * @brief FlatAllocator specialises FlatAddressSpaceMap to work as an allocator, with an initial, fast linear pass and a subsequent slower pass that iterates until it finds a free block + */ + template requires AddressSpaceValid + class FlatAllocator : public FlatAddressSpaceMap { + private: + using Base = FlatAddressSpaceMap; + + VaType currentLinearAllocEnd; //!< The end address for the initial linear allocation pass; once this reaches the AS limit the slower allocation path will be used + + public: + VaType vaStart; //!< The base VA of the allocator; no allocations will be made below this + + FlatAllocator(VaType vaStart, VaType vaLimit); + + /** + * @brief Allocates a region in the AS of the given size and returns its address + */ + VaType Allocate(VaType size); + + /** + * @brief Marks the given region in the AS as allocated + */ + void AllocateFixed(VaType virt, VaType size); + + /** + * @brief Frees an AS region so it can be used again + */ + void Free(VaType virt, VaType size); + }; +} diff --git a/app/src/main/cpp/skyline/common/address_space.inc b/app/src/main/cpp/skyline/common/address_space.inc new file mode 100644 index 00000000..07a37610
--- /dev/null +++ b/app/src/main/cpp/skyline/common/address_space.inc @@ -0,0 +1,354 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#include +#include +#include "address_space.h" + +#define MAP_MEMBER(returnType) template requires AddressSpaceValid returnType FlatAddressSpaceMap + +#define MM_MEMBER(returnType) template requires AddressSpaceValid returnType FlatMemoryManager + +#define ALLOC_MEMBER(returnType) template requires AddressSpaceValid returnType FlatAllocator + +namespace skyline { + MAP_MEMBER()::FlatAddressSpaceMap(VaType pVaLimit) : vaLimit(pVaLimit) { + if (pVaLimit > VaMaximum) + throw exception("Invalid VA limit!"); + } + + MAP_MEMBER(void)::MapLocked(VaType virt, PaType phys, VaType size, bool flag) { + TRACE_EVENT("containers", "FlatAddressSpaceMap::Map"); + + VaType virtEnd{virt + size}; + + if (virtEnd > vaLimit) + throw exception("Trying to map a block past the VA limit!"); + + auto blockEndSuccessor{std::lower_bound(blocks.begin(), blocks.end(), virtEnd)}; + if (blockEndSuccessor == blocks.begin()) + throw exception("Unexpected Memory Manager state!"); + + auto blockEndPredecessor{std::prev(blockEndSuccessor)}; + + if (blockEndSuccessor != blocks.end()) { + // We have blocks in front of us, if one is directly in front then we don't have to add a tail + if (blockEndSuccessor->virt != virtEnd) { + PaType tailPhys{[&]() -> PaType { + if (!PaContigSplit || blockEndPredecessor->Unmapped()) + return blockEndPredecessor->phys; // Always propagate unmapped regions + else + return blockEndPredecessor->phys + virtEnd - blockEndPredecessor->virt; + }()}; + + if (blockEndPredecessor->virt >= virt) { + // If this block's start would be overlapped by the map then reuse it as a tail block + blockEndPredecessor->virt = virtEnd; + blockEndPredecessor->phys = tailPhys; + blockEndPredecessor->flag = blockEndPredecessor->flag; + } else { + // Else insert a new one and we're done + blocks.insert(blockEndSuccessor, {Block(virt, phys, flag), Block(virtEnd, tailPhys, blockEndPredecessor->flag)}); + return; + } + } + } else { + // blockEndPredecessor will always be unmapped as blocks has to be terminated by an unmapped chunk + if (blockEndPredecessor != blocks.begin() && blockEndPredecessor->virt >= virt) { + // Move the unmapped block start backwards + blockEndPredecessor->virt = virtEnd; + } else { + // Else insert a new one and we're done + blocks.insert(blockEndSuccessor, {Block(virt, phys, flag), Block(virtEnd, UnmappedPa, false)}); + return; + } + } + + auto blockStartSuccessor{blockEndPredecessor}; + + // Walk the block vector to find the start successor as this is more efficient than another binary search in most scenarios + while (std::prev(blockStartSuccessor)->virt >= virt) + std::advance(blockStartSuccessor, -1); + + if (blockStartSuccessor->virt > virtEnd) + throw exception("Unexpected Memory Manager state!"); + + if (blockStartSuccessor->virt == virtEnd) { + // We need to create a new block as there are none spare that we would overwrite + blocks.insert(blockStartSuccessor, Block(virt, phys, flag)); + return; + } else { + blockStartSuccessor->virt = virt; + blockStartSuccessor->phys = phys; + blockStartSuccessor->flag = flag; + + // Erase overwritten blocks + if (auto eraseStart{std::next(blockStartSuccessor)}; blockStartSuccessor != blockEndPredecessor) { + if (eraseStart == blockEndPredecessor) + __builtin_trap(); + + blocks.erase(eraseStart, blockEndPredecessor); + } + } + } + + 
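    // The invariant MapLocked above (and UnmapLocked below) maintains is that `blocks` stays sorted
    // by `virt` and is always terminated by an unmapped block, so any VA can be resolved from its
    // predecessor block. What follows is a minimal standalone sketch of that lookup, mirroring the
    // walk Read/Write perform later in this file; the struct, the function name and the plain stdint
    // types are illustrative only and not part of this patch.

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct SketchBlock {
        uint64_t virt; // VA at which this block starts
        uint8_t *phys; // Host pointer backing the block, nullptr if unmapped
    };

    // Resolves a VA to a host pointer given a sorted block vector that starts at VA 0 and ends with an unmapped sentinel
    uint8_t *Translate(const std::vector<SketchBlock> &blocks, uint64_t va) {
        // Find the first block starting past `va`; its predecessor is the block that covers `va`
        auto successor{std::upper_bound(blocks.begin(), blocks.end(), va, [](uint64_t va, const SketchBlock &block) {
            return va < block.virt;
        })};
        if (successor == blocks.begin())
            return nullptr; // Cannot happen if the vector always holds a block starting at VA 0
        auto predecessor{std::prev(successor)};
        if (!predecessor->phys)
            return nullptr; // Unmapped (or sparse placeholder) block
        return predecessor->phys + (va - predecessor->virt); // The PA increases 1:1 with the VA within a block
    }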
MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) { + TRACE_EVENT("containers", "FlatAddressSpaceMap::Unmap"); + + VaType virtEnd{virt + size}; + + if (virtEnd > vaLimit) + throw exception("Trying to unmap a block past the VA limit!"); + + auto blockEndSuccessor{std::lower_bound(blocks.begin(), blocks.end(), virtEnd)}; + if (blockEndSuccessor == blocks.begin()) + throw exception("Unexpected Memory Manager state!"); + + auto blockEndPredecessor{std::prev(blockEndSuccessor)}; + + auto walkBackToPredecessor{[&](auto iter) { + while (iter->virt >= virt) + std::advance(iter, -1); + + return iter; + }}; + + auto eraseBlocksWithEndUnmapped{[&] (auto unmappedEnd) { + auto blockStartPredecessor{walkBackToPredecessor(unmappedEnd)}; + auto blockStartSuccessor{std::next(blockStartPredecessor)}; + + auto eraseEnd{[&]() { + if (blockStartPredecessor->Unmapped()) { + // If the start predecessor is unmapped then we can erase everything in our region and be done + return std::next(unmappedEnd); + } else { + // Else reuse the end predecessor as the start of our unmapped region then erase all up to it + unmappedEnd->virt = virt; + return unmappedEnd; + } + }()}; + + // We can't have two unmapped regions after each other + if (eraseEnd == blockStartSuccessor || (blockStartPredecessor->Unmapped() && eraseEnd->Unmapped())) + throw exception("Unexpected Memory Manager state!"); + + blocks.erase(blockStartSuccessor, eraseEnd); + }}; + + // We can avoid any splitting logic if any of these cases apply + if (blockEndPredecessor->Unmapped()) { + if (blockEndPredecessor->virt > virt) + eraseBlocksWithEndUnmapped(blockEndPredecessor); + + return; // The region is unmapped, bail out early + } else if (blockEndSuccessor->virt == virtEnd && blockEndSuccessor->Unmapped()) { + eraseBlocksWithEndUnmapped(blockEndSuccessor); + return; // The region is unmapped here and doesn't need splitting, bail out early + } else if (blockEndSuccessor == blocks.end()) { + // This should never happen as the end should always follow an unmapped block + throw exception("Unexpected Memory Manager state!"); + } else if (blockEndSuccessor->virt != virtEnd) { + // If no block starts directly at the end of the region then we'll need to add a tail + + // The previous block is mapped so we will need to add a tail with an offset + PaType tailPhys{[&]() { + if constexpr (PaContigSplit) + return blockEndPredecessor->phys + virtEnd - blockEndPredecessor->virt; + else + return blockEndPredecessor->phys; + }()}; + + if (blockEndPredecessor->virt >= virt) { + // If this block's start would be overlapped by the unmap then reuse it as a tail block + blockEndPredecessor->virt = virtEnd; + blockEndPredecessor->phys = tailPhys; + } else { + blocks.insert(blockEndSuccessor, {Block(virt, UnmappedPa, false), Block(virtEnd, tailPhys, blockEndPredecessor->flag)}); + return; // The previous block is mapped and begins before the region, so inserting the head and tail covers the whole unmap + } + } + + // Walk the block vector to find the start predecessor as this is more efficient than another binary search in most scenarios + auto blockStartPredecessor{walkBackToPredecessor(blockEndPredecessor)}; + auto blockStartSuccessor{std::next(blockStartPredecessor)}; + + if (blockStartSuccessor->virt > virtEnd) + throw exception("Unexpected Memory Manager state!"); + + if (blockStartSuccessor->virt == virtEnd) { + // There are no blocks between the start and the end that would let us skip inserting a new one for head + + // The previous block may be unmapped, in which case we don't need to insert another unmapped block after it + if (blockStartPredecessor->Mapped()) +
blocks.insert(blockStartSuccessor, Block(virt, UnmappedPa, false)); + } else if (blockStartPredecessor->Unmapped()) { + // If the previous block is unmapped + blocks.erase(blockStartSuccessor, blockEndPredecessor); + } else { + // Add in the unmapped block header + blockStartSuccessor->virt = virt; + blockStartSuccessor->phys = UnmappedPa; + + // Erase overwritten blocks, skipping the first one as we have written the unmapped start block there + if (auto eraseStart{std::next(blockStartSuccessor)}; blockStartSuccessor != blockEndPredecessor) { + if (eraseStart == blockEndPredecessor) + __builtin_trap(); + + blocks.erase(eraseStart, blockEndPredecessor); + } + } + } + + MAP_MEMBER(void)::Map(VaType virt, PaType phys, VaType size, bool flag) { + std::scoped_lock lock(blockMutex); + MapLocked(virt, phys, size, flag); + } + + MAP_MEMBER(void)::Unmap(VaType virt, VaType size) { + std::scoped_lock lock(blockMutex); + UnmapLocked(virt, size); + } + + MM_MEMBER(void)::Read(u8 *destination, VaType virt, VaType size) { + std::scoped_lock lock(this->blockMutex); + + TRACE_EVENT("containers", "FlatMemoryManager::Read"); + + VaType virtEnd{virt + size}; + + auto successor{std::upper_bound(this->blocks.begin(), this->blocks.end(), virt, [] (auto virt, const auto &block) { + return virt < block.virt; + })}; + + auto predecessor{std::prev(successor)}; + + u8 *blockPhys{predecessor->phys + (virt - predecessor->virt)}; + VaType blockReadSize{std::min(successor->virt - virt, size)}; + + while (size) { + if (predecessor->phys == nullptr) { + if (predecessor->flag) // Sparse mapping + std::memset(destination, 0, blockReadSize); + else + throw exception("Page fault at: 0x{:X}", predecessor->virt); + } else { + std::memcpy(destination, blockPhys, blockReadSize); + } + + destination += blockReadSize; + size -= blockReadSize; + + if (size) { + predecessor = successor++; + blockPhys = predecessor->phys; + blockReadSize = std::min(successor->virt - predecessor->virt, size); + } + } + } + + MM_MEMBER(void)::Write(VaType virt, u8 *source, VaType size) { + std::scoped_lock lock(this->blockMutex); + + TRACE_EVENT("containers", "FlatMemoryManager::Write"); + + VaType virtEnd{virt + size}; + + auto successor{std::upper_bound(this->blocks.begin(), this->blocks.end(), virt, [] (auto virt, const auto &block) { + return virt < block.virt; + })}; + + auto predecessor{std::prev(successor)}; + + u8 *blockPhys{predecessor->phys + (virt - predecessor->virt)}; + VaType blockWriteSize{std::min(successor->virt - virt, size)}; + + while (size) { + if (predecessor->phys == nullptr) { + if (!predecessor->flag) // Sparse mappings allow unmapped writes + throw exception("Page fault at: 0x{:X}", predecessor->virt); + } else { + std::memcpy(blockPhys, source, blockWriteSize); + } + + source += blockWriteSize; + size -= blockWriteSize; + + if (size) { + predecessor = successor++; + blockPhys = predecessor->phys; + blockWriteSize = std::min(successor->virt - predecessor->virt, size); + } + } + + } + + ALLOC_MEMBER()::FlatAllocator(VaType vaStart, VaType vaLimit) : Base(vaLimit), vaStart(vaStart), currentLinearAllocEnd(vaStart) {} + + ALLOC_MEMBER(VaType)::Allocate(VaType size) { + std::scoped_lock lock(this->blockMutex); + + TRACE_EVENT("containers", "FlatAllocator::Allocate"); + + VaType allocStart{UnmappedVa}; + VaType allocEnd{currentLinearAllocEnd + size}; + + if (allocEnd >= currentLinearAllocEnd && allocEnd <= this->vaLimit) { + auto allocEndSuccessor{std::lower_bound(this->blocks.begin(), this->blocks.end(), allocEnd)}; + if 
(allocEndSuccessor == this->blocks.begin()) + throw exception("Unexpected allocator state!"); + + auto allocEndPredecessor{std::prev(allocEndSuccessor)}; + if (allocEndPredecessor->virt <= currentLinearAllocEnd) { + allocStart = currentLinearAllocEnd; + } else { + while (allocEndSuccessor != this->blocks.end()) { + if (allocEndSuccessor->virt - allocEndPredecessor->virt < size || allocEndPredecessor->Mapped() ) { + allocStart = allocEndPredecessor->virt; + break; + } + + allocEndPredecessor = allocEndSuccessor++; + + if (allocEndSuccessor == this->blocks.end()) { + allocEnd = allocEndPredecessor->virt + size; + + if (allocEnd >= allocEndPredecessor->virt && allocEnd <= this->vaLimit) + allocStart = allocEndPredecessor->virt; + } + } + } + } + + if (allocStart != UnmappedVa) { + currentLinearAllocEnd = allocStart + size; + } else { // If linear allocation overflows the AS then find a gap + if (this->blocks.size() <= 2) + throw exception("Unexpected allocator state!"); + + auto searchPredecessor{this->blocks.begin()}; + auto searchSuccessor{std::next(searchPredecessor)}; + + while (searchSuccessor != this->blocks.end() && + (searchSuccessor->virt - searchPredecessor->virt < size || searchPredecessor->Mapped())) { + searchPredecessor = searchSuccessor++; + } + + if (searchSuccessor != this->blocks.end()) + allocStart = searchPredecessor->virt; + else + throw exception("Unexpected allocator state!"); + } + + + this->MapLocked(allocStart, true, size); + return allocStart; + } + + ALLOC_MEMBER(void)::AllocateFixed(VaType virt, VaType size) { + this->MapLocked(virt, true, size); + } + + ALLOC_MEMBER(void)::Free(VaType virt, VaType size) { + this->UnmapLocked(virt, size); + } +} diff --git a/app/src/main/cpp/skyline/common/circular_queue.h b/app/src/main/cpp/skyline/common/circular_queue.h index 00508c42..42c5d0de 100644 --- a/app/src/main/cpp/skyline/common/circular_queue.h +++ b/app/src/main/cpp/skyline/common/circular_queue.h @@ -3,6 +3,7 @@ #pragma once +#include #include namespace skyline { @@ -51,10 +52,15 @@ namespace skyline { */ template [[noreturn]] void Process(F function) { + TRACE_EVENT_BEGIN("containers", "CircularQueue::Process"); + while (true) { if (start == end) { std::unique_lock lock(productionMutex); + + TRACE_EVENT_END("containers"); produceCondition.wait(lock, [this]() { return start != end; }); + TRACE_EVENT_BEGIN("containers", "CircularQueue::Process"); } while (start != end) { diff --git a/app/src/main/cpp/skyline/common/signal.cpp b/app/src/main/cpp/skyline/common/signal.cpp index c3695144..4696f409 100644 --- a/app/src/main/cpp/skyline/common/signal.cpp +++ b/app/src/main/cpp/skyline/common/signal.cpp @@ -183,8 +183,6 @@ namespace skyline::signal { std::call_once(signalHandlerOnce[signal], [signal, &action]() { struct sigaction oldAction; Sigaction(signal, &action, &oldAction); - if (oldAction.sa_flags && oldAction.sa_flags != action.sa_flags) - throw exception("Old sigaction flags aren't equivalent to the replaced signal: {:#b} | {:#b}", oldAction.sa_flags, action.sa_flags); DefaultSignalHandlers.at(signal).function = (oldAction.sa_flags & SA_SIGINFO) ? 
oldAction.sa_sigaction : reinterpret_cast(oldAction.sa_handler); }); diff --git a/app/src/main/cpp/skyline/common/trace.h b/app/src/main/cpp/skyline/common/trace.h index 2305deb1..11de4045 100644 --- a/app/src/main/cpp/skyline/common/trace.h +++ b/app/src/main/cpp/skyline/common/trace.h @@ -13,7 +13,8 @@ PERFETTO_DEFINE_CATEGORIES( perfetto::Category("kernel").SetDescription("Events from parts of the HLE kernel"), perfetto::Category("guest").SetDescription("Events relating to guest code"), perfetto::Category("gpu").SetDescription("Events from the emulated GPU"), - perfetto::Category("service").SetDescription("Events from the HLE sysmodule implementations") + perfetto::Category("service").SetDescription("Events from the HLE sysmodule implementations"), + perfetto::Category("containers").SetDescription("Events from custom container implementations") ); namespace skyline::trace { diff --git a/app/src/main/cpp/skyline/services/common/result.h b/app/src/main/cpp/skyline/services/common/result.h index 62c287fa..7e9aed5f 100644 --- a/app/src/main/cpp/skyline/services/common/result.h +++ b/app/src/main/cpp/skyline/services/common/result.h @@ -13,6 +13,7 @@ namespace skyline::service { Busy = 16, // EBUSY InvalidArgument = 22, // EINVAL InappropriateIoctlForDevice = 25, // ENOTTY + FunctionNotImplemented = 38, // ENOSYS NotSupported = 95, // EOPNOTSUPP, ENOTSUP TimedOut = 110, // ETIMEDOUT diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/deserialisation/macro_def.h b/app/src/main/cpp/skyline/services/nvdrv/devices/deserialisation/macro_def.inc similarity index 100% rename from app/src/main/cpp/skyline/services/nvdrv/devices/deserialisation/macro_def.h rename to app/src/main/cpp/skyline/services/nvdrv/devices/deserialisation/macro_def.inc diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/deserialisation/macro_undef.h b/app/src/main/cpp/skyline/services/nvdrv/devices/deserialisation/macro_undef.inc similarity index 100% rename from app/src/main/cpp/skyline/services/nvdrv/devices/deserialisation/macro_undef.h rename to app/src/main/cpp/skyline/services/nvdrv/devices/deserialisation/macro_undef.inc diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.cpp b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.cpp index d8160a5f..31463d38 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.cpp +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.cpp @@ -1,10 +1,16 @@ // SPDX-License-Identifier: MIT OR MPL-2.0 // Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) +#include #include #include #include "as_gpu.h" +namespace skyline { + template class FlatAddressSpaceMap; + template class FlatAllocator; +} + namespace skyline::service::nvdrv::device::nvhost { AsGpu::AsGpu(const DeviceState &state, Core &core, const SessionContext &ctx) : NvDevice(state, core, ctx) {} @@ -14,38 +20,66 @@ namespace skyline::service::nvdrv::device::nvhost { } PosixResult AsGpu::AllocSpace(In pages, In pageSize, In flags, InOut offset) { - // TODO: track this on the nvdrv side and have the gmmu only do virt -> phys - // Also fix error codes - u64 size{static_cast(pages) * static_cast(pageSize)}; + state.logger->Debug("pages: 0x{:X}, pageSize: 0x{:X}, flags: ( fixed: {}, sparse: {} ), offset: 0x{:X}", pages, pageSize, flags.fixed, flags.sparse, offset); + + if (pageSize != VM::PageSize && pageSize != vm.bigPageSize) + return PosixResult::InvalidArgument; + + if (pageSize != vm.bigPageSize && flags.sparse) + 
return PosixResult::FunctionNotImplemented; + + u32 pageSizeBits{pageSize == VM::PageSize ? VM::PageSizeBits : vm.bigPageSizeBits}; + + auto &allocator{[&] () -> auto & { + if (pageSize == VM::PageSize) + return vm.smallPageAllocator; + else + return vm.bigPageAllocator; + }()}; if (flags.fixed) - offset = state.soc->gmmu.ReserveFixed(offset, size); + allocator->AllocateFixed(offset >> pageSizeBits, pages); else - offset = state.soc->gmmu.ReserveSpace(size, offset); // offset contains the input alignment + offset = static_cast(allocator->Allocate(pages)) << pageSizeBits; - if (offset == 0) { - state.logger->Warn("Failed to allocate GPU address space region!"); - return PosixResult::InvalidArgument; - } + u64 size{static_cast(pages) * static_cast(pageSize)}; + + if (flags.sparse) + state.soc->gm20b.gmmu.Map(offset, soc::gm20b::GM20B::GMMU::SparsePlaceholderAddress(), size, true); + + allocationMap[offset] = { + .size = size, + .pageSize = pageSize, + .sparse = flags.sparse + }; return PosixResult::Success; } PosixResult AsGpu::FreeSpace(In offset, In pages, In pageSize) { - // TODO: implement this when we add nvdrv side address space allocation + // TODO: implement after UNMAP return PosixResult::Success; } PosixResult AsGpu::UnmapBuffer(In offset) { + state.logger->Debug("offset: 0x{:X}", offset); + try { - auto region{regionMap.at(offset)}; + auto mapping{mappingMap.at(offset)}; - // Non-fixed regions are unmapped so that they can be used by future non-fixed mappings - if (!region.fixed) - if (!state.soc->gmmu.Unmap(offset, region.size)) - state.logger->Warn("Failed to unmap region at 0x{:X}", offset); + if (!mapping->fixed) { + auto &allocator{mapping->bigPage ? vm.bigPageAllocator : vm.smallPageAllocator}; + u32 pageSizeBits{mapping->bigPage ? vm.bigPageSizeBits : VM::PageSizeBits}; - regionMap.erase(offset); + allocator->Free(mapping->offset >> pageSizeBits, mapping->size >> pageSizeBits); + } + + if (mapping->sparseAlloc) + state.soc->gm20b.gmmu.Map(offset, soc::gm20b::GM20B::GMMU::SparsePlaceholderAddress(), mapping->size, true); + else + state.soc->gm20b.gmmu.Unmap(offset, mapping->size); + + mappingMap.erase(offset); } catch (const std::out_of_range &e) { state.logger->Warn("Couldn't find region to unmap at 0x{:X}", offset); } @@ -53,62 +87,94 @@ namespace skyline::service::nvdrv::device::nvhost { return PosixResult::Success; } - PosixResult AsGpu::MapBufferEx(In flags, In kind, In handle, InOut pageSize, In bufferOffset, In mappingSize, InOut offset) { - state.logger->Debug("flags: ( fixed: {}, remap: {} ), kind: {}, handle: {}, pageSize: 0x{:X}, bufferOffset: 0x{:X}, mappingSize: 0x{:X}, offset: 0x{:X}", flags.fixed, flags.remap, kind, handle, pageSize, bufferOffset, mappingSize, offset); + PosixResult AsGpu::MapBufferEx(In flags, In kind, In handle, In bufferOffset, In mappingSize, InOut offset) { + if (!vm.initialised) + return PosixResult::InvalidArgument; + + state.logger->Debug("flags: ( fixed: {}, remap: {} ), kind: {}, handle: {}, bufferOffset: 0x{:X}, mappingSize: 0x{:X}, offset: 0x{:X}", flags.fixed, flags.remap, kind, handle, bufferOffset, mappingSize, offset); if (flags.remap) { - auto region{regionMap.lower_bound(offset)}; - if (region == regionMap.end()) { + try { + auto mapping{mappingMap.at(offset)}; + + if (mapping->size < mappingSize) { + state.logger->Warn("Cannot remap a partially mapped GPU address space region: 0x{:X}", offset); + return PosixResult::InvalidArgument; + } + + u64 gpuAddress{offset + bufferOffset}; + u8 *cpuPtr{mapping->ptr + bufferOffset}; + + 
state.soc->gm20b.gmmu.Map(gpuAddress, cpuPtr, mappingSize); + + return PosixResult::Success; + } catch (const std::out_of_range &e) { state.logger->Warn("Cannot remap an unmapped GPU address space region: 0x{:X}", offset); return PosixResult::InvalidArgument; } - - if (region->second.size < mappingSize) { - state.logger->Warn("Cannot remap an partially mapped GPU address space region: 0x{:X}", offset); - return PosixResult::InvalidArgument; - } - - u64 gpuAddress{offset + bufferOffset}; - u8 *cpuPtr{region->second.ptr + bufferOffset}; - - if (!state.soc->gmmu.MapFixed(gpuAddress, cpuPtr, mappingSize)) { - state.logger->Warn("Failed to remap GPU address space region: 0x{:X}", gpuAddress); - return PosixResult::InvalidArgument; - } - - return PosixResult::Success; } auto h{core.nvMap.GetHandle(handle)}; if (!h) return PosixResult::InvalidArgument; - if (auto err{h->Duplicate(ctx.internalSession)}; err != PosixResult::Success) - return err; - u8 *cpuPtr{reinterpret_cast(h->address + bufferOffset)}; u64 size{mappingSize ? mappingSize : h->origSize}; - if (flags.fixed) - offset = state.soc->gmmu.MapFixed(offset, cpuPtr, size); - else - offset = state.soc->gmmu.MapAllocate(cpuPtr, size); + if (flags.fixed) { + auto alloc{allocationMap.upper_bound(offset)}; - if (offset == 0) { - state.logger->Warn("Failed to map GPU address space region!"); - return PosixResult::InvalidArgument; + if (alloc-- == allocationMap.begin() || (offset - alloc->first) + size > alloc->second.size) + throw exception("Cannot perform a fixed mapping into an unallocated region!"); + state.soc->gm20b.gmmu.Map(offset, cpuPtr, size); + + auto mapping{std::make_shared(cpuPtr, offset, size, true, false, alloc->second.sparse)}; + alloc->second.mappings.push_back(mapping); + mappingMap[offset] = mapping; + } else { + bool bigPage{[&] () { + if (util::IsAligned(h->align, vm.bigPageSize)) + return true; + else if (util::IsAligned(h->align, VM::PageSize)) + return false; + else + throw exception("Invalid handle alignment: 0x{:X}", h->align); + }()}; + + auto &allocator{bigPage ? vm.bigPageAllocator : vm.smallPageAllocator}; + u32 pageSize{bigPage ? vm.bigPageSize : VM::PageSize}; + u32 pageSizeBits{bigPage ? 
vm.bigPageSizeBits : VM::PageSizeBits}; + + offset = static_cast(allocator->Allocate(util::AlignUp(size, pageSize) >> pageSizeBits)) << pageSizeBits; + state.soc->gm20b.gmmu.Map(offset, cpuPtr, size); + + auto mapping{std::make_shared(cpuPtr, offset, size, false, bigPage, false)}; + mappingMap[offset] = mapping; } state.logger->Debug("Mapped to 0x{:X}", offset); - regionMap[offset] = {cpuPtr, size, flags.fixed}; - return PosixResult::Success; } PosixResult AsGpu::GetVaRegions(In bufAddr, InOut bufSize, Out> vaRegions) { - // TODO: impl when we move allocator to nvdrv + if (!vm.initialised) + return PosixResult::InvalidArgument; + + vaRegions = std::array { + VaRegion{ + .pageSize = VM::PageSize, + .pages = vm.smallPageAllocator->vaLimit - vm.smallPageAllocator->vaStart, + .offset = vm.smallPageAllocator->vaStart << VM::PageSizeBits, + }, + VaRegion{ + .pageSize = vm.bigPageSize, + .pages = vm.bigPageAllocator->vaLimit - vm.bigPageAllocator->vaStart, + .offset = vm.bigPageAllocator->vaStart << vm.bigPageSizeBits, + } + }; + return PosixResult::Success; } @@ -116,30 +182,83 @@ namespace skyline::service::nvdrv::device::nvhost { return GetVaRegions(bufAddr, bufSize, vaRegions); } - PosixResult AsGpu::AllocAsEx(In bigPageSize, In asFd, In flags, In vaRangeStart, In vaRangeEnd, In vaRangeSplit) { - // TODO: create the allocator here + PosixResult AsGpu::AllocAsEx(In flags, In asFd, In bigPageSize, In vaRangeStart, In vaRangeEnd, In vaRangeSplit) { + if (vm.initialised) + throw exception("Cannot initialise an address space twice!"); + + state.logger->Debug("bigPageSize: 0x{:X}, asFd: {}, flags: 0x{:X}, vaRangeStart: 0x{:X}, vaRangeEnd: 0x{:X}, vaRangeSplit: 0x{:X}", + bigPageSize, asFd, flags, vaRangeStart, vaRangeEnd, vaRangeSplit); + + if (bigPageSize) { + if (!std::ispow2(bigPageSize)) { + state.logger->Error("Non power-of-2 big page size: 0x{:X}!", bigPageSize); + return PosixResult::InvalidArgument; + } + + if (!(bigPageSize & VM::SupportedBigPageSizes)) { + state.logger->Error("Unsupported big page size: 0x{:X}!", bigPageSize); + return PosixResult::InvalidArgument; + } + + vm.bigPageSize = bigPageSize; + vm.bigPageSizeBits = std::countr_zero(bigPageSize); + + vm.vaRangeStart = bigPageSize << VM::VaStartShift; + } + + if (vaRangeStart) { + vm.vaRangeStart = vaRangeStart; + vm.vaRangeSplit = vaRangeSplit; + vm.vaRangeEnd = vaRangeEnd; + } + + u64 startPages{vm.vaRangeStart >> VM::PageSizeBits}; + u64 endPages{vm.vaRangeSplit >> VM::PageSizeBits}; + vm.smallPageAllocator = std::make_unique(startPages, endPages); + + u64 startBigPages{vm.vaRangeSplit >> vm.bigPageSizeBits}; + u64 endBigPages{(vm.vaRangeEnd - vm.vaRangeSplit) >> vm.bigPageSizeBits}; + vm.bigPageAllocator = std::make_unique(startBigPages, endBigPages); + + vm.initialised = true; + return PosixResult::Success; } PosixResult AsGpu::Remap(span entries) { - constexpr u32 BigPageSize{0x10}; //!< The big page size of the GPU - for (const auto &entry : entries) { - auto h{core.nvMap.GetHandle(entry.handle)}; - if (!h) + u64 virtAddr{static_cast(entry.asOffsetBigPages) << vm.bigPageSizeBits}; + u64 size{static_cast(entry.bigPages) << vm.bigPageSizeBits}; + + auto alloc{allocationMap.upper_bound(virtAddr)}; + + if (alloc-- == allocationMap.begin() || (virtAddr - alloc->first) + size > alloc->second.size) { + state.logger->Warn("Cannot remap into an unallocated region!"); return PosixResult::InvalidArgument; + } - u64 virtAddr{static_cast(entry.asOffsetBigPages) << BigPageSize}; - u8 *cpuPtr{reinterpret_cast(h->address + 
(static_cast(entry.handleOffsetBigPages) << BigPageSize))}; - u64 size{static_cast(entry.bigPages) << BigPageSize}; + if (!alloc->second.sparse) { + state.logger->Warn("Cannot remap a non-sparse mapping!"); + return PosixResult::InvalidArgument; + } - state.soc->gmmu.MapFixed(virtAddr, cpuPtr, size); + if (!entry.handle) { + state.soc->gm20b.gmmu.Map(virtAddr, soc::gm20b::GM20B::GMMU::SparsePlaceholderAddress(), size, true); + } else { + auto h{core.nvMap.GetHandle(entry.handle)}; + if (!h) + return PosixResult::InvalidArgument; + + u8 *cpuPtr{reinterpret_cast(h->address + (static_cast(entry.handleOffsetBigPages) << vm.bigPageSizeBits))}; + + state.soc->gm20b.gmmu.Map(virtAddr, cpuPtr, size); + } } return PosixResult::Success; } -#include +#include static constexpr u32 AsGpuMagic{0x41}; VARIABLE_IOCTL_HANDLER_FUNC(AsGpu, ({ @@ -152,7 +271,7 @@ namespace skyline::service::nvdrv::device::nvhost { IOCTL_CASE_ARGS(INOUT, SIZE(0x8), MAGIC(AsGpuMagic), FUNC(0x5), UnmapBuffer, ARGS(In)) IOCTL_CASE_ARGS(INOUT, SIZE(0x28), MAGIC(AsGpuMagic), FUNC(0x6), - MapBufferEx, ARGS(In, In, In, InOut, In, In, InOut)) + MapBufferEx, ARGS(In, In, In, Pad, In, In, InOut)) IOCTL_CASE_ARGS(INOUT, SIZE(0x40), MAGIC(AsGpuMagic), FUNC(0x8), GetVaRegions, ARGS(In, InOut, Pad, Out>)) IOCTL_CASE_ARGS(IN, SIZE(0x28), MAGIC(AsGpuMagic), FUNC(0x9), @@ -166,5 +285,5 @@ namespace skyline::service::nvdrv::device::nvhost { INLINE_IOCTL_CASE_ARGS(INOUT, SIZE(0x40), MAGIC(AsGpuMagic), FUNC(0x8), GetVaRegions3, ARGS(In, InOut, Pad, Out>)) })) -#include +#include } diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.h b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.h index ec109b08..edce48bf 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.h +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.h @@ -3,6 +3,8 @@ #pragma once +#include + #include namespace skyline::service::nvdrv::device::nvhost { @@ -12,18 +14,64 @@ namespace skyline::service::nvdrv::device::nvhost { */ class AsGpu : public NvDevice { private: - struct AddressSpaceRegion { + struct Mapping { u8 *ptr; + u64 offset; u64 size; bool fixed; + bool bigPage; // Only valid if fixed == false + bool sparseAlloc; + + Mapping(u8 *ptr, u64 offset, u64 size, bool fixed, bool bigPage, bool sparseAlloc) : ptr(ptr), + offset(offset), + size(size), + fixed(fixed), + bigPage(bigPage), + sparseAlloc(sparseAlloc) {} }; - std::map regionMap; //!< This maps the base addresses of mapped buffers to their total sizes and mapping type, this is needed as what was originally a single buffer may have been split into multiple GPU side buffers with the remap flag. + struct Allocation { + u64 size; + std::list> mappings; + u32 pageSize; + bool sparse; + }; + + std::map> mappingMap; //!< This maps the base addresses of mapped buffers to their total sizes and mapping type, this is needed as what was originally a single buffer may have been split into multiple GPU side buffers with the remap flag. 
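        // For non-fixed mappings, MapBufferEx above decides between the big-page and small-page halves
        // of the address space purely from the nvmap handle's alignment. Below is a standalone sketch
        // of that decision (not part of this header); the helper names are made up and the constants
        // mirror the VM struct defaults further down.

        #include <cstdint>
        #include <stdexcept>

        constexpr uint32_t SketchSmallPageSize{0x1000};  // VM::PageSize
        constexpr uint32_t SketchBigPageSize{0x20000};   // VM::DefaultBigPageSize

        constexpr bool SketchIsAligned(uint64_t value, uint64_t alignment) {
            return (value & (alignment - 1)) == 0; // Only valid for power-of-two alignments
        }

        // Returns the page size (and hence which allocator) a handle with the given alignment is mapped with
        inline uint32_t PickPageSize(uint32_t handleAlign) {
            if (SketchIsAligned(handleAlign, SketchBigPageSize))
                return SketchBigPageSize;
            else if (SketchIsAligned(handleAlign, SketchSmallPageSize))
                return SketchSmallPageSize;
            else
                throw std::invalid_argument("Invalid handle alignment"); // MapBufferEx throws in the same situation
        }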
+ + std::map allocationMap; + + + struct VM { + static constexpr u32 PageSize{0x1000}; + static constexpr u32 PageSizeBits{std::countr_zero(PageSize)}; + + static constexpr u32 SupportedBigPageSizes{0x30000}; + static constexpr u32 DefaultBigPageSize{0x20000}; + u32 bigPageSize{DefaultBigPageSize}; + u32 bigPageSizeBits{std::countr_zero(DefaultBigPageSize)}; + + static constexpr u32 VaStartShift{10}; + static constexpr u64 DefaultVaSplit{1ULL << 34}; + static constexpr u64 DefaultVaRange{1ULL << 37}; + u64 vaRangeStart{DefaultBigPageSize << VaStartShift}; + u64 vaRangeSplit{DefaultVaSplit}; + u64 vaRangeEnd{DefaultVaRange}; + + using Allocator = FlatAllocator; + + std::unique_ptr bigPageAllocator{}; + std::unique_ptr smallPageAllocator{}; + + bool initialised{}; + } vm; + public: struct MappingFlags { bool fixed : 1; - u8 _pad0_ : 7; + bool sparse : 1; + u8 _pad0_ : 6; bool remap : 1; u32 _pad1_ : 23; }; @@ -77,7 +125,7 @@ namespace skyline::service::nvdrv::device::nvhost { * @brief Maps a region into this address space with extra parameters * @url https://switchbrew.org/wiki/NV_services#NVGPU_AS_IOCTL_MAP_BUFFER_EX */ - PosixResult MapBufferEx(In flags, In kind, In handle, InOut pageSize, In bufferOffset, In mappingSize, InOut offset); + PosixResult MapBufferEx(In flags, In kind, In handle, In bufferOffset, In mappingSize, InOut offset); /** * @brief Returns info about the address space and its page sizes @@ -94,7 +142,7 @@ namespace skyline::service::nvdrv::device::nvhost { * @brief Allocates this address space with the given parameters * @url https://switchbrew.org/wiki/NV_services#NVGPU_AS_IOCTL_ALLOC_AS_EX */ - PosixResult AllocAsEx(In bigPageSize, In asFd, In flags, In vaRangeStart, In vaRangeEnd, In vaRangeSplit); + PosixResult AllocAsEx(In flags, In asFd, In bigPageSize, In vaRangeStart, In vaRangeEnd, In vaRangeSplit); /** * @brief Remaps a region of the GPU address space diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl.cpp b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl.cpp index 81d85b19..45a1ca33 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl.cpp +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl.cpp @@ -234,7 +234,7 @@ namespace skyline::service::nvdrv::device::nvhost { return nullptr; } -#include +#include static constexpr u32 CtrlMagic{0}; IOCTL_HANDLER_FUNC(Ctrl, ({ @@ -254,5 +254,5 @@ namespace skyline::service::nvdrv::device::nvhost { IOCTL_CASE_RESULT(INOUT, SIZE(0x183), MAGIC(CtrlMagic), FUNC(0x1B), PosixResult::InvalidArgument) // GetConfig isn't available in production })) -#include +#include } diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl_gpu.cpp b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl_gpu.cpp index dbe5f2ab..e8301071 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl_gpu.cpp +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl_gpu.cpp @@ -62,7 +62,7 @@ namespace skyline::service::nvdrv::device::nvhost { } } -#include +#include static constexpr u32 CtrlGpuMagic{0x47}; IOCTL_HANDLER_FUNC(CtrlGpu, ({ @@ -77,5 +77,5 @@ namespace skyline::service::nvdrv::device::nvhost { IOCTL_CASE_ARGS(OUT, SIZE(0x8), MAGIC(CtrlGpuMagic), FUNC(0x14), GetActiveSlotMask, ARGS(Out, Out)) })) -#include +#include } diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.cpp b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.cpp index c326c528..af2b3549 100644 --- 
a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.cpp +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.cpp @@ -104,7 +104,7 @@ namespace skyline::service::nvdrv::device::nvhost { } } -#include +#include static constexpr u32 GpuChannelUserMagic{0x47}; static constexpr u32 GpuChannelMagic{0x48}; @@ -138,5 +138,5 @@ namespace skyline::service::nvdrv::device::nvhost { INLINE_IOCTL_CASE_ARGS(INOUT, SIZE(0x18), MAGIC(GpuChannelMagic), FUNC(0x1B), SubmitGpfifo2, ARGS(In, In, InOut, InOut)) })) -#include +#include } diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvmap.cpp b/app/src/main/cpp/skyline/services/nvdrv/devices/nvmap.cpp index 1eb36e05..586e3e6c 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvmap.cpp +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvmap.cpp @@ -115,7 +115,7 @@ namespace skyline::service::nvdrv::device { return PosixResult::Success; } -#include "deserialisation/macro_def.h" +#include "deserialisation/macro_def.inc" static constexpr u32 NvMapMagic{1}; IOCTL_HANDLER_FUNC(NvMap, ({ @@ -132,6 +132,6 @@ namespace skyline::service::nvdrv::device { IOCTL_CASE_ARGS(INOUT, SIZE(0x8), MAGIC(NvMapMagic), FUNC(0xE), GetId, ARGS(Out, In)) })) -#include "deserialisation/macro_undef.h" +#include "deserialisation/macro_undef.inc" } diff --git a/app/src/main/cpp/skyline/soc.h b/app/src/main/cpp/skyline/soc.h index a0316a23..717bb321 100644 --- a/app/src/main/cpp/skyline/soc.h +++ b/app/src/main/cpp/skyline/soc.h @@ -3,7 +3,6 @@ #pragma once -#include "soc/gmmu.h" #include "soc/host1x.h" #include "soc/gm20b.h" @@ -14,10 +13,9 @@ namespace skyline::soc { */ class SOC { public: - gmmu::GraphicsMemoryManager gmmu; host1x::Host1X host1x; gm20b::GM20B gm20b; - SOC(const DeviceState &state) : gmmu(state), gm20b(state) {} + SOC(const DeviceState &state) : gm20b(state) {} }; } diff --git a/app/src/main/cpp/skyline/soc/gm20b.cpp b/app/src/main/cpp/skyline/soc/gm20b.cpp new file mode 100644 index 00000000..46c4ce9d --- /dev/null +++ b/app/src/main/cpp/skyline/soc/gm20b.cpp @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#include +#include "gm20b.h" + +namespace skyline { + template class FlatAddressSpaceMap; + template class FlatMemoryManager; +} + +namespace skyline::soc::gm20b { + GM20B::GM20B(const DeviceState &state) : + fermi2D(state), + keplerMemory(state), + maxwell3D(state), + maxwellCompute(state), + maxwellDma(state), + gpfifo(state) {} +} diff --git a/app/src/main/cpp/skyline/soc/gm20b.h b/app/src/main/cpp/skyline/soc/gm20b.h index a5b7a9bb..7ad39445 100644 --- a/app/src/main/cpp/skyline/soc/gm20b.h +++ b/app/src/main/cpp/skyline/soc/gm20b.h @@ -3,23 +3,28 @@ #pragma once +#include #include "gm20b/engines/maxwell_3d.h" #include "gm20b/gpfifo.h" namespace skyline::soc::gm20b { /** * @brief The GPU block in the X1, it contains all GPU engines required for accelerating graphics operations - * @note We omit parts of components related to external access such as the GM20B Host, all accesses to the external components are done directly + * @note We omit parts of components related to external access such as the grhost, all accesses to the external components are done directly */ class GM20B { public: + static constexpr u8 AddressSpaceBits{40}; //!< The width of the GMMU AS + using GMMU = FlatMemoryManager; + engine::Engine fermi2D; engine::maxwell3d::Maxwell3D maxwell3D; engine::Engine maxwellCompute; engine::Engine 
maxwellDma; engine::Engine keplerMemory; GPFIFO gpfifo; + GMMU gmmu; - GM20B(const DeviceState &state) : fermi2D(state), keplerMemory(state), maxwell3D(state), maxwellCompute(state), maxwellDma(state), gpfifo(state) {} + GM20B(const DeviceState &state); }; } diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/macro_interpreter.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/macro_interpreter.cpp index ecbc0d7f..552ae145 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/macro_interpreter.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/macro_interpreter.cpp @@ -1,7 +1,7 @@ // SPDX-License-Identifier: MPL-2.0 // Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/) -#include +#include #include namespace skyline::soc::gm20b::engine::maxwell3d { diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp index dfdcfa3a..4bc7004b 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp @@ -157,7 +157,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { switch (registers.semaphore.info.structureSize) { case Registers::SemaphoreInfo::StructureSize::OneWord: - state.soc->gmmu.Write(static_cast(result), registers.semaphore.address.Pack()); + state.soc->gm20b.gmmu.Write(registers.semaphore.address.Pack(), static_cast(result)); break; case Registers::SemaphoreInfo::StructureSize::FourWords: { // Convert the current nanosecond time to GPU ticks @@ -167,7 +167,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { u64 nsTime{util::GetTimeNs()}; u64 timestamp{(nsTime / NsToTickDenominator) * NsToTickNumerator + ((nsTime % NsToTickDenominator) * NsToTickNumerator) / NsToTickDenominator}; - state.soc->gmmu.Write(FourWordResult{result, timestamp}, registers.semaphore.address.Pack()); + state.soc->gm20b.gmmu.Write(registers.semaphore.address.Pack(), FourWordResult{result, timestamp}); break; } } diff --git a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp index 1b3a41c9..eea9967c 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp @@ -56,7 +56,7 @@ namespace skyline::soc::gm20b { } pushBufferData.resize(gpEntry.size); - state.soc->gmmu.Read(pushBufferData, gpEntry.Address()); + state.soc->gm20b.gmmu.Read(pushBufferData, gpEntry.Address()); for (auto entry{pushBufferData.begin()}; entry != pushBufferData.end(); entry++) { // An entry containing all zeroes is a NOP, skip over it @@ -88,8 +88,7 @@ namespace skyline::soc::gm20b { return; default: - state.logger->Warn("Unsupported pushbuffer method SecOp: {}", static_cast(methodHeader.secOp)); - break; + throw exception("Unsupported pushbuffer method SecOp: {}", static_cast(methodHeader.secOp)); } } } @@ -106,7 +105,7 @@ namespace skyline::soc::gm20b { try { signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, signal::ExceptionalSignalHandler); pushBuffers->Process([this](GpEntry gpEntry) { - state.logger->Debug("Processing pushbuffer: 0x{:X}", gpEntry.Address()); + state.logger->Warn("Processing pushbuffer: 0x{:X}", gpEntry.Address()); Process(gpEntry); }); } catch (const signal::SignalException &e) { diff --git a/app/src/main/cpp/skyline/soc/gmmu.cpp b/app/src/main/cpp/skyline/soc/gmmu.cpp deleted file mode 100644 index 99e5e674..00000000 --- a/app/src/main/cpp/skyline/soc/gmmu.cpp +++ /dev/null @@ -1,214 +0,0 
@@ -// SPDX-License-Identifier: MPL-2.0 -// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/) - -#include -#include "gmmu.h" - -namespace skyline::soc::gmmu { - constexpr u64 GpuPageSize{1 << 16}; //!< The page size of the GPU address space - - GraphicsMemoryManager::GraphicsMemoryManager(const DeviceState &state) : state(state) { - constexpr u64 gpuAddressSpaceSize{1UL << 40}; //!< The size of the GPU address space - constexpr u64 gpuAddressSpaceBase{0x100000}; //!< The base of the GPU address space - must be non-zero - - // Create the initial chunk that will be split to create new chunks - ChunkDescriptor baseChunk(gpuAddressSpaceBase, gpuAddressSpaceSize, nullptr, ChunkState::Unmapped); - chunks.push_back(baseChunk); - } - - std::optional GraphicsMemoryManager::FindChunk(ChunkState desiredState, u64 size, u64 alignment) { - auto chunk{std::find_if(chunks.begin(), chunks.end(), [desiredState, size, alignment](const ChunkDescriptor &chunk) -> bool { - return (alignment ? util::IsAligned(chunk.virtualAddress, alignment) : true) && chunk.size > size && chunk.state == desiredState; - })}; - - if (chunk != chunks.end()) - return *chunk; - - return std::nullopt; - } - - u64 GraphicsMemoryManager::InsertChunk(const ChunkDescriptor &newChunk) { - auto chunkEnd{chunks.end()}; - for (auto chunk{chunks.begin()}; chunk != chunkEnd; chunk++) { - if (chunk->CanContain(newChunk)) { - auto oldChunk{*chunk}; - u64 newSize{newChunk.virtualAddress - chunk->virtualAddress}; - u64 extension{chunk->size - newSize - newChunk.size}; - - if (newSize == 0) { - *chunk = newChunk; - } else { - chunk->size = newSize; - chunk = chunks.insert(std::next(chunk), newChunk); - } - - if (extension) - chunks.insert(std::next(chunk), ChunkDescriptor(newChunk.virtualAddress + newChunk.size, extension, (oldChunk.state == ChunkState::Mapped) ? 
(oldChunk.cpuPtr + newSize + newChunk.size) : nullptr, oldChunk.state)); - - return newChunk.virtualAddress; - } else if (chunk->virtualAddress + chunk->size > newChunk.virtualAddress) { - chunk->size = newChunk.virtualAddress - chunk->virtualAddress; - - // Deletes all chunks that are within the chunk being inserted and split the final one - auto tailChunk{std::next(chunk)}; - while (tailChunk != chunkEnd) { - if (tailChunk->virtualAddress + tailChunk->size >= newChunk.virtualAddress + newChunk.size) - break; - - tailChunk = chunks.erase(tailChunk); - chunkEnd = chunks.end(); - } - - // The given chunk is too large to fit into existing chunks - if (tailChunk == chunkEnd) - break; - - u64 chunkSliceOffset{newChunk.virtualAddress + newChunk.size - tailChunk->virtualAddress}; - tailChunk->virtualAddress += chunkSliceOffset; - tailChunk->size -= chunkSliceOffset; - if (tailChunk->state == ChunkState::Mapped) - tailChunk->cpuPtr += chunkSliceOffset; - - // If the size of the head chunk is zero then we can directly replace it with our new one rather than inserting it - auto headChunk{std::prev(tailChunk)}; - if (headChunk->size == 0) - *headChunk = newChunk; - else - chunks.insert(std::next(headChunk), newChunk); - - return newChunk.virtualAddress; - } - } - - throw exception("Failed to insert chunk into GPU address space!"); - } - - u64 GraphicsMemoryManager::ReserveSpace(u64 size, u64 alignment) { - size = util::AlignUp(size, GpuPageSize); - - std::unique_lock lock(mutex); - auto newChunk{FindChunk(ChunkState::Unmapped, size, alignment)}; - if (!newChunk) [[unlikely]] - return 0; - - auto chunk{*newChunk}; - chunk.size = size; - chunk.state = ChunkState::Reserved; - - return InsertChunk(chunk); - } - - u64 GraphicsMemoryManager::ReserveFixed(u64 virtualAddress, u64 size) { - if (!util::IsAligned(virtualAddress, GpuPageSize)) [[unlikely]] - return 0; - - size = util::AlignUp(size, GpuPageSize); - - std::unique_lock lock(mutex); - return InsertChunk(ChunkDescriptor(virtualAddress, size, nullptr, ChunkState::Reserved)); - } - - u64 GraphicsMemoryManager::MapAllocate(u8 *cpuPtr, u64 size) { - size = util::AlignUp(size, GpuPageSize); - - std::unique_lock lock(mutex); - auto mappedChunk{FindChunk(ChunkState::Unmapped, size)}; - if (!mappedChunk) [[unlikely]] - return 0; - - auto chunk{*mappedChunk}; - chunk.cpuPtr = cpuPtr; - chunk.size = size; - chunk.state = ChunkState::Mapped; - - return InsertChunk(chunk); - } - - u64 GraphicsMemoryManager::MapFixed(u64 virtualAddress, u8 *cpuPtr, u64 size) { - if (!util::IsAligned(virtualAddress, GpuPageSize)) [[unlikely]] - return 0; - - size = util::AlignUp(size, GpuPageSize); - - std::unique_lock lock(mutex); - return InsertChunk(ChunkDescriptor(virtualAddress, size, cpuPtr, ChunkState::Mapped)); - } - - bool GraphicsMemoryManager::Unmap(u64 virtualAddress, u64 size) { - if (!util::IsAligned(virtualAddress, GpuPageSize)) [[unlikely]] - return false; - - try { - std::unique_lock lock(mutex); - InsertChunk(ChunkDescriptor(virtualAddress, size, nullptr, ChunkState::Unmapped)); - } catch (const std::exception &e) { - return false; - } - - return true; - } - - void GraphicsMemoryManager::Read(u8 *destination, u64 virtualAddress, u64 size) { - std::shared_lock lock(mutex); - - auto chunk{std::upper_bound(chunks.begin(), chunks.end(), virtualAddress, [](const u64 address, const ChunkDescriptor &chunk) -> bool { - return address < chunk.virtualAddress; - })}; - - if (chunk == chunks.end() || chunk->state != ChunkState::Mapped) - throw exception("Failed to read 
region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, size); - - chunk--; - - u64 initialSize{size}; - u64 chunkOffset{virtualAddress - chunk->virtualAddress}; - u8 *source{chunk->cpuPtr + chunkOffset}; - u64 sourceSize{std::min(chunk->size - chunkOffset, size)}; - - // A continuous region in the GPU address space may be made up of several discontinuous regions in physical memory so we have to iterate over all chunks - while (size) { - std::memcpy(destination + (initialSize - size), source, sourceSize); - - size -= sourceSize; - if (size) { - if (++chunk == chunks.end() || chunk->state != ChunkState::Mapped) - throw exception("Failed to read region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, size); - - source = chunk->cpuPtr; - sourceSize = std::min(chunk->size, size); - } - } - } - - void GraphicsMemoryManager::Write(u8 *source, u64 virtualAddress, u64 size) { - std::shared_lock lock(mutex); - - auto chunk{std::upper_bound(chunks.begin(), chunks.end(), virtualAddress, [](const u64 address, const ChunkDescriptor &chunk) -> bool { - return address < chunk.virtualAddress; - })}; - - if (chunk == chunks.end() || chunk->state != ChunkState::Mapped) - throw exception("Failed to write region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, size); - - chunk--; - - u64 initialSize{size}; - u64 chunkOffset{virtualAddress - chunk->virtualAddress}; - u8 *destination{chunk->cpuPtr + chunkOffset}; - u64 destinationSize{std::min(chunk->size - chunkOffset, size)}; - - // A continuous region in the GPU address space may be made up of several discontinuous regions in physical memory so we have to iterate over all chunks - while (size) { - std::memcpy(destination, source + (initialSize - size), destinationSize); - - size -= destinationSize; - if (size) { - if (++chunk == chunks.end() || chunk->state != ChunkState::Mapped) - throw exception("Failed to write region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, size); - - destination = chunk->cpuPtr; - destinationSize = std::min(chunk->size, size); - } - } - } -} diff --git a/app/src/main/cpp/skyline/soc/gmmu.h b/app/src/main/cpp/skyline/soc/gmmu.h deleted file mode 100644 index 1d82a211..00000000 --- a/app/src/main/cpp/skyline/soc/gmmu.h +++ /dev/null @@ -1,140 +0,0 @@ -// SPDX-License-Identifier: MPL-2.0 -// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/) - -#pragma once - -#include - -namespace skyline::soc::gmmu { - enum class ChunkState { - Unmapped, //!< The chunk is unmapped - Reserved, //!< The chunk is reserved - Mapped //!< The chunk is mapped and a CPU side address is present - }; - - struct ChunkDescriptor { - u64 virtualAddress; //!< The address of the chunk in the virtual address space - u64 size; //!< The size of the chunk in bytes - u8 *cpuPtr; //!< A pointer to the chunk in the application's address space (if mapped) - ChunkState state; - - ChunkDescriptor(u64 virtualAddress, u64 size, u8 *cpuPtr, ChunkState state) : virtualAddress(virtualAddress), size(size), cpuPtr(cpuPtr), state(state) {} - - /** - * @return If the given chunk can be contained wholly within this chunk - */ - inline bool CanContain(const ChunkDescriptor &chunk) { - return (chunk.virtualAddress >= virtualAddress) && ((size + virtualAddress) >= (chunk.size + chunk.virtualAddress)); - } - }; - - /** - * @brief The GraphicsMemoryManager class handles mapping between a Maxwell GPU virtual address space and an application's address space and is meant to 
roughly emulate the GMMU on the X1 - * @note This is not accurate to the X1 as it would have an SMMU between the GMMU and physical memory but we don't emulate this abstraction at the moment - */ - class GraphicsMemoryManager { - private: - const DeviceState &state; - std::vector chunks; - std::shared_mutex mutex; - - /** - * @brief Finds a chunk in the virtual address space that is larger than meets the given requirements - * @note vmmMutex MUST be locked when calling this - * @param desiredState The state of the chunk to find - * @param size The minimum size of the chunk to find - * @param alignment The minimum alignment of the chunk to find - * @return The first applicable chunk - */ - std::optional FindChunk(ChunkState desiredState, u64 size, u64 alignment = 0); - - /** - * @brief Inserts a chunk into the chunk list, resizing and splitting as necessary - * @note vmmMutex MUST be locked when calling this - * @param newChunk The chunk to insert - * @return The base virtual address of the inserted chunk - */ - u64 InsertChunk(const ChunkDescriptor &newChunk); - - public: - GraphicsMemoryManager(const DeviceState &state); - - /** - * @brief Reserves a region of the virtual address space so it will not be chosen automatically when mapping - * @param size The size of the region to reserve - * @param alignment The alignment of the region to reserve - * @return The base virtual address of the reserved region - */ - u64 ReserveSpace(u64 size, u64 alignment); - - /** - * @brief Reserves a fixed region of the virtual address space so it will not be chosen automatically when mapping - * @param virtualAddress The virtual base address of the region to allocate - * @param size The size of the region to allocate - * @return The base virtual address of the reserved region - */ - u64 ReserveFixed(u64 virtualAddress, u64 size); - - /** - * @brief Maps a CPU memory region into an automatically chosen region of the virtual address space - * @param cpuPtr A pointer to the region to be mapped into the virtual address space - * @param size The size of the region to map - * @return The base virtual address of the mapped region - */ - u64 MapAllocate(u8 *cpuPtr, u64 size); - - /** - * @brief Maps a CPU memory region to a fixed region in the virtual address space - * @param virtualAddress The target virtual address of the region - * @param cpuPtr A pointer to the region to be mapped into the virtual address space - * @param size The size of the region to map - * @return The base virtual address of the mapped region - */ - u64 MapFixed(u64 virtualAddress, u8 *cpuPtr, u64 size); - - /** - * @brief Unmaps all chunks in the given region from the virtual address space - * @return Whether the operation succeeded - */ - bool Unmap(u64 virtualAddress, u64 size); - - void Read(u8 *destination, u64 virtualAddress, u64 size); - - /** - * @brief Reads in a span from a region of the virtual address space - */ - template - void Read(span destination, u64 virtualAddress) { - Read(reinterpret_cast(destination.data()), virtualAddress, destination.size_bytes()); - } - - /** - * @brief Reads in an object from a region of the virtual address space - * @tparam T The type of object to return - */ - template - T Read(u64 virtualAddress) { - T obj; - Read(reinterpret_cast(&obj), virtualAddress, sizeof(T)); - return obj; - } - - void Write(u8 *source, u64 virtualAddress, u64 size); - - /** - * @brief Writes out a span to a region of the virtual address space - */ - template - void Write(span source, u64 virtualAddress) { - 
Write(reinterpret_cast(source.data()), virtualAddress, source.size_bytes()); - } - - /** - * @brief Reads in an object from a region of the virtual address space - */ - template - void Write(T source, u64 virtualAddress) { - Write(reinterpret_cast(&source), virtualAddress, sizeof(T)); - } - }; -}
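The deleted GraphicsMemoryManager above satisfied every reservation with a linear FindChunk scan over its chunk list. The FlatAllocator that replaces it instead tries a cheap linear bump allocation first and only falls back to searching for a free gap once that pass runs out of address space. A self-contained toy model of that two-pass strategy follows; the bookkeeping (a map of live allocations) and all names are illustrative and deliberately simpler than the block-vector representation the real class uses.

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <map>

    class ToyAllocator {
      private:
        uint64_t vaStart, vaLimit;
        uint64_t linearEnd;                // End of the region handed out by the fast bump pass
        std::map<uint64_t, uint64_t> used; // base -> size of live allocations, sorted by base

      public:
        ToyAllocator(uint64_t vaStart, uint64_t vaLimit) : vaStart(vaStart), vaLimit(vaLimit), linearEnd(vaStart) {}

        uint64_t Allocate(uint64_t size) {
            // Fast path: bump-allocate past everything handed out so far
            if (linearEnd + size <= vaLimit) {
                uint64_t base{linearEnd};
                linearEnd += size;
                used.emplace(base, size);
                return base;
            }

            // Slow path: first-fit scan over the gaps left behind by freed allocations
            uint64_t gapStart{vaStart};
            for (const auto &[base, allocSize] : used) {
                if (base >= gapStart + size) {
                    used.emplace(gapStart, size); // The gap [gapStart, base) is large enough
                    linearEnd = std::max(linearEnd, gapStart + size);
                    return gapStart;
                }
                gapStart = std::max(gapStart, base + allocSize);
            }

            if (vaLimit >= gapStart + size) { // Tail gap after the last allocation
                used.emplace(gapStart, size);
                linearEnd = std::max(linearEnd, gapStart + size);
                return gapStart;
            }

            return 0; // Out of address space
        }

        void Free(uint64_t base) {
            used.erase(base); // Freed ranges become gaps the slow path can reuse
        }
    };

    int main() {
        ToyAllocator allocator{0x1000, 0x6000};
        uint64_t a{allocator.Allocate(0x2000)}; // 0x1000 via the fast path
        uint64_t b{allocator.Allocate(0x3000)}; // 0x3000 via the fast path, the linear end now hits the limit
        allocator.Free(a);
        uint64_t c{allocator.Allocate(0x4000)}; // 0: no gap is large enough
        uint64_t d{allocator.Allocate(0x2000)}; // 0x1000: the slow path reuses the freed gap
        std::printf("a=0x%llx b=0x%llx c=0x%llx d=0x%llx\n",
                    static_cast<unsigned long long>(a), static_cast<unsigned long long>(b),
                    static_cast<unsigned long long>(c), static_cast<unsigned long long>(d));
    }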