mirror of
synced 2025-03-03 23:55:21 +03:00
NEEDS CLEANUP: Reimplement GPU VMM and rewrite nvdrv VM impl
This commit is contained in:
@ -96,7 +96,7 @@ add_library(skyline SHARED
Normal file
Normal file
@ -0,0 +1,155 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <concepts>
#include <common.h>
namespace skyline {
// Constrains the VA type of an address space: it must be an unsigned type whose width in bits (sizeof * 8) is at least AddressSpaceBits
template<typename VaType, size_t AddressSpaceBits>
concept AddressSpaceValid = std::is_unsigned_v<VaType> && sizeof(VaType) * 8 >= AddressSpaceBits;
* @brief FlatAddressSpaceMap provides a generic VA->PA mapping implementation using a sorted vector
template<typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa, bool PaContigSplit, size_t AddressSpaceBits> requires AddressSpaceValid<VaType, AddressSpaceBits>
class FlatAddressSpaceMap {
* @brief Represents a block of memory in the AS
struct Block {
VaType virt{UnmappedVa}; //!< VA of the block
PaType phys{UnmappedPa}; //!< PA of the block, will increase 1-1 with VA until a new block is encountered
bool flag{}; //!< General purpose flag for use by derived classes
Block() = default;
Block(VaType virt, PaType phys, bool flag) : virt(virt), phys(phys), flag(flag) {}
constexpr bool Valid() {
return virt != UnmappedVa;
constexpr bool Mapped() {
return phys != UnmappedPa;
constexpr bool Unmapped() {
return phys == UnmappedPa;
bool operator<(const VaType &pVirt) const {
return virt < pVirt;
std::mutex blockMutex;
std::vector<Block> blocks{Block{}};
* @brief Maps a PA range into the given AS region, optionally setting the flag
* @note blockMutex MUST be locked when calling this
void MapLocked(VaType virt, PaType phys, VaType size, bool flag = {});
* @brief Unmaps the given range and merges it with other unmapped regions
* @note blockMutex MUST be locked when calling this
void UnmapLocked(VaType virt, VaType size);
static constexpr VaType VaMaximum{(1ULL << (AddressSpaceBits - 1)) + ((1ULL << (AddressSpaceBits - 1)) - 1)}; //!< The maximum VA that this AS can technically reach
VaType vaLimit{VaMaximum}; //!< A soft limit on the maximum VA of the AS
FlatAddressSpaceMap(VaType pVaLimit);
FlatAddressSpaceMap() = default;
* @brief Acquires blockMutex and then calls MapLocked with the same arguments
void Map(VaType virt, PaType phys, VaType size, bool flag = {});
* @brief Acquires blockMutex and then calls UnmapLocked with the same arguments
void Unmap(VaType virt, VaType size);
* @brief FlatMemoryManager specialises FlatAddressSpaceMap to focus on pointers as PAs, adding read/write functions
template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> requires AddressSpaceValid<VaType, AddressSpaceBits>
class FlatMemoryManager : public FlatAddressSpaceMap<VaType, UnmappedVa, u8 *, nullptr, true, AddressSpaceBits> {
* @return A placeholder address for sparse mapped regions, this means nothing
static u8 *SparsePlaceholderAddress() {
    // The numeric value is an arbitrary marker, it only needs to differ from nullptr (the unmapped PA)
    constexpr auto placeholderBits{0xCAFEBABE};
    return reinterpret_cast<u8 *>(placeholderBits);
}
void Read(u8 *destination, VaType virt, VaType size);
// Fills the entire supplied span with the bytes found at the given VA by forwarding to the raw-pointer Read
template<typename T>
void Read(span<T> destination, VaType virt) {
    auto *rawDestination{reinterpret_cast<u8 *>(destination.data())};
    Read(rawDestination, virt, destination.size_bytes());
}
// Reads sizeof(T) bytes from the given VA into a local object of type T and returns it by value
template<typename T>
T Read(VaType virt) {
    T result;
    auto *rawResult{reinterpret_cast<u8 *>(&result)};
    Read(rawResult, virt, sizeof(T));
    return result;
}
void Write(VaType virt, u8 *source, VaType size);
// Writes the full contents of the supplied span to the given VA by forwarding to the raw-pointer Write
template<typename T>
void Write(VaType virt, span<T> source) {
    auto *rawSource{reinterpret_cast<u8 *>(source.data())};
    Write(virt, rawSource, source.size_bytes());
}
// Writes the sizeof(T) bytes of the by-value object to the given VA
template<typename T>
void Write(VaType virt, T source) {
    auto *rawSource{reinterpret_cast<u8 *>(&source)};
    Write(virt, rawSource, sizeof(T));
}
* @brief FlatAllocator specialises FlatAddressSpaceMap to work as an allocator, with an initial, fast linear pass and a subsequent slower pass that iterates until it finds a free block
template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> requires AddressSpaceValid<VaType, AddressSpaceBits>
class FlatAllocator : public FlatAddressSpaceMap<VaType, UnmappedVa, bool, false, false, AddressSpaceBits> {
using Base = FlatAddressSpaceMap<VaType, UnmappedVa, bool, false, false, AddressSpaceBits>;
VaType currentLinearAllocEnd; //!< The end address for the initial linear allocation pass, once this reaches the AS limit the slower allocation path will be used
VaType vaStart; //!< The base VA of the allocator, no allocations will be below this
FlatAllocator(VaType vaStart, VaType vaLimit);
* @brief Allocates a region in the AS of the given size and returns its address
VaType Allocate(VaType size);
* @brief Marks the given region in the AS as allocated
void AllocateFixed(VaType virt, VaType size);
* @brief Frees an AS region so it can be used again
void Free(VaType virt, VaType size);
Normal file
Normal file
@ -0,0 +1,354 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <common/trace.h>
#include <kernel/types/KProcess.h>
#include "address_space.h"
#define MAP_MEMBER(returnType) template<typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa, bool PaContigSplit, size_t AddressSpaceBits> requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatAddressSpaceMap<VaType, UnmappedVa, PaType, UnmappedPa, PaContigSplit, AddressSpaceBits>
#define MM_MEMBER(returnType) template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatMemoryManager<VaType, UnmappedVa, AddressSpaceBits>
#define ALLOC_MEMBER(returnType) template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatAllocator<VaType, UnmappedVa, AddressSpaceBits>
namespace skyline {
// Constructs the map with a caller-chosen soft VA limit, throwing if that limit is above VaMaximum
MAP_MEMBER()::FlatAddressSpaceMap(VaType pVaLimit) : vaLimit(pVaLimit) {
    const bool limitRepresentable{pVaLimit <= VaMaximum};
    if (!limitRepresentable)
        throw exception("Invalid VA limit!");
}
// Maps the VA range [virt, virt + size) to phys with the given flag by editing the sorted block vector in place
// Throws if the end of the range is past vaLimit or if the block vector is in an unexpected state
// The caller must already hold blockMutex (see the declaration's note)
// NOTE(review): this listing has no closing braces and appears to be missing other short lines, confirm control flow against the repository before changing any logic
MAP_MEMBER(void)::MapLocked(VaType virt, PaType phys, VaType size, bool flag) {
TRACE_EVENT("containers", "FlatAddressSpaceMap::Map");
// NOTE(review): virt + size is computed in VaType with no wrap-around check - confirm callers cannot overflow it
VaType virtEnd{virt + size};
if (virtEnd > vaLimit)
throw exception("Trying to map a block past the VA limit!");
// First block whose VA is >= virtEnd (Block::operator< compares a block's virt against a bare VA)
auto blockEndSuccessor{std::lower_bound(blocks.begin(), blocks.end(), virtEnd)};
if (blockEndSuccessor == blocks.begin())
throw exception("Unexpected Memory Manager state!");
// Last block that starts before virtEnd
auto blockEndPredecessor{std::prev(blockEndSuccessor)};
if (blockEndSuccessor != blocks.end()) {
// We have blocks in front of us, if one is directly in front then we don't have to add a tail
if (blockEndSuccessor->virt != virtEnd) {
// PA that the remainder of the predecessor block will have once it restarts at virtEnd
PaType tailPhys{[&]() -> PaType {
if (!PaContigSplit || blockEndPredecessor->Unmapped())
return blockEndPredecessor->phys; // Always propagate unmapped regions
return blockEndPredecessor->phys + virtEnd - blockEndPredecessor->virt;
if (blockEndPredecessor->virt >= virt) {
// If this block's start would be overlapped by the map then reuse it as a tail block
blockEndPredecessor->virt = virtEnd;
blockEndPredecessor->phys = tailPhys;
// NOTE(review): the next line assigns the flag to itself and so has no effect
blockEndPredecessor->flag = blockEndPredecessor->flag;
} else {
// Else insert a new one and we're done
blocks.insert(blockEndSuccessor, {Block(virt, phys, flag), Block(virtEnd, tailPhys, blockEndPredecessor->flag)});
} else {
// blockEndPredecessor will always be unmapped as blocks has to be terminated by an unmapped chunk
if (blockEndPredecessor != blocks.begin() && blockEndPredecessor->virt >= virt) {
// Move the unmapped block start backwards
blockEndPredecessor->virt = virtEnd;
} else {
// Else insert a new one and we're done
blocks.insert(blockEndSuccessor, {Block(virt, phys, flag), Block(virtEnd, UnmappedPa, false)});
auto blockStartSuccessor{blockEndPredecessor};
// Walk the block vector to find the start successor as this is more efficient than another binary search in most scenarios
while (std::prev(blockStartSuccessor)->virt >= virt)
std::advance(blockStartSuccessor, -1);
if (blockStartSuccessor->virt > virtEnd)
throw exception("Unexpected Memory Manager state!");
if (blockStartSuccessor->virt == virtEnd) {
// We need to create a new block as there are none spare that we would overwrite
blocks.insert(blockStartSuccessor, Block(virt, phys, flag));
} else {
// Reuse the first overlapped block as the head of the new mapping
blockStartSuccessor->virt = virt;
blockStartSuccessor->phys = phys;
blockStartSuccessor->flag = flag;
// Erase overwritten blocks
// NOTE(review): as listed, the ranged erase below only runs when eraseStart == blockEndPredecessor, which is an empty range - an `else` line may be missing from this listing, confirm
if (auto eraseStart{std::next(blockStartSuccessor)}; blockStartSuccessor != blockEndPredecessor) {
if (eraseStart == blockEndPredecessor)
blocks.erase(eraseStart, blockEndPredecessor);
// Unmaps the VA range [virt, virt + size) by editing the sorted block vector so that the range is covered by an unmapped block
// Throws if the end of the range is past vaLimit or if the block vector is in an unexpected state
// The caller must already hold blockMutex (see the declaration's note)
// NOTE(review): this listing has no closing braces and appears to be missing other short lines, confirm control flow against the repository before changing any logic
MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) {
TRACE_EVENT("containers", "FlatAddressSpaceMap::Unmap");
// NOTE(review): virt + size is computed in VaType with no wrap-around check - confirm callers cannot overflow it
VaType virtEnd{virt + size};
if (virtEnd > vaLimit)
// NOTE(review): the message says "map" although this is the unmap path
throw exception("Trying to map a block past the VA limit!");
// First block whose VA is >= virtEnd (Block::operator< compares a block's virt against a bare VA)
auto blockEndSuccessor{std::lower_bound(blocks.begin(), blocks.end(), virtEnd)};
if (blockEndSuccessor == blocks.begin())
throw exception("Unexpected Memory Manager state!");
// Last block that starts before virtEnd
auto blockEndPredecessor{std::prev(blockEndSuccessor)};
// Steps an iterator backwards until it points at a block that starts before virt
auto walkBackToPredecessor{[&](auto iter) {
while (iter->virt >= virt)
std::advance(iter, -1);
return iter;
// Erases the blocks inside the range given that the block at unmappedEnd is already unmapped
// NOTE(review): no call to this helper is visible in this listing - confirm where it is used
auto eraseBlocksWithEndUnmapped{[&] (auto unmappedEnd) {
auto blockStartPredecessor{walkBackToPredecessor(unmappedEnd)};
auto blockStartSuccessor{std::next(blockStartPredecessor)};
auto eraseEnd{[&]() {
if (blockStartPredecessor->Unmapped()) {
// If the start predecessor is unmapped then we can erase everything in our region and be done
return std::next(unmappedEnd);
} else {
// Else reuse the end predecessor as the start of our unmapped region then erase all up to it
unmappedEnd->virt = virt;
return unmappedEnd;
// We can't have two unmapped regions after each other
if (eraseEnd == blockStartSuccessor || (blockStartPredecessor->Unmapped() && eraseEnd->Unmapped()))
throw exception("Unexpected Memory Manager state!");
blocks.erase(blockStartSuccessor, eraseEnd);
// We can avoid any splitting logic if these are the case
if (blockEndPredecessor->Unmapped()) {
if (blockEndPredecessor->virt > virt)
return; // The region is unmapped, bail out early
} else if (blockEndSuccessor->virt == virtEnd && blockEndSuccessor->Unmapped()) {
return; // The region is unmapped here and doesn't need splitting, bail out early
} else if (blockEndSuccessor == blocks.end()) {
// This should never happen as the end should always follow an unmapped block
throw exception("Unexpected Memory Manager state!");
} else if (blockEndSuccessor->virt != virtEnd) {
// If one block is directly in front then we don't have to add a tail
// The previous block is mapped so we will need to add a tail with an offset
PaType tailPhys{[&]() {
if constexpr (PaContigSplit)
return blockEndPredecessor->phys + virtEnd - blockEndPredecessor->virt;
return blockEndPredecessor->phys;
if (blockEndPredecessor->virt >= virt) {
// If this block's start would be overlapped by the unmap then reuse it as a tail block
blockEndPredecessor->virt = virtEnd;
blockEndPredecessor->phys = tailPhys;
} else {
blocks.insert(blockEndSuccessor, {Block(virt, UnmappedPa, false), Block(virtEnd, tailPhys, blockEndPredecessor->flag)});
// NOTE(review): the trailing comment on the next line is cut off mid-word in the source
return; // The previous block is mapped and ends bef
// Walk the block vector to find the start predecessor as this is more efficient than another binary search in most scenarios
auto blockStartPredecessor{walkBackToPredecessor(blockEndPredecessor)};
auto blockStartSuccessor{std::next(blockStartPredecessor)};
if (blockStartSuccessor->virt > virtEnd)
throw exception("Unexpected Memory Manager state!");
if (blockStartSuccessor->virt == virtEnd) {
// There are no blocks between the start and the end that would let us skip inserting a new one for head
// The previous block may be unmapped, if so we don't need to insert any unmaps after it
if (blockStartPredecessor->Mapped())
blocks.insert(blockStartSuccessor, Block(virt, UnmappedPa, false));
} else if (blockStartPredecessor->Unmapped()) {
// If the previous block is unmapped
blocks.erase(blockStartSuccessor, blockEndPredecessor);
} else {
// Add in the unmapped block header
blockStartSuccessor->virt = virt;
blockStartSuccessor->phys = UnmappedPa;
// Erase overwritten blocks, skipping the first one as we have written the unmapped start block there
// NOTE(review): as listed, the ranged erase below only runs when eraseStart == blockEndPredecessor, which is an empty range - an `else` line may be missing from this listing, confirm
if (auto eraseStart{std::next(blockStartSuccessor)}; blockStartSuccessor != blockEndPredecessor) {
if (eraseStart == blockEndPredecessor)
blocks.erase(eraseStart, blockEndPredecessor);
// Thread-safe entry point for mapping: holds blockMutex for the duration of the MapLocked call
MAP_MEMBER(void)::Map(VaType virt, PaType phys, VaType size, bool flag) {
    std::scoped_lock guard{blockMutex};
    MapLocked(virt, phys, size, flag);
}
// Thread-safe entry point for unmapping: holds blockMutex for the duration of the UnmapLocked call
MAP_MEMBER(void)::Unmap(VaType virt, VaType size) {
    std::scoped_lock guard{blockMutex};
    UnmapLocked(virt, size);
}
// Copies size bytes starting at the given VA into destination, walking consecutive blocks while holding blockMutex
// Blocks with a null PA read back as zeroes when their flag is set (sparse) and otherwise raise a "Page fault" exception
// NOTE(review): this listing has no closing braces and appears to be missing other short lines (e.g. the lambda terminator and an `else`), confirm against the repository
MM_MEMBER(void)::Read(u8 *destination, VaType virt, VaType size) {
std::scoped_lock lock(this->blockMutex);
TRACE_EVENT("containers", "FlatMemoryManager::Read");
// NOTE(review): virtEnd is not used anywhere in the visible body
VaType virtEnd{virt + size};
// First block that starts strictly after virt, so its predecessor is the block containing virt
auto successor{std::upper_bound(this->blocks.begin(), this->blocks.end(), virt, [] (auto virt, const auto &block) {
return virt < block.virt;
auto predecessor{std::prev(successor)};
// Host pointer corresponding to virt inside the containing block
u8 *blockPhys{predecessor->phys + (virt - predecessor->virt)};
// NOTE(review): successor is dereferenced without an end() check - confirm a trailing block always exists past any readable VA
VaType blockReadSize{std::min(successor->virt - virt, size)};
while (size) {
if (predecessor->phys == nullptr) {
if (predecessor->flag) // Sparse mapping
std::memset(destination, 0, blockReadSize);
// NOTE(review): presumably the `else` branch of the sparse check above (no `else` line is present in this listing) - confirm
throw exception("Page fault at: 0x{:X}", predecessor->virt);
} else {
std::memcpy(destination, blockPhys, blockReadSize);
destination += blockReadSize;
size -= blockReadSize;
if (size) {
// Advance to the next block, reads there start at the block's own base so no offset is needed
predecessor = successor++;
blockPhys = predecessor->phys;
blockReadSize = std::min(successor->virt - predecessor->virt, size);
// Copies size bytes from source to the given VA, walking consecutive blocks while holding blockMutex
// Blocks with a null PA raise a "Page fault" exception unless their flag is set (sparse), in which case nothing is copied for them
// NOTE(review): this listing has no closing braces and appears to be missing other short lines (e.g. the lambda terminator), confirm against the repository
MM_MEMBER(void)::Write(VaType virt, u8 *source, VaType size) {
std::scoped_lock lock(this->blockMutex);
TRACE_EVENT("containers", "FlatMemoryManager::Write");
// NOTE(review): virtEnd is not used anywhere in the visible body
VaType virtEnd{virt + size};
// First block that starts strictly after virt, so its predecessor is the block containing virt
auto successor{std::upper_bound(this->blocks.begin(), this->blocks.end(), virt, [] (auto virt, const auto &block) {
return virt < block.virt;
auto predecessor{std::prev(successor)};
// Host pointer corresponding to virt inside the containing block
u8 *blockPhys{predecessor->phys + (virt - predecessor->virt)};
// NOTE(review): successor is dereferenced without an end() check - confirm a trailing block always exists past any writable VA
VaType blockWriteSize{std::min(successor->virt - virt, size)};
while (size) {
if (predecessor->phys == nullptr) {
if (!predecessor->flag) // Sparse mappings allow unmapped writes
throw exception("Page fault at: 0x{:X}", predecessor->virt);
} else {
std::memcpy(blockPhys, source, blockWriteSize);
source += blockWriteSize;
size -= blockWriteSize;
if (size) {
// Advance to the next block, writes there start at the block's own base so no offset is needed
predecessor = successor++;
blockPhys = predecessor->phys;
blockWriteSize = std::min(successor->virt - predecessor->virt, size);
// Both members are seeded from the vaStart parameter; the initialisers are listed in member declaration order (currentLinearAllocEnd, then vaStart)
ALLOC_MEMBER()::FlatAllocator(VaType vaStart, VaType vaLimit) : Base(vaLimit), currentLinearAllocEnd(vaStart), vaStart(vaStart) {}
// Finds a free region of the given size, marks it as allocated via MapLocked and returns its start VA, all while holding blockMutex
// A linear pass starting at currentLinearAllocEnd is attempted first, a search from the start of the block vector is the fallback
// Throws "Unexpected allocator state!" on the failure paths visible below
// NOTE(review): this listing has no closing braces and appears to be missing other short lines (e.g. `break;`/`else`), confirm control flow against the repository before changing any logic
ALLOC_MEMBER(VaType)::Allocate(VaType size) {
std::scoped_lock lock(this->blockMutex);
TRACE_EVENT("containers", "FlatAllocator::Allocate");
// UnmappedVa doubles as the "nothing found yet" marker for allocStart
VaType allocStart{UnmappedVa};
VaType allocEnd{currentLinearAllocEnd + size};
// The first comparison rejects wrap-around of the addition above, the second enforces the soft VA limit
if (allocEnd >= currentLinearAllocEnd && allocEnd <= this->vaLimit) {
auto allocEndSuccessor{std::lower_bound(this->blocks.begin(), this->blocks.end(), allocEnd)};
if (allocEndSuccessor == this->blocks.begin())
throw exception("Unexpected allocator state!");
auto allocEndPredecessor{std::prev(allocEndSuccessor)};
if (allocEndPredecessor->virt <= currentLinearAllocEnd) {
// No block starts inside the candidate range, so the linear position can be used as is
allocStart = currentLinearAllocEnd;
} else {
while (allocEndSuccessor != this->blocks.end()) {
// NOTE(review): as listed this picks the block when the gap is too small or already mapped, which reads inverted relative to the fallback search's loop condition further down - confirm intent
if (allocEndSuccessor->virt - allocEndPredecessor->virt < size || allocEndPredecessor->Mapped() ) {
allocStart = allocEndPredecessor->virt;
allocEndPredecessor = allocEndSuccessor++;
if (allocEndSuccessor == this->blocks.end()) {
// Reached the final block, allocate from its start if the range neither wraps nor passes the VA limit
allocEnd = allocEndPredecessor->virt + size;
if (allocEnd >= allocEndPredecessor->virt && allocEnd <= this->vaLimit)
allocStart = allocEndPredecessor->virt;
if (allocStart != UnmappedVa) {
currentLinearAllocEnd = allocStart + size;
} else { // If linear allocation overflows the AS then find a gap
if (this->blocks.size() <= 2)
throw exception("Unexpected allocator state!");
auto searchPredecessor{this->blocks.begin()};
auto searchSuccessor{std::next(searchPredecessor)};
// Skip forward while the gap between neighbouring blocks is too small or the leading block is mapped
while (searchSuccessor != this->blocks.end() &&
(searchSuccessor->virt - searchPredecessor->virt < size || searchPredecessor->Mapped())) {
searchPredecessor = searchSuccessor++;
if (searchSuccessor != this->blocks.end())
allocStart = searchPredecessor->virt;
// NOTE(review): presumably the `else` branch of the check above (no `else` line is present in this listing) - confirm
throw exception("Unexpected allocator state!");
// The allocator's PaType is bool, true marks the range as allocated
this->MapLocked(allocStart, true, size);
return allocStart;
// Marks the region [virt, virt + size) as allocated (the allocator's PaType is bool, true means allocated)
// MapLocked requires blockMutex to be held and nothing here holds it, so this goes through the locking Map wrapper instead of calling MapLocked directly
// Propagates any exception thrown by the underlying map operation
ALLOC_MEMBER(void)::AllocateFixed(VaType virt, VaType size) {
    this->Map(virt, true, size);
}
// Releases the region [virt, virt + size) so that it can be handed out again
// UnmapLocked requires blockMutex to be held and nothing here holds it, so this goes through the locking Unmap wrapper instead of calling UnmapLocked directly
// Propagates any exception thrown by the underlying unmap operation
ALLOC_MEMBER(void)::Free(VaType virt, VaType size) {
    this->Unmap(virt, size);
}
@ -3,6 +3,7 @@
#pragma once
#include <common/trace.h>
#include <common.h>
namespace skyline {
@ -51,10 +52,15 @@ namespace skyline {
template<typename F>
[[noreturn]] void Process(F function) {
TRACE_EVENT_BEGIN("containers", "CircularQueue::Process");
while (true) {
if (start == end) {
std::unique_lock lock(productionMutex);
produceCondition.wait(lock, [this]() { return start != end; });
TRACE_EVENT_BEGIN("containers", "CircularQueue::Process");
while (start != end) {
@ -183,8 +183,6 @@ namespace skyline::signal {
std::call_once(signalHandlerOnce[signal], [signal, &action]() {
struct sigaction oldAction;
Sigaction(signal, &action, &oldAction);
if (oldAction.sa_flags && oldAction.sa_flags != action.sa_flags)
throw exception("Old sigaction flags aren't equivalent to the replaced signal: {:#b} | {:#b}", oldAction.sa_flags, action.sa_flags);
DefaultSignalHandlers.at(signal).function = (oldAction.sa_flags & SA_SIGINFO) ? oldAction.sa_sigaction : reinterpret_cast<void (*)(int, struct siginfo *, void *)>(oldAction.sa_handler);
perfetto::Category("kernel").SetDescription("Events from parts of the HLE kernel"),
perfetto::Category("guest").SetDescription("Events relating to guest code"),
perfetto::Category("gpu").SetDescription("Events from the emulated GPU"),
perfetto::Category("service").SetDescription("Events from the HLE sysmodule implementations")
perfetto::Category("service").SetDescription("Events from the HLE sysmodule implementations"),
perfetto::Category("containers").SetDescription("Events from custom container implementations")
namespace skyline::trace {
@ -13,6 +13,7 @@ namespace skyline::service {
Busy = 16, // EBUSY
InvalidArgument = 22, // EINVAL
InappropriateIoctlForDevice = 25, // ENOTTY
FunctionNotImplemented = 38, // ENOSYS
NotSupported = 95, // EOPNOTSUPP, ENOTSUP
TimedOut = 110, // ETIMEDOUT
@ -1,10 +1,16 @@
// SPDX-License-Identifier: MIT OR MPL-2.0
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <common/address_space.inc>
#include <soc.h>
#include <services/nvdrv/devices/deserialisation/deserialisation.h>
#include "as_gpu.h"
namespace skyline {
template class FlatAddressSpaceMap<u32, 0, bool, false, false, 32>;
template class FlatAllocator<u32, 0, 32>;
namespace skyline::service::nvdrv::device::nvhost {
AsGpu::AsGpu(const DeviceState &state, Core &core, const SessionContext &ctx) : NvDevice(state, core, ctx) {}
@ -14,38 +20,66 @@ namespace skyline::service::nvdrv::device::nvhost {
PosixResult AsGpu::AllocSpace(In<u32> pages, In<u32> pageSize, In<MappingFlags> flags, InOut<u64> offset) {
// TODO: track this on the nvdrv side and have the gmmu only do virt -> phys
// Also fix error codes
u64 size{static_cast<u64>(pages) * static_cast<u64>(pageSize)};
state.logger->Debug("pages: 0x{:X}, pageSize: 0x{:X}, flags: ( fixed: {}, sparse: {} ), offset: 0x{:X}", pages, pageSize, flags.fixed, flags.sparse, offset);
if (pageSize != VM::PageSize && pageSize != vm.bigPageSize)
return PosixResult::InvalidArgument;
if (pageSize != vm.bigPageSize && flags.sparse)
return PosixResult::FunctionNotImplemented;
u32 pageSizeBits{pageSize == VM::PageSize ? VM::PageSizeBits : vm.bigPageSizeBits};
auto &allocator{[&] () -> auto & {
if (pageSize == VM::PageSize)
return vm.smallPageAllocator;
return vm.bigPageAllocator;
if (flags.fixed)
offset = state.soc->gmmu.ReserveFixed(offset, size);
allocator->AllocateFixed(offset >> pageSizeBits, pages);
offset = state.soc->gmmu.ReserveSpace(size, offset); // offset contains the input alignment
offset = static_cast<u64>(allocator->Allocate(pages)) << pageSizeBits;
if (offset == 0) {
state.logger->Warn("Failed to allocate GPU address space region!");
return PosixResult::InvalidArgument;
u64 size{static_cast<u64>(pages) * static_cast<u64>(pageSize)};
if (flags.sparse)
state.soc->gm20b.gmmu.Map(offset, soc::gm20b::GM20B::GMMU::SparsePlaceholderAddress(), size, true);
allocationMap[offset] = {
.size = size,
.pageSize = pageSize,
.sparse = flags.sparse
return PosixResult::Success;
PosixResult AsGpu::FreeSpace(In<u64> offset, In<u32> pages, In<u32> pageSize) {
// TODO: implement this when we add nvdrv side address space allocation
// TODO: implement after UNMAP
return PosixResult::Success;
PosixResult AsGpu::UnmapBuffer(In<u64> offset) {
state.logger->Debug("offset: 0x{:X}", offset);
try {
auto region{regionMap.at(offset)};
auto mapping{mappingMap.at(offset)};
// Non-fixed regions are unmapped so that they can be used by future non-fixed mappings
if (!region.fixed)
if (!state.soc->gmmu.Unmap(offset, region.size))
state.logger->Warn("Failed to unmap region at 0x{:X}", offset);
if (!mapping->fixed) {
auto &allocator{mapping->bigPage ? vm.bigPageAllocator : vm.smallPageAllocator};
u32 pageSizeBits{mapping->bigPage ? vm.bigPageSizeBits : VM::PageSizeBits};
allocator->Free(mapping->offset >> pageSizeBits, mapping->size >> pageSizeBits);
if (mapping->sparseAlloc)
state.soc->gm20b.gmmu.Map(offset, soc::gm20b::GM20B::GMMU::SparsePlaceholderAddress(), mapping->size, true);
state.soc->gm20b.gmmu.Unmap(offset, mapping->size);
} catch (const std::out_of_range &e) {
state.logger->Warn("Couldn't find region to unmap at 0x{:X}", offset);
@ -53,62 +87,94 @@ namespace skyline::service::nvdrv::device::nvhost {
return PosixResult::Success;
PosixResult AsGpu::MapBufferEx(In<MappingFlags> flags, In<u32> kind, In<core::NvMap::Handle::Id> handle, InOut<u32> pageSize, In<u64> bufferOffset, In<u64> mappingSize, InOut<u64> offset) {
state.logger->Debug("flags: ( fixed: {}, remap: {} ), kind: {}, handle: {}, pageSize: 0x{:X}, bufferOffset: 0x{:X}, mappingSize: 0x{:X}, offset: 0x{:X}", flags.fixed, flags.remap, kind, handle, pageSize, bufferOffset, mappingSize, offset);
PosixResult AsGpu::MapBufferEx(In<MappingFlags> flags, In<u32> kind, In<core::NvMap::Handle::Id> handle, In<u64> bufferOffset, In<u64> mappingSize, InOut<u64> offset) {
if (!vm.initialised)
return PosixResult::InvalidArgument;
state.logger->Debug("flags: ( fixed: {}, remap: {} ), kind: {}, handle: {}, bufferOffset: 0x{:X}, mappingSize: 0x{:X}, offset: 0x{:X}", flags.fixed, flags.remap, kind, handle, bufferOffset, mappingSize, offset);
if (flags.remap) {
auto region{regionMap.lower_bound(offset)};
if (region == regionMap.end()) {
try {
auto mapping{mappingMap.at(offset)};
if (mapping->size < mappingSize) {
state.logger->Warn("Cannot remap a partially mapped GPU address space region: 0x{:X}", offset);
return PosixResult::InvalidArgument;
u64 gpuAddress{offset + bufferOffset};
u8 *cpuPtr{mapping->ptr + bufferOffset};
state.soc->gm20b.gmmu.Map(gpuAddress, cpuPtr, mappingSize);
return PosixResult::Success;
} catch (const std::out_of_range &e) {
state.logger->Warn("Cannot remap an unmapped GPU address space region: 0x{:X}", offset);
return PosixResult::InvalidArgument;
if (region->second.size < mappingSize) {
state.logger->Warn("Cannot remap an partially mapped GPU address space region: 0x{:X}", offset);
return PosixResult::InvalidArgument;
u64 gpuAddress{offset + bufferOffset};
u8 *cpuPtr{region->second.ptr + bufferOffset};
if (!state.soc->gmmu.MapFixed(gpuAddress, cpuPtr, mappingSize)) {
state.logger->Warn("Failed to remap GPU address space region: 0x{:X}", gpuAddress);
return PosixResult::InvalidArgument;
return PosixResult::Success;
auto h{core.nvMap.GetHandle(handle)};
if (!h)
return PosixResult::InvalidArgument;
if (auto err{h->Duplicate(ctx.internalSession)}; err != PosixResult::Success)
return err;
u8 *cpuPtr{reinterpret_cast<u8 *>(h->address + bufferOffset)};
u64 size{mappingSize ? mappingSize : h->origSize};
if (flags.fixed)
offset = state.soc->gmmu.MapFixed(offset, cpuPtr, size);
offset = state.soc->gmmu.MapAllocate(cpuPtr, size);
if (flags.fixed) {
auto alloc{allocationMap.upper_bound(offset)};
if (offset == 0) {
state.logger->Warn("Failed to map GPU address space region!");
return PosixResult::InvalidArgument;
if (alloc-- == allocationMap.begin() || (offset - alloc->first) + size > alloc->second.size)
throw exception("Cannot perform a fixed mapping into an unallocated region!");
state.soc->gm20b.gmmu.Map(offset, cpuPtr, size);
auto mapping{std::make_shared<Mapping>(cpuPtr, offset, size, true, false, alloc->second.sparse)};
mappingMap[offset] = mapping;
} else {
bool bigPage{[&] () {
if (util::IsAligned(h->align, vm.bigPageSize))
return true;
else if (util::IsAligned(h->align, VM::PageSize))
return false;
throw exception("Invalid handle alignment: 0x{:X}", h->align);
auto &allocator{bigPage ? vm.bigPageAllocator : vm.smallPageAllocator};
u32 pageSize{bigPage ? vm.bigPageSize : VM::PageSize};
u32 pageSizeBits{bigPage ? vm.bigPageSizeBits : VM::PageSizeBits};
offset = static_cast<u64>(allocator->Allocate(util::AlignUp(size, pageSize) >> pageSizeBits)) << pageSizeBits;
state.soc->gm20b.gmmu.Map(offset, cpuPtr, size);
auto mapping{std::make_shared<Mapping>(cpuPtr, offset, size, false, bigPage, false)};
mappingMap[offset] = mapping;
state.logger->Debug("Mapped to 0x{:X}", offset);
regionMap[offset] = {cpuPtr, size, flags.fixed};
return PosixResult::Success;
PosixResult AsGpu::GetVaRegions(In<u64> bufAddr, InOut<u32> bufSize, Out<std::array<VaRegion, 2>> vaRegions) {
// TODO: impl when we move allocator to nvdrv
if (!vm.initialised)
return PosixResult::InvalidArgument;
vaRegions = std::array<VaRegion, 2> {
.pageSize = VM::PageSize,
.pages = vm.smallPageAllocator->vaLimit - vm.smallPageAllocator->vaStart,
.offset = vm.smallPageAllocator->vaStart << VM::PageSizeBits,
.pageSize = vm.bigPageSize,
.pages = vm.bigPageAllocator->vaLimit - vm.bigPageAllocator->vaStart,
.offset = vm.bigPageAllocator->vaStart << vm.bigPageSizeBits,
return PosixResult::Success;
@ -116,30 +182,83 @@ namespace skyline::service::nvdrv::device::nvhost {
return GetVaRegions(bufAddr, bufSize, vaRegions);
PosixResult AsGpu::AllocAsEx(In<u32> bigPageSize, In<FileDescriptor> asFd, In<u32> flags, In<u64> vaRangeStart, In<u64> vaRangeEnd, In<u64> vaRangeSplit) {
// TODO: create the allocator here
PosixResult AsGpu::AllocAsEx(In<u32> flags, In<FileDescriptor> asFd, In<u32> bigPageSize, In<u64> vaRangeStart, In<u64> vaRangeEnd, In<u64> vaRangeSplit) {
if (vm.initialised)
throw exception("Cannot initialise an address space twice!");
state.logger->Debug("bigPageSize: 0x{:X}, asFd: {}, flags: 0x{:X}, vaRangeStart: 0x{:X}, vaRangeEnd: 0x{:X}, vaRangeSplit: 0x{:X}",
bigPageSize, asFd, flags, vaRangeStart, vaRangeEnd, vaRangeSplit);
if (bigPageSize) {
if (!std::ispow2(bigPageSize)) {
state.logger->Error("Non power-of-2 big page size: 0x{:X}!", bigPageSize);
return PosixResult::InvalidArgument;
if (!(bigPageSize & VM::SupportedBigPageSizes)) {
state.logger->Error("Unsupported big page size: 0x{:X}!", bigPageSize);
return PosixResult::InvalidArgument;
vm.bigPageSize = bigPageSize;
vm.bigPageSizeBits = std::countr_zero(bigPageSize);
vm.vaRangeStart = bigPageSize << VM::VaStartShift;
if (vaRangeStart) {
vm.vaRangeStart = vaRangeStart;
vm.vaRangeSplit = vaRangeSplit;
vm.vaRangeEnd = vaRangeEnd;
u64 startPages{vm.vaRangeStart >> VM::PageSizeBits};
u64 endPages{vm.vaRangeSplit >> VM::PageSizeBits};
vm.smallPageAllocator = std::make_unique<VM::Allocator>(startPages, endPages);
u64 startBigPages{vm.vaRangeSplit >> vm.bigPageSizeBits};
u64 endBigPages{(vm.vaRangeEnd - vm.vaRangeSplit) >> vm.bigPageSizeBits};
vm.bigPageAllocator = std::make_unique<VM::Allocator>(startBigPages, endBigPages);
vm.initialised = true;
return PosixResult::Success;
PosixResult AsGpu::Remap(span<RemapEntry> entries) {
constexpr u32 BigPageSize{0x10}; //!< The big page size of the GPU
for (const auto &entry : entries) {
auto h{core.nvMap.GetHandle(entry.handle)};
if (!h)
u64 virtAddr{static_cast<u64>(entry.asOffsetBigPages) << vm.bigPageSizeBits};
u64 size{static_cast<u64>(entry.bigPages) << vm.bigPageSizeBits};
auto alloc{allocationMap.upper_bound(virtAddr)};
if (alloc-- == allocationMap.begin() || (virtAddr - alloc->first) + size > alloc->second.size) {
state.logger->Warn("Cannot remap into an unallocated region!");
return PosixResult::InvalidArgument;
u64 virtAddr{static_cast<u64>(entry.asOffsetBigPages) << BigPageSize};
u8 *cpuPtr{reinterpret_cast<u8 *>(h->address + (static_cast<u64>(entry.handleOffsetBigPages) << BigPageSize))};
u64 size{static_cast<u64>(entry.bigPages) << BigPageSize};
if (!alloc->second.sparse) {
state.logger->Warn("Cannot remap a non-sparse mapping!");
return PosixResult::InvalidArgument;
state.soc->gmmu.MapFixed(virtAddr, cpuPtr, size);
if (!entry.handle) {
state.soc->gm20b.gmmu.Map(virtAddr, soc::gm20b::GM20B::GMMU::SparsePlaceholderAddress(), size, true);
} else {
auto h{core.nvMap.GetHandle(entry.handle)};
if (!h)
return PosixResult::InvalidArgument;
u8 *cpuPtr{reinterpret_cast<u8 *>(h->address + (static_cast<u64>(entry.handleOffsetBigPages) << vm.bigPageSizeBits))};
state.soc->gm20b.gmmu.Map(virtAddr, cpuPtr, size);
return PosixResult::Success;
#include <services/nvdrv/devices/deserialisation/macro_def.h>
#include <services/nvdrv/devices/deserialisation/macro_def.inc>
static constexpr u32 AsGpuMagic{0x41};
@ -152,7 +271,7 @@ namespace skyline::service::nvdrv::device::nvhost {
UnmapBuffer, ARGS(In<u64>))
MapBufferEx, ARGS(In<MappingFlags>, In<u32>, In<core::NvMap::Handle::Id>, InOut<u32>, In<u64>, In<u64>, InOut<u64>))
MapBufferEx, ARGS(In<MappingFlags>, In<u32>, In<core::NvMap::Handle::Id>, Pad<u32>, In<u64>, In<u64>, InOut<u64>))
GetVaRegions, ARGS(In<u64>, InOut<u32>, Pad<u32>, Out<std::array<VaRegion, 2>>))
@ -166,5 +285,5 @@ namespace skyline::service::nvdrv::device::nvhost {
GetVaRegions3, ARGS(In<u64>, InOut<u32>, Pad<u32>, Out<std::array<VaRegion, 2>>))
#include <services/nvdrv/devices/deserialisation/macro_undef.h>
#include <services/nvdrv/devices/deserialisation/macro_undef.inc>
@ -3,6 +3,8 @@
#pragma once
#include <common/address_space.h>
#include <services/nvdrv/devices/nvdevice.h>
namespace skyline::service::nvdrv::device::nvhost {
@ -12,18 +14,64 @@ namespace skyline::service::nvdrv::device::nvhost {
class AsGpu : public NvDevice {
struct AddressSpaceRegion {
struct Mapping {
u8 *ptr;
u64 offset;
u64 size;
bool fixed;
bool bigPage; // Only valid if fixed == false
bool sparseAlloc;
Mapping(u8 *ptr, u64 offset, u64 size, bool fixed, bool bigPage, bool sparseAlloc) : ptr(ptr),
sparseAlloc(sparseAlloc) {}
std::map<u64, AddressSpaceRegion> regionMap; //!< This maps the base addresses of mapped buffers to their total sizes and mapping type, this is needed as what was originally a single buffer may have been split into multiple GPU side buffers with the remap flag.
struct Allocation {
u64 size;
std::list<std::shared_ptr<Mapping>> mappings;
u32 pageSize;
bool sparse;
std::map<u64, std::shared_ptr<Mapping>> mappingMap; //!< This maps the base addresses of mapped buffers to their total sizes and mapping type, this is needed as what was originally a single buffer may have been split into multiple GPU side buffers with the remap flag.
std::map<u64, Allocation> allocationMap;
// Per-address-space virtual memory state, the defaults below apply until AllocAsEx overrides them
struct VM {
// Small page size in bytes and its log2
static constexpr u32 PageSize{0x1000};
static constexpr u32 PageSizeBits{std::countr_zero(PageSize)};
// Bitmask of accepted big page sizes (0x10000 | 0x20000), AllocAsEx tests a requested size against it with a bitwise AND
static constexpr u32 SupportedBigPageSizes{0x30000};
static constexpr u32 DefaultBigPageSize{0x20000};
// Big page size in bytes and its log2, both replaced by AllocAsEx when a non-zero size is requested
u32 bigPageSize{DefaultBigPageSize};
u32 bigPageSizeBits{std::countr_zero(DefaultBigPageSize)};
// The start of the VA range is derived as bigPageSize << VaStartShift
static constexpr u32 VaStartShift{10};
static constexpr u64 DefaultVaSplit{1ULL << 34};
static constexpr u64 DefaultVaRange{1ULL << 37};
// AllocAsEx builds the small page allocator from [vaRangeStart, vaRangeSplit) and the big page allocator from vaRangeSplit and vaRangeEnd
u64 vaRangeStart{DefaultBigPageSize << VaStartShift};
u64 vaRangeSplit{DefaultVaSplit};
u64 vaRangeEnd{DefaultVaRange};
// The allocators work in units of pages rather than bytes (AllocAsEx passes shifted page numbers), both are null until AllocAsEx creates them
using Allocator = FlatAllocator<u32, 0, 32>;
std::unique_ptr<Allocator> bigPageAllocator{};
std::unique_ptr<Allocator> smallPageAllocator{};
// Set by AllocAsEx once the allocators exist, checked by the other ioctls before they touch them
bool initialised{};
} vm;
struct MappingFlags {
bool fixed : 1;
u8 _pad0_ : 7;
bool sparse : 1;
u8 _pad0_ : 6;
bool remap : 1;
u32 _pad1_ : 23;
@ -77,7 +125,7 @@ namespace skyline::service::nvdrv::device::nvhost {
* @brief Maps a region into this address space with extra parameters
* @url https://switchbrew.org/wiki/NV_services#NVGPU_AS_IOCTL_MAP_BUFFER_EX
PosixResult MapBufferEx(In<MappingFlags> flags, In<u32> kind, In<core::NvMap::Handle::Id> handle, InOut<u32> pageSize, In<u64> bufferOffset, In<u64> mappingSize, InOut<u64> offset);
PosixResult MapBufferEx(In<MappingFlags> flags, In<u32> kind, In<core::NvMap::Handle::Id> handle, In<u64> bufferOffset, In<u64> mappingSize, InOut<u64> offset);
* @brief Returns info about the address space and its page sizes
@ -94,7 +142,7 @@ namespace skyline::service::nvdrv::device::nvhost {
* @brief Allocates this address space with the given parameters
* @url https://switchbrew.org/wiki/NV_services#NVGPU_AS_IOCTL_ALLOC_AS_EX
PosixResult AllocAsEx(In<u32> bigPageSize, In<FileDescriptor> asFd, In<u32> flags, In<u64> vaRangeStart, In<u64> vaRangeEnd, In<u64> vaRangeSplit);
PosixResult AllocAsEx(In<u32> flags, In<FileDescriptor> asFd, In<u32> bigPageSize, In<u64> vaRangeStart, In<u64> vaRangeEnd, In<u64> vaRangeSplit);
* @brief Remaps a region of the GPU address space
@ -234,7 +234,7 @@ namespace skyline::service::nvdrv::device::nvhost {
return nullptr;
#include <services/nvdrv/devices/deserialisation/macro_def.h>
#include <services/nvdrv/devices/deserialisation/macro_def.inc>
static constexpr u32 CtrlMagic{0};
@ -254,5 +254,5 @@ namespace skyline::service::nvdrv::device::nvhost {
PosixResult::InvalidArgument) // GetConfig isn't available in production
#include <services/nvdrv/devices/deserialisation/macro_undef.h>
#include <services/nvdrv/devices/deserialisation/macro_undef.inc>
@ -62,7 +62,7 @@ namespace skyline::service::nvdrv::device::nvhost {
#include <services/nvdrv/devices/deserialisation/macro_def.h>
#include <services/nvdrv/devices/deserialisation/macro_def.inc>
static constexpr u32 CtrlGpuMagic{0x47};
@ -77,5 +77,5 @@ namespace skyline::service::nvdrv::device::nvhost {
IOCTL_CASE_ARGS(OUT, SIZE(0x8), MAGIC(CtrlGpuMagic), FUNC(0x14),
GetActiveSlotMask, ARGS(Out<u32>, Out<u32>))
#include <services/nvdrv/devices/deserialisation/macro_undef.h>
#include <services/nvdrv/devices/deserialisation/macro_undef.inc>
@ -104,7 +104,7 @@ namespace skyline::service::nvdrv::device::nvhost {
#include <services/nvdrv/devices/deserialisation/macro_def.h>
#include <services/nvdrv/devices/deserialisation/macro_def.inc>
static constexpr u32 GpuChannelUserMagic{0x47};
static constexpr u32 GpuChannelMagic{0x48};
@ -138,5 +138,5 @@ namespace skyline::service::nvdrv::device::nvhost {
SubmitGpfifo2, ARGS(In<u64>, In<u32>, InOut<SubmitGpfifoFlags>, InOut<Fence>))
#include <services/nvdrv/devices/deserialisation/macro_undef.h>
#include <services/nvdrv/devices/deserialisation/macro_undef.inc>
@ -115,7 +115,7 @@ namespace skyline::service::nvdrv::device {
return PosixResult::Success;
#include "deserialisation/macro_def.h"
#include "deserialisation/macro_def.inc"
static constexpr u32 NvMapMagic{1};
@ -132,6 +132,6 @@ namespace skyline::service::nvdrv::device {
GetId, ARGS(Out<NvMapCore::Handle::Id>, In<NvMapCore::Handle::Id>))
#include "deserialisation/macro_undef.h"
#include "deserialisation/macro_undef.inc"
@ -3,7 +3,6 @@
#pragma once
#include "soc/gmmu.h"
#include "soc/host1x.h"
#include "soc/gm20b.h"
@ -14,10 +13,9 @@ namespace skyline::soc {
class SOC {
gmmu::GraphicsMemoryManager gmmu;
host1x::Host1X host1x;
gm20b::GM20B gm20b;
SOC(const DeviceState &state) : gmmu(state), gm20b(state) {}
SOC(const DeviceState &state) : gm20b(state) {}
Normal file
Normal file
@ -0,0 +1,20 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <common/address_space.inc>
#include "gm20b.h"
namespace skyline {
template class FlatAddressSpaceMap<u64, 0, u8 *, nullptr, true, soc::gm20b::GM20B::AddressSpaceBits>;
template class FlatMemoryManager<u64, 0, soc::gm20b::GM20B::AddressSpaceBits>;
namespace skyline::soc::gm20b {
GM20B::GM20B(const DeviceState &state) :
gpfifo(state) {}
@ -3,23 +3,28 @@
#pragma once
#include <common/address_space.h>
#include "gm20b/engines/maxwell_3d.h"
#include "gm20b/gpfifo.h"
namespace skyline::soc::gm20b {
* @brief The GPU block in the X1, it contains all GPU engines required for accelerating graphics operations
* @note We omit parts of components related to external access such as the GM20B Host, all accesses to the external components are done directly
* @note We omit parts of components related to external access such as the grhost, all accesses to the external components are done directly
class GM20B {
static constexpr u8 AddressSpaceBits{40}; //!< The width of the GMMU AS
using GMMU = FlatMemoryManager<u64, 0, AddressSpaceBits>;
engine::Engine fermi2D;
engine::maxwell3d::Maxwell3D maxwell3D;
engine::Engine maxwellCompute;
engine::Engine maxwellDma;
engine::Engine keplerMemory;
GPFIFO gpfifo;
GMMU gmmu;
GM20B(const DeviceState &state) : fermi2D(state), keplerMemory(state), maxwell3D(state), maxwellCompute(state), maxwellDma(state), gpfifo(state) {}
GM20B(const DeviceState &state);
@ -1,7 +1,7 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <soc/gmmu.h>
#include <common/address_space.h>
#include <soc/gm20b/engines/maxwell_3d.h>
namespace skyline::soc::gm20b::engine::maxwell3d {
@ -157,7 +157,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
switch (registers.semaphore.info.structureSize) {
case Registers::SemaphoreInfo::StructureSize::OneWord:
state.soc->gmmu.Write<u32>(static_cast<u32>(result), registers.semaphore.address.Pack());
state.soc->gm20b.gmmu.Write<u32>(registers.semaphore.address.Pack(), static_cast<u32>(result));
case Registers::SemaphoreInfo::StructureSize::FourWords: {
// Convert the current nanosecond time to GPU ticks
@ -167,7 +167,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
u64 nsTime{util::GetTimeNs()};
u64 timestamp{(nsTime / NsToTickDenominator) * NsToTickNumerator + ((nsTime % NsToTickDenominator) * NsToTickNumerator) / NsToTickDenominator};
state.soc->gmmu.Write<FourWordResult>(FourWordResult{result, timestamp}, registers.semaphore.address.Pack());
state.soc->gm20b.gmmu.Write<FourWordResult>(registers.semaphore.address.Pack(), FourWordResult{result, timestamp});
@ -56,7 +56,7 @@ namespace skyline::soc::gm20b {
state.soc->gmmu.Read<u32>(pushBufferData, gpEntry.Address());
state.soc->gm20b.gmmu.Read<u32>(pushBufferData, gpEntry.Address());
for (auto entry{pushBufferData.begin()}; entry != pushBufferData.end(); entry++) {
// An entry containing all zeroes is a NOP, skip over it
@ -88,8 +88,7 @@ namespace skyline::soc::gm20b {
state.logger->Warn("Unsupported pushbuffer method SecOp: {}", static_cast<u8>(methodHeader.secOp));
throw exception("Unsupported pushbuffer method SecOp: {}", static_cast<u8>(methodHeader.secOp));
@ -106,7 +105,7 @@ namespace skyline::soc::gm20b {
try {
signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, signal::ExceptionalSignalHandler);
pushBuffers->Process([this](GpEntry gpEntry) {
state.logger->Debug("Processing pushbuffer: 0x{:X}", gpEntry.Address());
state.logger->Warn("Processing pushbuffer: 0x{:X}", gpEntry.Address());
} catch (const signal::SignalException &e) {
@ -1,214 +0,0 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <kernel/types/KProcess.h>
#include "gmmu.h"
namespace skyline::soc::gmmu {
constexpr u64 GpuPageSize{1 << 16}; //!< The page size of the GPU address space
GraphicsMemoryManager::GraphicsMemoryManager(const DeviceState &state) : state(state) {
constexpr u64 gpuAddressSpaceSize{1UL << 40}; //!< The size of the GPU address space
constexpr u64 gpuAddressSpaceBase{0x100000}; //!< The base of the GPU address space - must be non-zero
// Create the initial chunk that will be split to create new chunks
ChunkDescriptor baseChunk(gpuAddressSpaceBase, gpuAddressSpaceSize, nullptr, ChunkState::Unmapped);
std::optional<ChunkDescriptor> GraphicsMemoryManager::FindChunk(ChunkState desiredState, u64 size, u64 alignment) {
auto chunk{std::find_if(chunks.begin(), chunks.end(), [desiredState, size, alignment](const ChunkDescriptor &chunk) -> bool {
return (alignment ? util::IsAligned(chunk.virtualAddress, alignment) : true) && chunk.size > size && chunk.state == desiredState;
if (chunk != chunks.end())
return *chunk;
return std::nullopt;
u64 GraphicsMemoryManager::InsertChunk(const ChunkDescriptor &newChunk) {
auto chunkEnd{chunks.end()};
for (auto chunk{chunks.begin()}; chunk != chunkEnd; chunk++) {
if (chunk->CanContain(newChunk)) {
auto oldChunk{*chunk};
u64 newSize{newChunk.virtualAddress - chunk->virtualAddress};
u64 extension{chunk->size - newSize - newChunk.size};
if (newSize == 0) {
*chunk = newChunk;
} else {
chunk->size = newSize;
chunk = chunks.insert(std::next(chunk), newChunk);
if (extension)
chunks.insert(std::next(chunk), ChunkDescriptor(newChunk.virtualAddress + newChunk.size, extension, (oldChunk.state == ChunkState::Mapped) ? (oldChunk.cpuPtr + newSize + newChunk.size) : nullptr, oldChunk.state));
return newChunk.virtualAddress;
} else if (chunk->virtualAddress + chunk->size > newChunk.virtualAddress) {
chunk->size = newChunk.virtualAddress - chunk->virtualAddress;
// Deletes all chunks that are within the chunk being inserted and split the final one
auto tailChunk{std::next(chunk)};
while (tailChunk != chunkEnd) {
if (tailChunk->virtualAddress + tailChunk->size >= newChunk.virtualAddress + newChunk.size)
tailChunk = chunks.erase(tailChunk);
chunkEnd = chunks.end();
// The given chunk is too large to fit into existing chunks
if (tailChunk == chunkEnd)
u64 chunkSliceOffset{newChunk.virtualAddress + newChunk.size - tailChunk->virtualAddress};
tailChunk->virtualAddress += chunkSliceOffset;
tailChunk->size -= chunkSliceOffset;
if (tailChunk->state == ChunkState::Mapped)
tailChunk->cpuPtr += chunkSliceOffset;
// If the size of the head chunk is zero then we can directly replace it with our new one rather than inserting it
auto headChunk{std::prev(tailChunk)};
if (headChunk->size == 0)
*headChunk = newChunk;
chunks.insert(std::next(headChunk), newChunk);
return newChunk.virtualAddress;
throw exception("Failed to insert chunk into GPU address space!");
/**
 * @brief Reserves an automatically chosen region of the GPU virtual address space
 * @param size The requested size, it is aligned up to GpuPageSize before searching
 * @param alignment The alignment forwarded to FindChunk
 * @return The value returned by InsertChunk for the reserved chunk, or 0 if FindChunk found no suitable unmapped chunk
 */
u64 GraphicsMemoryManager::ReserveSpace(u64 size, u64 alignment) {
size = util::AlignUp(size, GpuPageSize);
// The lock is taken before the lookup so that the search and the insertion happen under the same lock
std::unique_lock lock(mutex);
auto newChunk{FindChunk(ChunkState::Unmapped, size, alignment)};
if (!newChunk) [[unlikely]]
return 0;
// Work on a copy of the found descriptor: only its size and state are overridden, the address is kept
auto chunk{*newChunk};
chunk.size = size;
chunk.state = ChunkState::Reserved;
return InsertChunk(chunk);
/**
 * @brief Reserves a region of the GPU virtual address space at a caller-supplied address
 * @param virtualAddress The base address of the region, it must be aligned to GpuPageSize
 * @param size The requested size, it is aligned up to GpuPageSize
 * @return The value returned by InsertChunk for the reserved chunk, or 0 if virtualAddress is not aligned to GpuPageSize
 */
u64 GraphicsMemoryManager::ReserveFixed(u64 virtualAddress, u64 size) {
if (!util::IsAligned(virtualAddress, GpuPageSize)) [[unlikely]]
return 0;
size = util::AlignUp(size, GpuPageSize);
std::unique_lock lock(mutex);
// Reserved chunks carry no CPU pointer, hence the nullptr
return InsertChunk(ChunkDescriptor(virtualAddress, size, nullptr, ChunkState::Reserved));
/**
 * @brief Maps a CPU memory region into an automatically chosen region of the GPU virtual address space
 * @param cpuPtr The CPU pointer stored in the mapped chunk
 * @param size The requested size, it is aligned up to GpuPageSize before searching
 * @return The value returned by InsertChunk for the mapped chunk, or 0 if FindChunk found no suitable unmapped chunk
 */
u64 GraphicsMemoryManager::MapAllocate(u8 *cpuPtr, u64 size) {
size = util::AlignUp(size, GpuPageSize);
std::unique_lock lock(mutex);
// No alignment argument is passed here, so FindChunk is called with its default alignment
auto mappedChunk{FindChunk(ChunkState::Unmapped, size)};
if (!mappedChunk) [[unlikely]]
return 0;
// Work on a copy of the found descriptor: the address is kept while pointer, size and state are overridden
auto chunk{*mappedChunk};
chunk.cpuPtr = cpuPtr;
chunk.size = size;
chunk.state = ChunkState::Mapped;
return InsertChunk(chunk);
/**
 * @brief Maps a CPU memory region at a caller-supplied address in the GPU virtual address space
 * @param virtualAddress The base address of the mapping, it must be aligned to GpuPageSize
 * @param cpuPtr The CPU pointer stored in the mapped chunk
 * @param size The requested size, it is aligned up to GpuPageSize
 * @return The value returned by InsertChunk for the mapped chunk, or 0 if virtualAddress is not aligned to GpuPageSize
 */
u64 GraphicsMemoryManager::MapFixed(u64 virtualAddress, u8 *cpuPtr, u64 size) {
if (!util::IsAligned(virtualAddress, GpuPageSize)) [[unlikely]]
return 0;
size = util::AlignUp(size, GpuPageSize);
std::unique_lock lock(mutex);
return InsertChunk(ChunkDescriptor(virtualAddress, size, cpuPtr, ChunkState::Mapped));
/**
 * @brief Unmaps a region of the GPU virtual address space by inserting an Unmapped chunk over it
 * @param virtualAddress The base address of the region, it must be aligned to GpuPageSize
 * @param size The size of the region, it is passed to InsertChunk as-is without being aligned up
 * @return true if the chunk was inserted, false if virtualAddress is misaligned or a std::exception was thrown while inserting
 */
bool GraphicsMemoryManager::Unmap(u64 virtualAddress, u64 size) {
if (!util::IsAligned(virtualAddress, GpuPageSize)) [[unlikely]]
return false;
try {
std::unique_lock lock(mutex);
InsertChunk(ChunkDescriptor(virtualAddress, size, nullptr, ChunkState::Unmapped));
} catch (const std::exception &e) {
// A failure while inserting is reported to the caller as a plain false rather than propagated
return false;
return true;
void GraphicsMemoryManager::Read(u8 *destination, u64 virtualAddress, u64 size) {
std::shared_lock lock(mutex);
auto chunk{std::upper_bound(chunks.begin(), chunks.end(), virtualAddress, [](const u64 address, const ChunkDescriptor &chunk) -> bool {
return address < chunk.virtualAddress;
if (chunk == chunks.end() || chunk->state != ChunkState::Mapped)
throw exception("Failed to read region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, size);
u64 initialSize{size};
u64 chunkOffset{virtualAddress - chunk->virtualAddress};
u8 *source{chunk->cpuPtr + chunkOffset};
u64 sourceSize{std::min(chunk->size - chunkOffset, size)};
// A continuous region in the GPU address space may be made up of several discontinuous regions in physical memory so we have to iterate over all chunks
while (size) {
std::memcpy(destination + (initialSize - size), source, sourceSize);
size -= sourceSize;
if (size) {
if (++chunk == chunks.end() || chunk->state != ChunkState::Mapped)
throw exception("Failed to read region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, size);
source = chunk->cpuPtr;
sourceSize = std::min(chunk->size, size);
void GraphicsMemoryManager::Write(u8 *source, u64 virtualAddress, u64 size) {
std::shared_lock lock(mutex);
auto chunk{std::upper_bound(chunks.begin(), chunks.end(), virtualAddress, [](const u64 address, const ChunkDescriptor &chunk) -> bool {
return address < chunk.virtualAddress;
if (chunk == chunks.end() || chunk->state != ChunkState::Mapped)
throw exception("Failed to write region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, size);
u64 initialSize{size};
u64 chunkOffset{virtualAddress - chunk->virtualAddress};
u8 *destination{chunk->cpuPtr + chunkOffset};
u64 destinationSize{std::min(chunk->size - chunkOffset, size)};
// A continuous region in the GPU address space may be made up of several discontinuous regions in physical memory so we have to iterate over all chunks
while (size) {
std::memcpy(destination, source + (initialSize - size), destinationSize);
size -= destinationSize;
if (size) {
if (++chunk == chunks.end() || chunk->state != ChunkState::Mapped)
throw exception("Failed to write region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, size);
destination = chunk->cpuPtr;
destinationSize = std::min(chunk->size, size);
@ -1,140 +0,0 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <common.h>
namespace skyline::soc::gmmu {
enum class ChunkState {
Unmapped, //!< The chunk is unmapped
Reserved, //!< The chunk is reserved
Mapped //!< The chunk is mapped and a CPU side address is present
struct ChunkDescriptor {
u64 virtualAddress; //!< The address of the chunk in the virtual address space
u64 size; //!< The size of the chunk in bytes
u8 *cpuPtr; //!< A pointer to the chunk in the application's address space (if mapped)
ChunkState state;
ChunkDescriptor(u64 virtualAddress, u64 size, u8 *cpuPtr, ChunkState state) : virtualAddress(virtualAddress), size(size), cpuPtr(cpuPtr), state(state) {}
* @return If the given chunk can be contained wholly within this chunk
inline bool CanContain(const ChunkDescriptor &chunk) {
return (chunk.virtualAddress >= virtualAddress) && ((size + virtualAddress) >= (chunk.size + chunk.virtualAddress));
* @brief The GraphicsMemoryManager class handles mapping between a Maxwell GPU virtual address space and an application's address space and is meant to roughly emulate the GMMU on the X1
* @note This is not accurate to the X1 as it would have an SMMU between the GMMU and physical memory but we don't emulate this abstraction at the moment
class GraphicsMemoryManager {
const DeviceState &state;
std::vector<ChunkDescriptor> chunks;
std::shared_mutex mutex;
* @brief Finds a chunk in the virtual address space that meets the given requirements
* @note mutex MUST be locked when calling this
* @param desiredState The state of the chunk to find
* @param size The minimum size of the chunk to find
* @param alignment The minimum alignment of the chunk to find
* @return The first applicable chunk
std::optional<ChunkDescriptor> FindChunk(ChunkState desiredState, u64 size, u64 alignment = 0);
* @brief Inserts a chunk into the chunk list, resizing and splitting as necessary
* @note mutex MUST be locked when calling this
* @param newChunk The chunk to insert
* @return The base virtual address of the inserted chunk
u64 InsertChunk(const ChunkDescriptor &newChunk);
GraphicsMemoryManager(const DeviceState &state);
* @brief Reserves a region of the virtual address space so it will not be chosen automatically when mapping
* @param size The size of the region to reserve
* @param alignment The alignment of the region to reserve
* @return The base virtual address of the reserved region
u64 ReserveSpace(u64 size, u64 alignment);
* @brief Reserves a fixed region of the virtual address space so it will not be chosen automatically when mapping
* @param virtualAddress The virtual base address of the region to allocate
* @param size The size of the region to allocate
* @return The base virtual address of the reserved region
u64 ReserveFixed(u64 virtualAddress, u64 size);
* @brief Maps a CPU memory region into an automatically chosen region of the virtual address space
* @param cpuPtr A pointer to the region to be mapped into the virtual address space
* @param size The size of the region to map
* @return The base virtual address of the mapped region
u64 MapAllocate(u8 *cpuPtr, u64 size);
* @brief Maps a CPU memory region to a fixed region in the virtual address space
* @param virtualAddress The target virtual address of the region
* @param cpuPtr A pointer to the region to be mapped into the virtual address space
* @param size The size of the region to map
* @return The base virtual address of the mapped region
u64 MapFixed(u64 virtualAddress, u8 *cpuPtr, u64 size);
* @brief Unmaps all chunks in the given region from the virtual address space
* @return Whether the operation succeeded
bool Unmap(u64 virtualAddress, u64 size);
void Read(u8 *destination, u64 virtualAddress, u64 size);
* @brief Reads in a span from a region of the virtual address space
template<typename T>
void Read(span <T> destination, u64 virtualAddress) {
Read(reinterpret_cast<u8 *>(destination.data()), virtualAddress, destination.size_bytes());
* @brief Reads in an object from a region of the virtual address space
* @tparam T The type of object to return
template<typename T>
T Read(u64 virtualAddress) {
T obj;
Read(reinterpret_cast<u8 *>(&obj), virtualAddress, sizeof(T));
return obj;
void Write(u8 *source, u64 virtualAddress, u64 size);
* @brief Writes out a span to a region of the virtual address space
template<typename T>
void Write(span <T> source, u64 virtualAddress) {
Write(reinterpret_cast<u8 *>(source.data()), virtualAddress, source.size_bytes());
* @brief Writes out an object to a region of the virtual address space
template<typename T>
void Write(T source, u64 virtualAddress) {
Write(reinterpret_cast<u8 *>(&source), virtualAddress, sizeof(T));
Reference in New Issue
Block a user