NEEDS CLEANUP: Reimplement GPU VMM and rewrite nvdrv VM impl

This commit is contained in:
Billy Laws 2021-08-14 20:42:11 +01:00 committed by ◱ Mark
parent 020aa0e43a
commit d03b288db6
23 changed files with 794 additions and 444 deletions

View File

@ -100,7 +100,7 @@ add_library(skyline SHARED
${source_DIR}/skyline/gpu/command_scheduler.cpp ${source_DIR}/skyline/gpu/command_scheduler.cpp
${source_DIR}/skyline/gpu/texture/texture.cpp ${source_DIR}/skyline/gpu/texture/texture.cpp
${source_DIR}/skyline/gpu/presentation_engine.cpp ${source_DIR}/skyline/gpu/presentation_engine.cpp
${source_DIR}/skyline/soc/gmmu.cpp ${source_DIR}/skyline/soc/gm20b.cpp
${source_DIR}/skyline/soc/host1x/syncpoint.cpp ${source_DIR}/skyline/soc/host1x/syncpoint.cpp
${source_DIR}/skyline/soc/gm20b/gpfifo.cpp ${source_DIR}/skyline/soc/gm20b/gpfifo.cpp
${source_DIR}/skyline/soc/gm20b/engines/maxwell_3d.cpp ${source_DIR}/skyline/soc/gm20b/engines/maxwell_3d.cpp

View File

@ -0,0 +1,155 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <concepts>
#include <common.h>
namespace skyline {
/**
 * @brief Satisfied when VaType is an unsigned type with at least AddressSpaceBits bits of storage
 */
template<typename VaType, size_t AddressSpaceBits>
concept AddressSpaceValid = (AddressSpaceBits <= sizeof(VaType) * 8) && std::is_unsigned_v<VaType>;
/**
 * @brief FlatAddressSpaceMap provides a generic VA->PA mapping implementation using a sorted vector
 * @tparam VaType Unsigned integer type used for virtual addresses
 * @tparam UnmappedVa VA value that a default-constructed block starts at
 * @tparam PaType Type used for physical addresses
 * @tparam UnmappedPa PA value that marks a block as unmapped
 * @tparam PaContigSplit If true, splitting a mapped block offsets the tail's PA by the split distance, otherwise the tail reuses the original PA unchanged
 * @tparam AddressSpaceBits Width of the address space in bits, used to derive VaMaximum
 */
template<typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa, bool PaContigSplit, size_t AddressSpaceBits> requires AddressSpaceValid<VaType, AddressSpaceBits>
class FlatAddressSpaceMap {
  private:
    /**
     * @brief Represents a block of memory in the AS
     */
    struct Block {
        VaType virt{UnmappedVa}; //!< VA of the block
        PaType phys{UnmappedPa}; //!< PA of the block, will increase 1-1 with VA until a new block is encountered
        bool flag{}; //!< General purpose flag for use by derived classes

        Block() = default;

        Block(VaType virt, PaType phys, bool flag) : virt(virt), phys(phys), flag(flag) {}

        /**
         * @return If the block starts at a VA other than UnmappedVa
         */
        constexpr bool Valid() const {
            return virt != UnmappedVa;
        }

        /**
         * @return If the block has a PA other than UnmappedPa
         */
        constexpr bool Mapped() const {
            return phys != UnmappedPa;
        }

        /**
         * @return If the block's PA is UnmappedPa
         */
        constexpr bool Unmapped() const {
            return phys == UnmappedPa;
        }

        /**
         * @brief Orders a block against a bare VA so the block vector can be binary searched by address
         */
        bool operator<(const VaType &pVirt) const {
            return virt < pVirt;
        }
    };

  protected:
    std::mutex blockMutex; //!< Guards all accesses to blocks
    std::vector<Block> blocks{Block{}}; //!< Blocks ordered by VA, initially a single unmapped block at UnmappedVa

    /**
     * @brief Maps a PA range into the given AS region, optionally setting the flag
     * @note blockMutex MUST be locked when calling this
     */
    void MapLocked(VaType virt, PaType phys, VaType size, bool flag = {});

    /**
     * @brief Unmaps the given range and merges it with other unmapped regions
     * @note blockMutex MUST be locked when calling this
     */
    void UnmapLocked(VaType virt, VaType size);

  public:
    // Equal to 2^AddressSpaceBits - 1, built from two halves so that no shift by the full width of the type is needed
    static constexpr VaType VaMaximum{(1ULL << (AddressSpaceBits - 1)) + ((1ULL << (AddressSpaceBits - 1)) - 1)}; //!< The maximum VA that this AS can technically reach

    VaType vaLimit{VaMaximum}; //!< A soft limit on the maximum VA of the AS

    /**
     * @param pVaLimit The soft VA limit to use, must not exceed VaMaximum
     */
    FlatAddressSpaceMap(VaType pVaLimit);

    FlatAddressSpaceMap() = default;

    /**
     * @brief Acquires blockMutex and then performs the same operation as MapLocked
     */
    void Map(VaType virt, PaType phys, VaType size, bool flag = {});

    /**
     * @brief Acquires blockMutex and then performs the same operation as UnmapLocked
     */
    void Unmap(VaType virt, VaType size);
};
/**
 * @brief A FlatAddressSpaceMap whose PAs are host pointers (u8 *), extended with helpers that copy data out of and into the mapped address space
 */
template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> requires AddressSpaceValid<VaType, AddressSpaceBits>
class FlatMemoryManager : public FlatAddressSpaceMap<VaType, UnmappedVa, u8 *, nullptr, true, AddressSpaceBits> {
  public:
    /**
     * @return The dummy pointer used as the PA of sparse mapped regions, it carries no meaning as an address
     */
    static u8 *SparsePlaceholderAddress() {
        return reinterpret_cast<u8 *>(0xCAFEBABE);
    }

    /**
     * @brief Copies size bytes starting at the VA virt into destination
     */
    void Read(u8 *destination, VaType virt, VaType size);

    /**
     * @brief Fills the entire span with the bytes found at the VA virt
     */
    template<typename T>
    void Read(span <T> destination, VaType virt) {
        auto *rawDestination{reinterpret_cast<u8 *>(destination.data())};
        Read(rawDestination, virt, destination.size_bytes());
    }

    /**
     * @return An object of type T built from the sizeof(T) bytes found at the VA virt
     */
    template<typename T>
    T Read(VaType virt) {
        T value;
        auto *rawValue{reinterpret_cast<u8 *>(&value)};
        Read(rawValue, virt, sizeof(T));
        return value;
    }

    /**
     * @brief Copies size bytes from source to the VA virt
     */
    void Write(VaType virt, u8 *source, VaType size);

    /**
     * @brief Copies the entire contents of the span to the VA virt
     */
    template<typename T>
    void Write(VaType virt, span <T> source) {
        auto *rawSource{reinterpret_cast<u8 *>(source.data())};
        Write(virt, rawSource, source.size_bytes());
    }

    /**
     * @brief Copies the sizeof(T) bytes that make up source to the VA virt
     */
    template<typename T>
    void Write(VaType virt, T source) {
        auto *rawSource{reinterpret_cast<u8 *>(&source)};
        Write(virt, rawSource, sizeof(T));
    }
};
/**
 * @brief FlatAllocator specialises FlatAddressSpaceMap to work as an allocator, with an initial, fast linear pass and a subsequent slower pass that iterates until it finds a free block
 * @note The PA type of the underlying map is bool with false as the unmapped value, so a mapped block simply denotes an allocated region
 */
template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> requires AddressSpaceValid<VaType, AddressSpaceBits>
class FlatAllocator : public FlatAddressSpaceMap<VaType, UnmappedVa, bool, false, false, AddressSpaceBits> {
private:
using Base = FlatAddressSpaceMap<VaType, UnmappedVa, bool, false, false, AddressSpaceBits>;
VaType currentLinearAllocEnd; //!< The end address for the initial linear allocation pass, once this reaches the AS limit the slower allocation path will be used
public:
VaType vaStart; //!< The base VA of the allocator, no allocations will be below this
/**
 * @param vaStart The base VA of the allocator, the linear allocation pass begins here
 * @param vaLimit The soft VA limit that is forwarded to the underlying FlatAddressSpaceMap
 */
FlatAllocator(VaType vaStart, VaType vaLimit);
/**
 * @brief Allocates a region in the AS of the given size and returns its address
 */
VaType Allocate(VaType size);
/**
 * @brief Marks the given region in the AS as allocated
 */
void AllocateFixed(VaType virt, VaType size);
/**
 * @brief Frees an AS region so it can be used again
 */
void Free(VaType virt, VaType size);
};
}

View File

@ -0,0 +1,354 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <common/trace.h>
#include <kernel/types/KProcess.h>
#include "address_space.h"
// The macros below expand to the template header, constraint and qualified class name that every out-of-class member definition needs
// Usage: `MAP_MEMBER(void)::Foo(...) { ... }`, with an empty argument for constructors: `MAP_MEMBER()::FlatAddressSpaceMap(...)`
// MAP_MEMBER: prefix for members of FlatAddressSpaceMap
#define MAP_MEMBER(returnType) template<typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa, bool PaContigSplit, size_t AddressSpaceBits> requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatAddressSpaceMap<VaType, UnmappedVa, PaType, UnmappedPa, PaContigSplit, AddressSpaceBits>
// MM_MEMBER: prefix for members of FlatMemoryManager
#define MM_MEMBER(returnType) template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatMemoryManager<VaType, UnmappedVa, AddressSpaceBits>
// ALLOC_MEMBER: prefix for members of FlatAllocator
#define ALLOC_MEMBER(returnType) template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatAllocator<VaType, UnmappedVa, AddressSpaceBits>
namespace skyline {
MAP_MEMBER()::FlatAddressSpaceMap(VaType pVaLimit) : vaLimit(pVaLimit) {
if (pVaLimit > VaMaximum)
throw exception("Invalid VA limit!");
}
/**
 * @brief Maps [virt, virt + size) to phys, overwriting whatever blocks the range previously covered
 * @param virt The first VA of the range
 * @param phys The PA that the start of the range maps to
 * @param size The length of the range
 * @param flag General purpose flag stored on the newly created block
 * @throws exception if the range ends past vaLimit (or wraps around) or if the block vector is in an unexpected state
 * @note blockMutex MUST be locked when calling this
 */
MAP_MEMBER(void)::MapLocked(VaType virt, PaType phys, VaType size, bool flag) {
    TRACE_EVENT("containers", "FlatAddressSpaceMap::Map");

    VaType virtEnd{virt + size};

    // virtEnd < virt means the unsigned addition wrapped around, which is necessarily past the limit too
    if (virtEnd < virt || virtEnd > vaLimit)
        throw exception("Trying to map a block past the VA limit!");

    // The first block that starts at or after the end of the new mapping
    auto blockEndSuccessor{std::lower_bound(blocks.begin(), blocks.end(), virtEnd)};
    if (blockEndSuccessor == blocks.begin())
        throw exception("Unexpected Memory Manager state!");

    auto blockEndPredecessor{std::prev(blockEndSuccessor)};

    if (blockEndSuccessor != blocks.end()) {
        // We have blocks in front of us, if one is directly in front then we don't have to add a tail
        if (blockEndSuccessor->virt != virtEnd) {
            PaType tailPhys{[&]() -> PaType {
                if constexpr (!PaContigSplit) {
                    return blockEndPredecessor->phys; // Non-contiguous PAs are propagated as-is
                } else {
                    if (blockEndPredecessor->Unmapped())
                        return blockEndPredecessor->phys; // Always propagate unmapped regions
                    else
                        return blockEndPredecessor->phys + virtEnd - blockEndPredecessor->virt;
                }
            }()};

            if (blockEndPredecessor->virt >= virt) {
                // If this block's start would be overlapped by the map then reuse it as a tail block
                // Its flag is left untouched as the tail still describes the remainder of the old mapping
                blockEndPredecessor->virt = virtEnd;
                blockEndPredecessor->phys = tailPhys;

                // The reused block now starts at virtEnd so it has become the end successor
                blockEndSuccessor = blockEndPredecessor;
            } else {
                // Else insert a new one and we're done
                blocks.insert(blockEndSuccessor, {Block(virt, phys, flag), Block(virtEnd, tailPhys, blockEndPredecessor->flag)});
                return;
            }
        }
    } else {
        // blockEndPredecessor will always be unmapped as blocks has to be terminated by an unmapped chunk
        if (blockEndPredecessor != blocks.begin() && blockEndPredecessor->virt >= virt) {
            // Move the unmapped block start backwards
            blockEndPredecessor->virt = virtEnd;

            // The moved block now starts at virtEnd so it has become the end successor
            blockEndSuccessor = blockEndPredecessor;
        } else {
            // Else insert a new one and we're done
            blocks.insert(blockEndSuccessor, {Block(virt, phys, flag), Block(virtEnd, UnmappedPa, false)});
            return;
        }
    }

    // From here on blockEndSuccessor is the first block at virtEnd or later, every block that starts inside [virt, virtEnd) lies directly before it
    auto blockStartSuccessor{blockEndSuccessor};

    // Walk the block vector to find the start successor as this is more efficient than another binary search in most scenarios
    // The begin check stops the walk from stepping in front of the first block
    while (blockStartSuccessor != blocks.begin() && std::prev(blockStartSuccessor)->virt >= virt)
        std::advance(blockStartSuccessor, -1);

    if (blockStartSuccessor->virt > virtEnd) {
        throw exception("Unexpected Memory Manager state!");
    } else if (blockStartSuccessor->virt == virtEnd) {
        // We need to create a new block as there are none spare that we would overwrite
        blocks.insert(blockStartSuccessor, Block(virt, phys, flag));
    } else {
        // Erase every overwritten block between the one we reuse as the head and the end successor
        // Erasing after blockStartSuccessor leaves that iterator valid
        if (auto eraseStart{std::next(blockStartSuccessor)}; eraseStart != blockEndSuccessor)
            blocks.erase(eraseStart, blockEndSuccessor);

        // Reuse the first overwritten block as the head of the new mapping
        blockStartSuccessor->virt = virt;
        blockStartSuccessor->phys = phys;
        blockStartSuccessor->flag = flag;
    }
}
/**
 * @brief Unmaps [virt, virt + size), merging the result with any unmapped region directly before it
 * @param virt The first VA of the range
 * @param size The length of the range
 * @throws exception if the range ends past vaLimit (or wraps around) or if the block vector is in an unexpected state
 * @note blockMutex MUST be locked when calling this
 */
MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) {
    TRACE_EVENT("containers", "FlatAddressSpaceMap::Unmap");

    VaType virtEnd{virt + size};

    // virtEnd < virt means the unsigned addition wrapped around, which is necessarily past the limit too
    if (virtEnd < virt || virtEnd > vaLimit)
        throw exception("Trying to unmap a block past the VA limit!");

    // The first block that starts at or after the end of the unmapped range
    auto blockEndSuccessor{std::lower_bound(blocks.begin(), blocks.end(), virtEnd)};
    if (blockEndSuccessor == blocks.begin())
        throw exception("Unexpected Memory Manager state!");

    auto blockEndPredecessor{std::prev(blockEndSuccessor)};

    // Steps backwards from iter to the last block that starts before virt, never stepping in front of the first block
    auto walkBackToPredecessor{[&](auto iter) {
        while (iter != blocks.begin() && iter->virt >= virt)
            std::advance(iter, -1);

        return iter;
    }};

    // Handles the cases where the range ends in (or directly before) an already unmapped block so no tail is required
    auto eraseBlocksWithEndUnmapped{[&] (auto unmappedEnd) {
        auto blockStartPredecessor{walkBackToPredecessor(unmappedEnd)};
        auto blockStartSuccessor{std::next(blockStartPredecessor)};

        auto eraseEnd{[&]() {
            if (blockStartPredecessor->Unmapped()) {
                // If the start predecessor is unmapped then we can erase everything in our region and be done
                return std::next(unmappedEnd);
            } else {
                // Else reuse the end predecessor as the start of our unmapped region then erase all up to it
                unmappedEnd->virt = virt;
                return unmappedEnd;
            }
        }()};

        // We can't have two unmapped regions after each other
        // eraseEnd is blocks.end() when the trailing unmapped block itself is erased, it must not be dereferenced then
        // NOTE(review): eraseEnd == blockStartSuccessor also holds when only the tail of a mapped block is being unmapped - confirm that throwing is intended there
        if (eraseEnd != blocks.end() && (eraseEnd == blockStartSuccessor || (blockStartPredecessor->Unmapped() && eraseEnd->Unmapped())))
            throw exception("Unexpected Memory Manager state!");

        blocks.erase(blockStartSuccessor, eraseEnd);
    }};

    // We can avoid any splitting logic if these are the case
    if (blockEndPredecessor->Unmapped()) {
        if (blockEndPredecessor->virt > virt)
            eraseBlocksWithEndUnmapped(blockEndPredecessor);

        return; // The region is unmapped, bail out early
    } else if (blockEndSuccessor == blocks.end()) {
        // This should never happen as the end should always follow an unmapped block
        // It is checked before any access through blockEndSuccessor as end() must not be dereferenced
        throw exception("Unexpected Memory Manager state!");
    } else if (blockEndSuccessor->virt == virtEnd && blockEndSuccessor->Unmapped()) {
        eraseBlocksWithEndUnmapped(blockEndSuccessor);

        return; // The region is unmapped here and doesn't need splitting, bail out early
    } else if (blockEndSuccessor->virt != virtEnd) {
        // If one block is directly in front then we don't have to add a tail

        // The previous block is mapped so we will need to add a tail with an offset
        PaType tailPhys{[&]() {
            if constexpr (PaContigSplit)
                return blockEndPredecessor->phys + virtEnd - blockEndPredecessor->virt;
            else
                return blockEndPredecessor->phys;
        }()};

        if (blockEndPredecessor->virt >= virt) {
            // If this block's start would be overlapped by the unmap then reuse it as a tail block
            blockEndPredecessor->virt = virtEnd;
            blockEndPredecessor->phys = tailPhys;

            // The reused block now starts at virtEnd so it has become the end successor
            blockEndSuccessor = blockEndPredecessor;
        } else {
            blocks.insert(blockEndSuccessor, {Block(virt, UnmappedPa, false), Block(virtEnd, tailPhys, blockEndPredecessor->flag)});

            return; // The previous block is mapped and started before the range, so the inserted head and tail are the entire update
        }
    }

    // From here on blockEndSuccessor is a mapped block starting exactly at virtEnd, every block that starts inside [virt, virtEnd) lies directly before it

    // Walk the block vector to find the start predecessor as this is more efficient than another binary search in most scenarios
    auto blockStartPredecessor{walkBackToPredecessor(blockEndSuccessor)};
    auto blockStartSuccessor{std::next(blockStartPredecessor)};

    if (blockStartSuccessor->virt > virtEnd) {
        throw exception("Unexpected Memory Manager state!");
    } else if (blockStartSuccessor->virt == virtEnd) {
        // There are no blocks between the start and the end that would let us skip inserting a new one for head

        // The previous block may be unmapped, if so we don't need to insert any unmaps after it
        if (blockStartPredecessor->Mapped())
            blocks.insert(blockStartSuccessor, Block(virt, UnmappedPa, false));
    } else if (blockStartPredecessor->Unmapped()) {
        // If the previous block is unmapped then it already covers the range once every block inside of it is erased
        blocks.erase(blockStartSuccessor, blockEndSuccessor);
    } else {
        // Erase overwritten blocks, skipping the first one as the unmapped start block is written there
        // Erasing after blockStartSuccessor leaves that iterator valid
        if (auto eraseStart{std::next(blockStartSuccessor)}; eraseStart != blockEndSuccessor)
            blocks.erase(eraseStart, blockEndSuccessor);

        // Add in the unmapped block header, clearing the flag to match the freshly inserted unmapped blocks above
        blockStartSuccessor->virt = virt;
        blockStartSuccessor->phys = UnmappedPa;
        blockStartSuccessor->flag = false;
    }
}
/**
 * @brief Holds blockMutex for the duration of a MapLocked call with the same arguments
 */
MAP_MEMBER(void)::Map(VaType virt, PaType phys, VaType size, bool flag) {
    std::lock_guard<std::mutex> guard{blockMutex};
    MapLocked(virt, phys, size, flag);
}
/**
 * @brief Holds blockMutex for the duration of an UnmapLocked call with the same arguments
 */
MAP_MEMBER(void)::Unmap(VaType virt, VaType size) {
    std::lock_guard<std::mutex> guard{blockMutex};
    UnmapLocked(virt, size);
}
/**
 * @brief Copies size bytes starting at the VA virt into destination, walking across as many blocks as necessary
 * @param destination Host buffer that receives the data, must have room for size bytes
 * @param virt The VA to start reading from
 * @param size The amount of bytes to read
 * @throws exception if any part of the range lies in a block that is neither mapped nor sparse
 * @note Blocks with their flag set are sparse: they read as zeroes and their PA is never dereferenced as it may be the sparse placeholder
 */
MM_MEMBER(void)::Read(u8 *destination, VaType virt, VaType size) {
    std::scoped_lock lock(this->blockMutex);

    TRACE_EVENT("containers", "FlatMemoryManager::Read");

    // The first block that starts after virt, the block directly before it is the one containing virt
    auto successor{std::upper_bound(this->blocks.begin(), this->blocks.end(), virt, [] (auto address, const auto &block) {
        return address < block.virt;
    })};
    if (successor == this->blocks.begin())
        throw exception("Page fault at: 0x{:X}", virt); // No block starts at or before virt

    auto predecessor{std::prev(successor)};

    // The amount of bytes that can be read from the current block when starting at the VA 'start'
    // The final block has no successor to bound it so it is only limited by the remaining size
    auto readableSize{[&](VaType start) -> VaType {
        if (successor == this->blocks.end())
            return size;
        return std::min<VaType>(successor->virt - start, size);
    }};

    auto blockOffset{static_cast<VaType>(virt - predecessor->virt)}; // Only the first block is entered part way through
    VaType blockReadSize{readableSize(virt)};

    while (size) {
        if (predecessor->flag) {
            // Sparse mappings have no backing memory and read as zeroes
            std::memset(destination, 0, blockReadSize);
        } else if (predecessor->phys == nullptr) {
            throw exception("Page fault at: 0x{:X}", predecessor->virt);
        } else {
            std::memcpy(destination, predecessor->phys + blockOffset, blockReadSize);
        }

        destination += blockReadSize;
        size -= blockReadSize;

        if (size) {
            // A non-zero remainder implies that the read was bounded by a successor, so it can be advanced to safely
            predecessor = successor++;
            blockOffset = 0;
            blockReadSize = readableSize(predecessor->virt);
        }
    }
}
/**
 * @brief Copies size bytes from source to the VA virt, walking across as many blocks as necessary
 * @param virt The VA to start writing to
 * @param source Host buffer holding the data to write, must contain size bytes
 * @param size The amount of bytes to write
 * @throws exception if any part of the range lies in a block that is neither mapped nor sparse
 * @note Blocks with their flag set are sparse: writes to them are discarded and their PA is never dereferenced as it may be the sparse placeholder
 */
MM_MEMBER(void)::Write(VaType virt, u8 *source, VaType size) {
    std::scoped_lock lock(this->blockMutex);

    TRACE_EVENT("containers", "FlatMemoryManager::Write");

    // The first block that starts after virt, the block directly before it is the one containing virt
    auto successor{std::upper_bound(this->blocks.begin(), this->blocks.end(), virt, [] (auto address, const auto &block) {
        return address < block.virt;
    })};
    if (successor == this->blocks.begin())
        throw exception("Page fault at: 0x{:X}", virt); // No block starts at or before virt

    auto predecessor{std::prev(successor)};

    // The amount of bytes that can be written to the current block when starting at the VA 'start'
    // The final block has no successor to bound it so it is only limited by the remaining size
    auto writableSize{[&](VaType start) -> VaType {
        if (successor == this->blocks.end())
            return size;
        return std::min<VaType>(successor->virt - start, size);
    }};

    auto blockOffset{static_cast<VaType>(virt - predecessor->virt)}; // Only the first block is entered part way through
    VaType blockWriteSize{writableSize(virt)};

    while (size) {
        if (!predecessor->flag) { // Sparse mappings allow unmapped writes, the data is simply dropped
            if (predecessor->phys == nullptr)
                throw exception("Page fault at: 0x{:X}", predecessor->virt);

            std::memcpy(predecessor->phys + blockOffset, source, blockWriteSize);
        }

        source += blockWriteSize;
        size -= blockWriteSize;

        if (size) {
            // A non-zero remainder implies that the write was bounded by a successor, so it can be advanced to safely
            predecessor = successor++;
            blockOffset = 0;
            blockWriteSize = writableSize(predecessor->virt);
        }
    }
}
/**
 * @brief Sets up the allocator so that the linear allocation pass begins at vaStart
 * @note The initialisers are listed in member declaration order, both read the constructor parameter rather than the member
 */
ALLOC_MEMBER()::FlatAllocator(VaType vaStart, VaType vaLimit)
    : Base(vaLimit),
      currentLinearAllocEnd(vaStart),
      vaStart(vaStart) {}
/**
 * @brief Allocates a region in the AS of the given size and returns its address
 * @param size The size of the region to allocate
 * @return The VA of the start of the newly allocated region
 * @throws exception if no free region of the requested size could be found
 * @note A fast pass first tries to place the region at (or after) the end of the previous linear allocation, if that fails every gap from vaStart onwards is searched
 */
ALLOC_MEMBER(VaType)::Allocate(VaType size) {
    std::scoped_lock lock(this->blockMutex);

    TRACE_EVENT("containers", "FlatAllocator::Allocate");

    VaType allocStart{UnmappedVa};
    VaType allocEnd{currentLinearAllocEnd + size};

    // Avoid searching backwards in the address space if possible
    // The first comparison rejects a wrapped around allocEnd
    if (allocEnd >= currentLinearAllocEnd && allocEnd <= this->vaLimit) {
        auto allocEndSuccessor{std::lower_bound(this->blocks.begin(), this->blocks.end(), allocEnd)};
        if (allocEndSuccessor == this->blocks.begin())
            throw exception("Unexpected allocator state!");

        auto allocEndPredecessor{std::prev(allocEndSuccessor)};
        if (allocEndPredecessor->virt <= currentLinearAllocEnd && allocEndPredecessor->Unmapped()) {
            // A single free block spans the entire candidate region
            allocStart = currentLinearAllocEnd;
        } else {
            // Skip over any allocations in front of us until a free gap that is large enough turns up
            while (allocEndSuccessor != this->blocks.end()) {
                if (allocEndPredecessor->Unmapped() && allocEndSuccessor->virt - allocEndPredecessor->virt >= size) {
                    allocStart = allocEndPredecessor->virt;
                    break;
                }

                allocEndPredecessor = allocEndSuccessor++;

                // Use the VA limit to calculate if we can fit in the final block since it has no successor
                if (allocEndSuccessor == this->blocks.end()) {
                    allocEnd = allocEndPredecessor->virt + size;

                    if (allocEnd >= allocEndPredecessor->virt && allocEnd <= this->vaLimit)
                        allocStart = allocEndPredecessor->virt;
                }
            }
        }
    }

    if (allocStart != UnmappedVa) {
        currentLinearAllocEnd = allocStart + size;
    } else { // If linear allocation overflows the AS then find a gap
        if (this->blocks.size() <= 2)
            throw exception("Unexpected allocator state!");

        auto searchPredecessor{this->blocks.begin()};
        auto searchSuccessor{std::next(searchPredecessor)};

        // The usable start of the gap that follows searchPredecessor, clamped so that nothing is placed below vaStart
        auto gapStart{[&]() -> VaType {
            return searchPredecessor->virt < vaStart ? vaStart : searchPredecessor->virt;
        }};

        // If the gap between searchPredecessor and searchSuccessor is free and large enough to hold the allocation
        auto gapFits{[&]() {
            if (searchPredecessor->Mapped())
                return false;

            VaType start{gapStart()};
            return searchSuccessor->virt > start && searchSuccessor->virt - start >= size;
        }};

        while (searchSuccessor != this->blocks.end() && !gapFits())
            searchPredecessor = searchSuccessor++;

        if (searchSuccessor != this->blocks.end())
            allocStart = gapStart();
        else
            throw exception("Unexpected allocator state!");
    }

    this->MapLocked(allocStart, true, size);
    return allocStart;
}
/**
 * @brief Marks the given region in the AS as allocated
 * @param virt The first VA of the region
 * @param size The size of the region
 * @note This goes through Map rather than MapLocked as blockMutex has to be held while the block vector is modified
 */
ALLOC_MEMBER(void)::AllocateFixed(VaType virt, VaType size) {
    this->Map(virt, true, size);
}
/**
 * @brief Frees an AS region so it can be used again
 * @param virt The first VA of the region
 * @param size The size of the region
 * @note This goes through Unmap rather than UnmapLocked as blockMutex has to be held while the block vector is modified
 */
ALLOC_MEMBER(void)::Free(VaType virt, VaType size) {
    this->Unmap(virt, size);
}
}

View File

@ -3,6 +3,7 @@
#pragma once #pragma once
#include <common/trace.h>
#include <common.h> #include <common.h>
namespace skyline { namespace skyline {
@ -51,10 +52,15 @@ namespace skyline {
*/ */
template<typename F> template<typename F>
[[noreturn]] void Process(F function) { [[noreturn]] void Process(F function) {
TRACE_EVENT_BEGIN("containers", "CircularQueue::Process");
while (true) { while (true) {
if (start == end) { if (start == end) {
std::unique_lock lock(productionMutex); std::unique_lock lock(productionMutex);
TRACE_EVENT_END("containers");
produceCondition.wait(lock, [this]() { return start != end; }); produceCondition.wait(lock, [this]() { return start != end; });
TRACE_EVENT_BEGIN("containers", "CircularQueue::Process");
} }
while (start != end) { while (start != end) {

View File

@ -183,8 +183,6 @@ namespace skyline::signal {
std::call_once(signalHandlerOnce[signal], [signal, &action]() { std::call_once(signalHandlerOnce[signal], [signal, &action]() {
struct sigaction oldAction; struct sigaction oldAction;
Sigaction(signal, &action, &oldAction); Sigaction(signal, &action, &oldAction);
if (oldAction.sa_flags && oldAction.sa_flags != action.sa_flags)
throw exception("Old sigaction flags aren't equivalent to the replaced signal: {:#b} | {:#b}", oldAction.sa_flags, action.sa_flags);
DefaultSignalHandlers.at(signal).function = (oldAction.sa_flags & SA_SIGINFO) ? oldAction.sa_sigaction : reinterpret_cast<void (*)(int, struct siginfo *, void *)>(oldAction.sa_handler); DefaultSignalHandlers.at(signal).function = (oldAction.sa_flags & SA_SIGINFO) ? oldAction.sa_sigaction : reinterpret_cast<void (*)(int, struct siginfo *, void *)>(oldAction.sa_handler);
}); });

View File

@ -13,7 +13,8 @@ PERFETTO_DEFINE_CATEGORIES(
perfetto::Category("kernel").SetDescription("Events from parts of the HLE kernel"), perfetto::Category("kernel").SetDescription("Events from parts of the HLE kernel"),
perfetto::Category("guest").SetDescription("Events relating to guest code"), perfetto::Category("guest").SetDescription("Events relating to guest code"),
perfetto::Category("gpu").SetDescription("Events from the emulated GPU"), perfetto::Category("gpu").SetDescription("Events from the emulated GPU"),
perfetto::Category("service").SetDescription("Events from the HLE sysmodule implementations") perfetto::Category("service").SetDescription("Events from the HLE sysmodule implementations"),
perfetto::Category("containers").SetDescription("Events from custom container implementations")
); );
namespace skyline::trace { namespace skyline::trace {

View File

@ -13,6 +13,7 @@ namespace skyline::service {
Busy = 16, // EBUSY Busy = 16, // EBUSY
InvalidArgument = 22, // EINVAL InvalidArgument = 22, // EINVAL
InappropriateIoctlForDevice = 25, // ENOTTY InappropriateIoctlForDevice = 25, // ENOTTY
FunctionNotImplemented = 38, // ENOSYS
NotSupported = 95, // EOPNOTSUPP, ENOTSUP NotSupported = 95, // EOPNOTSUPP, ENOTSUP
TimedOut = 110, // ETIMEDOUT TimedOut = 110, // ETIMEDOUT

View File

@ -1,10 +1,16 @@
// SPDX-License-Identifier: MIT OR MPL-2.0 // SPDX-License-Identifier: MIT OR MPL-2.0
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) // Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <common/address_space.inc>
#include <soc.h> #include <soc.h>
#include <services/nvdrv/devices/deserialisation/deserialisation.h> #include <services/nvdrv/devices/deserialisation/deserialisation.h>
#include "as_gpu.h" #include "as_gpu.h"
namespace skyline {
template class FlatAddressSpaceMap<u32, 0, bool, false, false, 32>;
template class FlatAllocator<u32, 0, 32>;
}
namespace skyline::service::nvdrv::device::nvhost { namespace skyline::service::nvdrv::device::nvhost {
AsGpu::AsGpu(const DeviceState &state, Core &core, const SessionContext &ctx) : NvDevice(state, core, ctx) {} AsGpu::AsGpu(const DeviceState &state, Core &core, const SessionContext &ctx) : NvDevice(state, core, ctx) {}
@ -14,38 +20,66 @@ namespace skyline::service::nvdrv::device::nvhost {
} }
PosixResult AsGpu::AllocSpace(In<u32> pages, In<u32> pageSize, In<MappingFlags> flags, InOut<u64> offset) { PosixResult AsGpu::AllocSpace(In<u32> pages, In<u32> pageSize, In<MappingFlags> flags, InOut<u64> offset) {
// TODO: track this on the nvdrv side and have the gmmu only do virt -> phys state.logger->Debug("pages: 0x{:X}, pageSize: 0x{:X}, flags: ( fixed: {}, sparse: {} ), offset: 0x{:X}", pages, pageSize, flags.fixed, flags.sparse, offset);
// Also fix error codes
u64 size{static_cast<u64>(pages) * static_cast<u64>(pageSize)}; if (pageSize != VM::PageSize && pageSize != vm.bigPageSize)
return PosixResult::InvalidArgument;
if (pageSize != vm.bigPageSize && flags.sparse)
return PosixResult::FunctionNotImplemented;
u32 pageSizeBits{pageSize == VM::PageSize ? VM::PageSizeBits : vm.bigPageSizeBits};
auto &allocator{[&] () -> auto & {
if (pageSize == VM::PageSize)
return vm.smallPageAllocator;
else
return vm.bigPageAllocator;
}()};
if (flags.fixed) if (flags.fixed)
offset = state.soc->gmmu.ReserveFixed(offset, size); allocator->AllocateFixed(offset >> pageSizeBits, pages);
else else
offset = state.soc->gmmu.ReserveSpace(size, offset); // offset contains the input alignment offset = static_cast<u64>(allocator->Allocate(pages)) << pageSizeBits;
if (offset == 0) { u64 size{static_cast<u64>(pages) * static_cast<u64>(pageSize)};
state.logger->Warn("Failed to allocate GPU address space region!");
return PosixResult::InvalidArgument; if (flags.sparse)
} state.soc->gm20b.gmmu.Map(offset, soc::gm20b::GM20B::GMMU::SparsePlaceholderAddress(), size, true);
allocationMap[offset] = {
.size = size,
.pageSize = pageSize,
.sparse = flags.sparse
};
return PosixResult::Success; return PosixResult::Success;
} }
PosixResult AsGpu::FreeSpace(In<u64> offset, In<u32> pages, In<u32> pageSize) { PosixResult AsGpu::FreeSpace(In<u64> offset, In<u32> pages, In<u32> pageSize) {
// TODO: implement this when we add nvdrv side address space allocation // TODO: implement after UNMAP
return PosixResult::Success; return PosixResult::Success;
} }
PosixResult AsGpu::UnmapBuffer(In<u64> offset) { PosixResult AsGpu::UnmapBuffer(In<u64> offset) {
state.logger->Debug("offset: 0x{:X}", offset);
try { try {
auto region{regionMap.at(offset)}; auto mapping{mappingMap.at(offset)};
// Non-fixed regions are unmapped so that they can be used by future non-fixed mappings if (!mapping->fixed) {
if (!region.fixed) auto &allocator{mapping->bigPage ? vm.bigPageAllocator : vm.smallPageAllocator};
if (!state.soc->gmmu.Unmap(offset, region.size)) u32 pageSizeBits{mapping->bigPage ? vm.bigPageSizeBits : VM::PageSizeBits};
state.logger->Warn("Failed to unmap region at 0x{:X}", offset);
regionMap.erase(offset); allocator->Free(mapping->offset >> pageSizeBits, mapping->size >> pageSizeBits);
}
if (mapping->sparseAlloc)
state.soc->gm20b.gmmu.Map(offset, soc::gm20b::GM20B::GMMU::SparsePlaceholderAddress(), mapping->size, true);
else
state.soc->gm20b.gmmu.Unmap(offset, mapping->size);
mappingMap.erase(offset);
} catch (const std::out_of_range &e) { } catch (const std::out_of_range &e) {
state.logger->Warn("Couldn't find region to unmap at 0x{:X}", offset); state.logger->Warn("Couldn't find region to unmap at 0x{:X}", offset);
} }
@ -53,62 +87,94 @@ namespace skyline::service::nvdrv::device::nvhost {
return PosixResult::Success; return PosixResult::Success;
} }
PosixResult AsGpu::MapBufferEx(In<MappingFlags> flags, In<u32> kind, In<core::NvMap::Handle::Id> handle, InOut<u32> pageSize, In<u64> bufferOffset, In<u64> mappingSize, InOut<u64> offset) { PosixResult AsGpu::MapBufferEx(In<MappingFlags> flags, In<u32> kind, In<core::NvMap::Handle::Id> handle, In<u64> bufferOffset, In<u64> mappingSize, InOut<u64> offset) {
state.logger->Debug("flags: ( fixed: {}, remap: {} ), kind: {}, handle: {}, pageSize: 0x{:X}, bufferOffset: 0x{:X}, mappingSize: 0x{:X}, offset: 0x{:X}", flags.fixed, flags.remap, kind, handle, pageSize, bufferOffset, mappingSize, offset); if (!vm.initialised)
return PosixResult::InvalidArgument;
state.logger->Debug("flags: ( fixed: {}, remap: {} ), kind: {}, handle: {}, bufferOffset: 0x{:X}, mappingSize: 0x{:X}, offset: 0x{:X}", flags.fixed, flags.remap, kind, handle, bufferOffset, mappingSize, offset);
if (flags.remap) { if (flags.remap) {
auto region{regionMap.lower_bound(offset)}; try {
if (region == regionMap.end()) { auto mapping{mappingMap.at(offset)};
state.logger->Warn("Cannot remap an unmapped GPU address space region: 0x{:X}", offset);
return PosixResult::InvalidArgument;
}
if (region->second.size < mappingSize) { if (mapping->size < mappingSize) {
state.logger->Warn("Cannot remap an partially mapped GPU address space region: 0x{:X}", offset); state.logger->Warn("Cannot remap a partially mapped GPU address space region: 0x{:X}", offset);
return PosixResult::InvalidArgument; return PosixResult::InvalidArgument;
} }
u64 gpuAddress{offset + bufferOffset}; u64 gpuAddress{offset + bufferOffset};
u8 *cpuPtr{region->second.ptr + bufferOffset}; u8 *cpuPtr{mapping->ptr + bufferOffset};
if (!state.soc->gmmu.MapFixed(gpuAddress, cpuPtr, mappingSize)) { state.soc->gm20b.gmmu.Map(gpuAddress, cpuPtr, mappingSize);
state.logger->Warn("Failed to remap GPU address space region: 0x{:X}", gpuAddress);
return PosixResult::InvalidArgument;
}
return PosixResult::Success; return PosixResult::Success;
} catch (const std::out_of_range &e) {
state.logger->Warn("Cannot remap an unmapped GPU address space region: 0x{:X}", offset);
return PosixResult::InvalidArgument;
}
} }
auto h{core.nvMap.GetHandle(handle)}; auto h{core.nvMap.GetHandle(handle)};
if (!h) if (!h)
return PosixResult::InvalidArgument; return PosixResult::InvalidArgument;
if (auto err{h->Duplicate(ctx.internalSession)}; err != PosixResult::Success)
return err;
u8 *cpuPtr{reinterpret_cast<u8 *>(h->address + bufferOffset)}; u8 *cpuPtr{reinterpret_cast<u8 *>(h->address + bufferOffset)};
u64 size{mappingSize ? mappingSize : h->origSize}; u64 size{mappingSize ? mappingSize : h->origSize};
if (flags.fixed) if (flags.fixed) {
offset = state.soc->gmmu.MapFixed(offset, cpuPtr, size); auto alloc{allocationMap.upper_bound(offset)};
if (alloc-- == allocationMap.begin() || (offset - alloc->first) + size > alloc->second.size)
throw exception("Cannot perform a fixed mapping into an unallocated region!");
state.soc->gm20b.gmmu.Map(offset, cpuPtr, size);
auto mapping{std::make_shared<Mapping>(cpuPtr, offset, size, true, false, alloc->second.sparse)};
alloc->second.mappings.push_back(mapping);
mappingMap[offset] = mapping;
} else {
bool bigPage{[&] () {
if (util::IsAligned(h->align, vm.bigPageSize))
return true;
else if (util::IsAligned(h->align, VM::PageSize))
return false;
else else
offset = state.soc->gmmu.MapAllocate(cpuPtr, size); throw exception("Invalid handle alignment: 0x{:X}", h->align);
}()};
if (offset == 0) { auto &allocator{bigPage ? vm.bigPageAllocator : vm.smallPageAllocator};
state.logger->Warn("Failed to map GPU address space region!"); u32 pageSize{bigPage ? vm.bigPageSize : VM::PageSize};
return PosixResult::InvalidArgument; u32 pageSizeBits{bigPage ? vm.bigPageSizeBits : VM::PageSizeBits};
offset = static_cast<u64>(allocator->Allocate(util::AlignUp(size, pageSize) >> pageSizeBits)) << pageSizeBits;
state.soc->gm20b.gmmu.Map(offset, cpuPtr, size);
auto mapping{std::make_shared<Mapping>(cpuPtr, offset, size, false, bigPage, false)};
mappingMap[offset] = mapping;
} }
state.logger->Debug("Mapped to 0x{:X}", offset); state.logger->Debug("Mapped to 0x{:X}", offset);
regionMap[offset] = {cpuPtr, size, flags.fixed};
return PosixResult::Success; return PosixResult::Success;
} }
PosixResult AsGpu::GetVaRegions(In<u64> bufAddr, InOut<u32> bufSize, Out<std::array<VaRegion, 2>> vaRegions) { PosixResult AsGpu::GetVaRegions(In<u64> bufAddr, InOut<u32> bufSize, Out<std::array<VaRegion, 2>> vaRegions) {
// TODO: impl when we move allocator to nvdrv if (!vm.initialised)
return PosixResult::InvalidArgument;
vaRegions = std::array<VaRegion, 2> {
VaRegion{
.pageSize = VM::PageSize,
.pages = vm.smallPageAllocator->vaLimit - vm.smallPageAllocator->vaStart,
.offset = vm.smallPageAllocator->vaStart << VM::PageSizeBits,
},
VaRegion{
.pageSize = vm.bigPageSize,
.pages = vm.bigPageAllocator->vaLimit - vm.bigPageAllocator->vaStart,
.offset = vm.bigPageAllocator->vaStart << vm.bigPageSizeBits,
}
};
return PosixResult::Success; return PosixResult::Success;
} }
@ -116,30 +182,83 @@ namespace skyline::service::nvdrv::device::nvhost {
return GetVaRegions(bufAddr, bufSize, vaRegions); return GetVaRegions(bufAddr, bufSize, vaRegions);
} }
PosixResult AsGpu::AllocAsEx(In<u32> bigPageSize, In<FileDescriptor> asFd, In<u32> flags, In<u64> vaRangeStart, In<u64> vaRangeEnd, In<u64> vaRangeSplit) { PosixResult AsGpu::AllocAsEx(In<u32> flags, In<FileDescriptor> asFd, In<u32> bigPageSize, In<u64> vaRangeStart, In<u64> vaRangeEnd, In<u64> vaRangeSplit) {
// TODO: create the allocator here if (vm.initialised)
throw exception("Cannot initialise an address space twice!");
state.logger->Debug("bigPageSize: 0x{:X}, asFd: {}, flags: 0x{:X}, vaRangeStart: 0x{:X}, vaRangeEnd: 0x{:X}, vaRangeSplit: 0x{:X}",
bigPageSize, asFd, flags, vaRangeStart, vaRangeEnd, vaRangeSplit);
if (bigPageSize) {
if (!std::ispow2(bigPageSize)) {
state.logger->Error("Non power-of-2 big page size: 0x{:X}!", bigPageSize);
return PosixResult::InvalidArgument;
}
if (!(bigPageSize & VM::SupportedBigPageSizes)) {
state.logger->Error("Unsupported big page size: 0x{:X}!", bigPageSize);
return PosixResult::InvalidArgument;
}
vm.bigPageSize = bigPageSize;
vm.bigPageSizeBits = std::countr_zero(bigPageSize);
vm.vaRangeStart = bigPageSize << VM::VaStartShift;
}
if (vaRangeStart) {
vm.vaRangeStart = vaRangeStart;
vm.vaRangeSplit = vaRangeSplit;
vm.vaRangeEnd = vaRangeEnd;
}
u64 startPages{vm.vaRangeStart >> VM::PageSizeBits};
u64 endPages{vm.vaRangeSplit >> VM::PageSizeBits};
vm.smallPageAllocator = std::make_unique<VM::Allocator>(startPages, endPages);
u64 startBigPages{vm.vaRangeSplit >> vm.bigPageSizeBits};
u64 endBigPages{(vm.vaRangeEnd - vm.vaRangeSplit) >> vm.bigPageSizeBits};
vm.bigPageAllocator = std::make_unique<VM::Allocator>(startBigPages, endBigPages);
vm.initialised = true;
return PosixResult::Success; return PosixResult::Success;
} }
PosixResult AsGpu::Remap(span<RemapEntry> entries) { PosixResult AsGpu::Remap(span<RemapEntry> entries) {
constexpr u32 BigPageSize{0x10}; //!< The big page size of the GPU
for (const auto &entry : entries) { for (const auto &entry : entries) {
u64 virtAddr{static_cast<u64>(entry.asOffsetBigPages) << vm.bigPageSizeBits};
u64 size{static_cast<u64>(entry.bigPages) << vm.bigPageSizeBits};
auto alloc{allocationMap.upper_bound(virtAddr)};
if (alloc-- == allocationMap.begin() || (virtAddr - alloc->first) + size > alloc->second.size) {
state.logger->Warn("Cannot remap into an unallocated region!");
return PosixResult::InvalidArgument;
}
if (!alloc->second.sparse) {
state.logger->Warn("Cannot remap a non-sparse mapping!");
return PosixResult::InvalidArgument;
}
if (!entry.handle) {
state.soc->gm20b.gmmu.Map(virtAddr, soc::gm20b::GM20B::GMMU::SparsePlaceholderAddress(), size, true);
} else {
auto h{core.nvMap.GetHandle(entry.handle)}; auto h{core.nvMap.GetHandle(entry.handle)};
if (!h) if (!h)
return PosixResult::InvalidArgument; return PosixResult::InvalidArgument;
u64 virtAddr{static_cast<u64>(entry.asOffsetBigPages) << BigPageSize}; u8 *cpuPtr{reinterpret_cast<u8 *>(h->address + (static_cast<u64>(entry.handleOffsetBigPages) << vm.bigPageSizeBits))};
u8 *cpuPtr{reinterpret_cast<u8 *>(h->address + (static_cast<u64>(entry.handleOffsetBigPages) << BigPageSize))};
u64 size{static_cast<u64>(entry.bigPages) << BigPageSize};
state.soc->gmmu.MapFixed(virtAddr, cpuPtr, size); state.soc->gm20b.gmmu.Map(virtAddr, cpuPtr, size);
}
} }
return PosixResult::Success; return PosixResult::Success;
} }
#include <services/nvdrv/devices/deserialisation/macro_def.h> #include <services/nvdrv/devices/deserialisation/macro_def.inc>
static constexpr u32 AsGpuMagic{0x41}; static constexpr u32 AsGpuMagic{0x41};
VARIABLE_IOCTL_HANDLER_FUNC(AsGpu, ({ VARIABLE_IOCTL_HANDLER_FUNC(AsGpu, ({
@ -152,7 +271,7 @@ namespace skyline::service::nvdrv::device::nvhost {
IOCTL_CASE_ARGS(INOUT, SIZE(0x8), MAGIC(AsGpuMagic), FUNC(0x5), IOCTL_CASE_ARGS(INOUT, SIZE(0x8), MAGIC(AsGpuMagic), FUNC(0x5),
UnmapBuffer, ARGS(In<u64>)) UnmapBuffer, ARGS(In<u64>))
IOCTL_CASE_ARGS(INOUT, SIZE(0x28), MAGIC(AsGpuMagic), FUNC(0x6), IOCTL_CASE_ARGS(INOUT, SIZE(0x28), MAGIC(AsGpuMagic), FUNC(0x6),
MapBufferEx, ARGS(In<MappingFlags>, In<u32>, In<core::NvMap::Handle::Id>, InOut<u32>, In<u64>, In<u64>, InOut<u64>)) MapBufferEx, ARGS(In<MappingFlags>, In<u32>, In<core::NvMap::Handle::Id>, Pad<u32>, In<u64>, In<u64>, InOut<u64>))
IOCTL_CASE_ARGS(INOUT, SIZE(0x40), MAGIC(AsGpuMagic), FUNC(0x8), IOCTL_CASE_ARGS(INOUT, SIZE(0x40), MAGIC(AsGpuMagic), FUNC(0x8),
GetVaRegions, ARGS(In<u64>, InOut<u32>, Pad<u32>, Out<std::array<VaRegion, 2>>)) GetVaRegions, ARGS(In<u64>, InOut<u32>, Pad<u32>, Out<std::array<VaRegion, 2>>))
IOCTL_CASE_ARGS(IN, SIZE(0x28), MAGIC(AsGpuMagic), FUNC(0x9), IOCTL_CASE_ARGS(IN, SIZE(0x28), MAGIC(AsGpuMagic), FUNC(0x9),
@ -166,5 +285,5 @@ namespace skyline::service::nvdrv::device::nvhost {
INLINE_IOCTL_CASE_ARGS(INOUT, SIZE(0x40), MAGIC(AsGpuMagic), FUNC(0x8), INLINE_IOCTL_CASE_ARGS(INOUT, SIZE(0x40), MAGIC(AsGpuMagic), FUNC(0x8),
GetVaRegions3, ARGS(In<u64>, InOut<u32>, Pad<u32>, Out<std::array<VaRegion, 2>>)) GetVaRegions3, ARGS(In<u64>, InOut<u32>, Pad<u32>, Out<std::array<VaRegion, 2>>))
})) }))
#include <services/nvdrv/devices/deserialisation/macro_undef.h> #include <services/nvdrv/devices/deserialisation/macro_undef.inc>
} }

View File

@ -3,6 +3,8 @@
#pragma once #pragma once
#include <common/address_space.h>
#include <services/nvdrv/devices/nvdevice.h> #include <services/nvdrv/devices/nvdevice.h>
namespace skyline::service::nvdrv::device::nvhost { namespace skyline::service::nvdrv::device::nvhost {
@ -12,18 +14,64 @@ namespace skyline::service::nvdrv::device::nvhost {
*/ */
class AsGpu : public NvDevice { class AsGpu : public NvDevice {
private: private:
struct AddressSpaceRegion { struct Mapping {
u8 *ptr; u8 *ptr;
u64 offset;
u64 size; u64 size;
bool fixed; bool fixed;
bool bigPage; // Only valid if fixed == false
bool sparseAlloc;
Mapping(u8 *ptr, u64 offset, u64 size, bool fixed, bool bigPage, bool sparseAlloc) : ptr(ptr),
offset(offset),
size(size),
fixed(fixed),
bigPage(bigPage),
sparseAlloc(sparseAlloc) {}
}; };
std::map<u64, AddressSpaceRegion> regionMap; //!< This maps the base addresses of mapped buffers to their total sizes and mapping type, this is needed as what was originally a single buffer may have been split into multiple GPU side buffers with the remap flag. struct Allocation {
u64 size;
std::list<std::shared_ptr<Mapping>> mappings;
u32 pageSize;
bool sparse;
};
std::map<u64, std::shared_ptr<Mapping>> mappingMap; //!< This maps the base addresses of mapped buffers to their total sizes and mapping type, this is needed as what was originally a single buffer may have been split into multiple GPU side buffers with the remap flag.
std::map<u64, Allocation> allocationMap;
// Holds the address space layout parameters and the page allocators for this AS; AllocAsEx fills these in and sets `initialised`
struct VM {
// Page size and its log2; AllocAsEx shifts the VA range start/split by PageSizeBits to get the small page allocator's bounds
static constexpr u32 PageSize{0x1000};
static constexpr u32 PageSizeBits{std::countr_zero(PageSize)};
// Bitmask that AllocAsEx tests a requested big page size against (bits 16 and 17 set)
static constexpr u32 SupportedBigPageSizes{0x30000};
static constexpr u32 DefaultBigPageSize{0x20000};
// Big page size in use and its log2, both overwritten by AllocAsEx when a non-zero size is supplied
u32 bigPageSize{DefaultBigPageSize};
u32 bigPageSizeBits{std::countr_zero(DefaultBigPageSize)};
// The default start of the VA range is the big page size shifted left by this amount
static constexpr u32 VaStartShift{10};
static constexpr u64 DefaultVaSplit{1ULL << 34};
static constexpr u64 DefaultVaRange{1ULL << 37};
// Start, split point and end of the VA range; AllocAsEx replaces all three when a non-zero start is supplied
u64 vaRangeStart{DefaultBigPageSize << VaStartShift};
u64 vaRangeSplit{DefaultVaSplit};
u64 vaRangeEnd{DefaultVaRange};
// NOTE(review): the allocators appear to work in units of pages rather than bytes (AllocAsEx passes shifted addresses) — confirm against FlatAllocator
using Allocator = FlatAllocator<u32, 0, 32>;
std::unique_ptr<Allocator> bigPageAllocator{};
std::unique_ptr<Allocator> smallPageAllocator{};
// Set by AllocAsEx once the allocators exist, a second AllocAsEx call throws when this is already set
bool initialised{};
} vm;
public: public:
struct MappingFlags { struct MappingFlags {
bool fixed : 1; bool fixed : 1;
u8 _pad0_ : 7; bool sparse : 1;
u8 _pad0_ : 6;
bool remap : 1; bool remap : 1;
u32 _pad1_ : 23; u32 _pad1_ : 23;
}; };
@ -77,7 +125,7 @@ namespace skyline::service::nvdrv::device::nvhost {
* @brief Maps a region into this address space with extra parameters * @brief Maps a region into this address space with extra parameters
* @url https://switchbrew.org/wiki/NV_services#NVGPU_AS_IOCTL_MAP_BUFFER_EX * @url https://switchbrew.org/wiki/NV_services#NVGPU_AS_IOCTL_MAP_BUFFER_EX
*/ */
PosixResult MapBufferEx(In<MappingFlags> flags, In<u32> kind, In<core::NvMap::Handle::Id> handle, InOut<u32> pageSize, In<u64> bufferOffset, In<u64> mappingSize, InOut<u64> offset); PosixResult MapBufferEx(In<MappingFlags> flags, In<u32> kind, In<core::NvMap::Handle::Id> handle, In<u64> bufferOffset, In<u64> mappingSize, InOut<u64> offset);
/** /**
* @brief Returns info about the address space and its page sizes * @brief Returns info about the address space and its page sizes
@ -94,7 +142,7 @@ namespace skyline::service::nvdrv::device::nvhost {
* @brief Allocates this address space with the given parameters * @brief Allocates this address space with the given parameters
* @url https://switchbrew.org/wiki/NV_services#NVGPU_AS_IOCTL_ALLOC_AS_EX * @url https://switchbrew.org/wiki/NV_services#NVGPU_AS_IOCTL_ALLOC_AS_EX
*/ */
PosixResult AllocAsEx(In<u32> bigPageSize, In<FileDescriptor> asFd, In<u32> flags, In<u64> vaRangeStart, In<u64> vaRangeEnd, In<u64> vaRangeSplit); PosixResult AllocAsEx(In<u32> flags, In<FileDescriptor> asFd, In<u32> bigPageSize, In<u64> vaRangeStart, In<u64> vaRangeEnd, In<u64> vaRangeSplit);
/** /**
* @brief Remaps a region of the GPU address space * @brief Remaps a region of the GPU address space

View File

@ -234,7 +234,7 @@ namespace skyline::service::nvdrv::device::nvhost {
return nullptr; return nullptr;
} }
#include <services/nvdrv/devices/deserialisation/macro_def.h> #include <services/nvdrv/devices/deserialisation/macro_def.inc>
static constexpr u32 CtrlMagic{0}; static constexpr u32 CtrlMagic{0};
IOCTL_HANDLER_FUNC(Ctrl, ({ IOCTL_HANDLER_FUNC(Ctrl, ({
@ -254,5 +254,5 @@ namespace skyline::service::nvdrv::device::nvhost {
IOCTL_CASE_RESULT(INOUT, SIZE(0x183), MAGIC(CtrlMagic), FUNC(0x1B), IOCTL_CASE_RESULT(INOUT, SIZE(0x183), MAGIC(CtrlMagic), FUNC(0x1B),
PosixResult::InvalidArgument) // GetConfig isn't available in production PosixResult::InvalidArgument) // GetConfig isn't available in production
})) }))
#include <services/nvdrv/devices/deserialisation/macro_undef.h> #include <services/nvdrv/devices/deserialisation/macro_undef.inc>
} }

View File

@ -62,7 +62,7 @@ namespace skyline::service::nvdrv::device::nvhost {
} }
} }
#include <services/nvdrv/devices/deserialisation/macro_def.h> #include <services/nvdrv/devices/deserialisation/macro_def.inc>
static constexpr u32 CtrlGpuMagic{0x47}; static constexpr u32 CtrlGpuMagic{0x47};
IOCTL_HANDLER_FUNC(CtrlGpu, ({ IOCTL_HANDLER_FUNC(CtrlGpu, ({
@ -77,5 +77,5 @@ namespace skyline::service::nvdrv::device::nvhost {
IOCTL_CASE_ARGS(OUT, SIZE(0x8), MAGIC(CtrlGpuMagic), FUNC(0x14), IOCTL_CASE_ARGS(OUT, SIZE(0x8), MAGIC(CtrlGpuMagic), FUNC(0x14),
GetActiveSlotMask, ARGS(Out<u32>, Out<u32>)) GetActiveSlotMask, ARGS(Out<u32>, Out<u32>))
})) }))
#include <services/nvdrv/devices/deserialisation/macro_undef.h> #include <services/nvdrv/devices/deserialisation/macro_undef.inc>
} }

View File

@ -104,7 +104,7 @@ namespace skyline::service::nvdrv::device::nvhost {
} }
} }
#include <services/nvdrv/devices/deserialisation/macro_def.h> #include <services/nvdrv/devices/deserialisation/macro_def.inc>
static constexpr u32 GpuChannelUserMagic{0x47}; static constexpr u32 GpuChannelUserMagic{0x47};
static constexpr u32 GpuChannelMagic{0x48}; static constexpr u32 GpuChannelMagic{0x48};
@ -138,5 +138,5 @@ namespace skyline::service::nvdrv::device::nvhost {
INLINE_IOCTL_CASE_ARGS(INOUT, SIZE(0x18), MAGIC(GpuChannelMagic), FUNC(0x1B), INLINE_IOCTL_CASE_ARGS(INOUT, SIZE(0x18), MAGIC(GpuChannelMagic), FUNC(0x1B),
SubmitGpfifo2, ARGS(In<u64>, In<u32>, InOut<SubmitGpfifoFlags>, InOut<Fence>)) SubmitGpfifo2, ARGS(In<u64>, In<u32>, InOut<SubmitGpfifoFlags>, InOut<Fence>))
})) }))
#include <services/nvdrv/devices/deserialisation/macro_undef.h> #include <services/nvdrv/devices/deserialisation/macro_undef.inc>
} }

View File

@ -115,7 +115,7 @@ namespace skyline::service::nvdrv::device {
return PosixResult::Success; return PosixResult::Success;
} }
#include "deserialisation/macro_def.h" #include "deserialisation/macro_def.inc"
static constexpr u32 NvMapMagic{1}; static constexpr u32 NvMapMagic{1};
IOCTL_HANDLER_FUNC(NvMap, ({ IOCTL_HANDLER_FUNC(NvMap, ({
@ -132,6 +132,6 @@ namespace skyline::service::nvdrv::device {
IOCTL_CASE_ARGS(INOUT, SIZE(0x8), MAGIC(NvMapMagic), FUNC(0xE), IOCTL_CASE_ARGS(INOUT, SIZE(0x8), MAGIC(NvMapMagic), FUNC(0xE),
GetId, ARGS(Out<NvMapCore::Handle::Id>, In<NvMapCore::Handle::Id>)) GetId, ARGS(Out<NvMapCore::Handle::Id>, In<NvMapCore::Handle::Id>))
})) }))
#include "deserialisation/macro_undef.h" #include "deserialisation/macro_undef.inc"
} }

View File

@ -3,7 +3,6 @@
#pragma once #pragma once
#include "soc/gmmu.h"
#include "soc/host1x.h" #include "soc/host1x.h"
#include "soc/gm20b.h" #include "soc/gm20b.h"
@ -14,10 +13,9 @@ namespace skyline::soc {
*/ */
class SOC { class SOC {
public: public:
gmmu::GraphicsMemoryManager gmmu;
host1x::Host1X host1x; host1x::Host1X host1x;
gm20b::GM20B gm20b; gm20b::GM20B gm20b;
SOC(const DeviceState &state) : gmmu(state), gm20b(state) {} SOC(const DeviceState &state) : gm20b(state) {}
}; };
} }

View File

@ -0,0 +1,20 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <common/address_space.inc>
#include "gm20b.h"
// Explicitly instantiates the address space templates with the parameters used by the GM20B GMMU
// NOTE(review): presumably <common/address_space.inc> provides the member definitions these instantiations need — confirm
namespace skyline {
template class FlatAddressSpaceMap<u64, 0, u8 *, nullptr, true, soc::gm20b::GM20B::AddressSpaceBits>;
template class FlatMemoryManager<u64, 0, soc::gm20b::GM20B::AddressSpaceBits>;
}
namespace skyline::soc::gm20b {
/**
 * @brief Constructs every engine and the GPFIFO from the supplied device state
 * @note The initialisers are written in member declaration order as that is the order C++ runs them in regardless of how the list is written, `gmmu` is not listed and is therefore default-initialised
 */
GM20B::GM20B(const DeviceState &state) :
    fermi2D(state),
    maxwell3D(state),
    maxwellCompute(state),
    maxwellDma(state),
    keplerMemory(state),
    gpfifo(state) {}
}

View File

@ -3,23 +3,28 @@
#pragma once #pragma once
#include <common/address_space.h>
#include "gm20b/engines/maxwell_3d.h" #include "gm20b/engines/maxwell_3d.h"
#include "gm20b/gpfifo.h" #include "gm20b/gpfifo.h"
namespace skyline::soc::gm20b { namespace skyline::soc::gm20b {
/** /**
* @brief The GPU block in the X1, it contains all GPU engines required for accelerating graphics operations * @brief The GPU block in the X1, it contains all GPU engines required for accelerating graphics operations
* @note We omit parts of components related to external access such as the GM20B Host, all accesses to the external components are done directly * @note We omit parts of components related to external access such as the grhost, all accesses to the external components are done directly
*/ */
class GM20B { class GM20B {
public: public:
static constexpr u8 AddressSpaceBits{40}; //!< The width of the GMMU AS
using GMMU = FlatMemoryManager<u64, 0, AddressSpaceBits>;
engine::Engine fermi2D; engine::Engine fermi2D;
engine::maxwell3d::Maxwell3D maxwell3D; engine::maxwell3d::Maxwell3D maxwell3D;
engine::Engine maxwellCompute; engine::Engine maxwellCompute;
engine::Engine maxwellDma; engine::Engine maxwellDma;
engine::Engine keplerMemory; engine::Engine keplerMemory;
GPFIFO gpfifo; GPFIFO gpfifo;
GMMU gmmu;
GM20B(const DeviceState &state) : fermi2D(state), keplerMemory(state), maxwell3D(state), maxwellCompute(state), maxwellDma(state), gpfifo(state) {} GM20B(const DeviceState &state);
}; };
} }

View File

@ -1,7 +1,7 @@
// SPDX-License-Identifier: MPL-2.0 // SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/) // Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <soc/gmmu.h> #include <common/address_space.h>
#include <soc/gm20b/engines/maxwell_3d.h> #include <soc/gm20b/engines/maxwell_3d.h>
namespace skyline::soc::gm20b::engine::maxwell3d { namespace skyline::soc::gm20b::engine::maxwell3d {

View File

@ -157,7 +157,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
switch (registers.semaphore.info.structureSize) { switch (registers.semaphore.info.structureSize) {
case Registers::SemaphoreInfo::StructureSize::OneWord: case Registers::SemaphoreInfo::StructureSize::OneWord:
state.soc->gmmu.Write<u32>(static_cast<u32>(result), registers.semaphore.address.Pack()); state.soc->gm20b.gmmu.Write<u32>(registers.semaphore.address.Pack(), static_cast<u32>(result));
break; break;
case Registers::SemaphoreInfo::StructureSize::FourWords: { case Registers::SemaphoreInfo::StructureSize::FourWords: {
// Convert the current nanosecond time to GPU ticks // Convert the current nanosecond time to GPU ticks
@ -167,7 +167,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
u64 nsTime{util::GetTimeNs()}; u64 nsTime{util::GetTimeNs()};
u64 timestamp{(nsTime / NsToTickDenominator) * NsToTickNumerator + ((nsTime % NsToTickDenominator) * NsToTickNumerator) / NsToTickDenominator}; u64 timestamp{(nsTime / NsToTickDenominator) * NsToTickNumerator + ((nsTime % NsToTickDenominator) * NsToTickNumerator) / NsToTickDenominator};
state.soc->gmmu.Write<FourWordResult>(FourWordResult{result, timestamp}, registers.semaphore.address.Pack()); state.soc->gm20b.gmmu.Write<FourWordResult>(registers.semaphore.address.Pack(), FourWordResult{result, timestamp});
break; break;
} }
} }

View File

@ -56,7 +56,7 @@ namespace skyline::soc::gm20b {
} }
pushBufferData.resize(gpEntry.size); pushBufferData.resize(gpEntry.size);
state.soc->gmmu.Read<u32>(pushBufferData, gpEntry.Address()); state.soc->gm20b.gmmu.Read<u32>(pushBufferData, gpEntry.Address());
for (auto entry{pushBufferData.begin()}; entry != pushBufferData.end(); entry++) { for (auto entry{pushBufferData.begin()}; entry != pushBufferData.end(); entry++) {
// An entry containing all zeroes is a NOP, skip over it // An entry containing all zeroes is a NOP, skip over it
@ -88,8 +88,7 @@ namespace skyline::soc::gm20b {
return; return;
default: default:
state.logger->Warn("Unsupported pushbuffer method SecOp: {}", static_cast<u8>(methodHeader.secOp)); throw exception("Unsupported pushbuffer method SecOp: {}", static_cast<u8>(methodHeader.secOp));
break;
} }
} }
} }
@ -106,7 +105,7 @@ namespace skyline::soc::gm20b {
try { try {
signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, signal::ExceptionalSignalHandler); signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, signal::ExceptionalSignalHandler);
pushBuffers->Process([this](GpEntry gpEntry) { pushBuffers->Process([this](GpEntry gpEntry) {
state.logger->Debug("Processing pushbuffer: 0x{:X}", gpEntry.Address()); state.logger->Warn("Processing pushbuffer: 0x{:X}", gpEntry.Address());
Process(gpEntry); Process(gpEntry);
}); });
} catch (const signal::SignalException &e) { } catch (const signal::SignalException &e) {

View File

@ -1,214 +0,0 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <kernel/types/KProcess.h>
#include "gmmu.h"
namespace skyline::soc::gmmu {
constexpr u64 GpuPageSize{1 << 16}; //!< The page size of the GPU address space
/**
 * @brief Seeds the chunk list with a single unmapped chunk that every later chunk is carved out of
 */
GraphicsMemoryManager::GraphicsMemoryManager(const DeviceState &state) : state(state) {
    // The literal must be at least 64 bits wide, `unsigned long` is only 32 bits on some targets and cannot be shifted by 40
    constexpr u64 gpuAddressSpaceSize{1ULL << 40}; //!< The size of the GPU address space
    constexpr u64 gpuAddressSpaceBase{0x100000}; //!< The base of the GPU address space - must be non-zero

    // Create the initial chunk that will be split to create new chunks
    chunks.emplace_back(gpuAddressSpaceBase, gpuAddressSpaceSize, nullptr, ChunkState::Unmapped);
}
/**
 * @brief Searches the chunk list front to back for the first chunk in the desired state that can hold the request
 * @param desiredState The state the chunk must be in
 * @param size The minimum size of the chunk, a chunk of exactly this size is accepted
 * @param alignment The alignment required of the chunk's base address, zero disables the check
 * @return A copy of the first matching chunk or nullopt when there is none
 */
std::optional<ChunkDescriptor> GraphicsMemoryManager::FindChunk(ChunkState desiredState, u64 size, u64 alignment) {
    auto match{std::find_if(chunks.begin(), chunks.end(), [desiredState, size, alignment](const ChunkDescriptor &candidate) -> bool {
        bool aligned{!alignment || util::IsAligned(candidate.virtualAddress, alignment)};
        // An exact fit is valid: InsertChunk replaces the chunk in place when nothing is left over on either side
        return aligned && candidate.size >= size && candidate.state == desiredState;
    })};

    if (match == chunks.end())
        return std::nullopt;
    return *match;
}
// Inserts newChunk into the chunk list, trimming, splitting or erasing the existing chunks that it overlaps
// Returns newChunk.virtualAddress on success and throws when no existing chunk covers the end of newChunk
// The list is walked front to back, so the first chunk that matches either branch is the one holding newChunk's start address
u64 GraphicsMemoryManager::InsertChunk(const ChunkDescriptor &newChunk) {
auto chunkEnd{chunks.end()};
for (auto chunk{chunks.begin()}; chunk != chunkEnd; chunk++) {
// Case 1: newChunk lies wholly inside this chunk, which is split into up to three pieces (head, newChunk, tail)
if (chunk->CanContain(newChunk)) {
auto oldChunk{*chunk};
// newSize is the size of the head that stays in front of newChunk, extension is what remains of the old chunk behind it
u64 newSize{newChunk.virtualAddress - chunk->virtualAddress};
u64 extension{chunk->size - newSize - newChunk.size};
if (newSize == 0) {
*chunk = newChunk;
} else {
chunk->size = newSize;
// insert() may reallocate the vector so `chunk` is reassigned to the iterator it returns
chunk = chunks.insert(std::next(chunk), newChunk);
}
// The tail keeps the old chunk's state, its CPU pointer is advanced past the head and newChunk when the old chunk was mapped
if (extension)
chunks.insert(std::next(chunk), ChunkDescriptor(newChunk.virtualAddress + newChunk.size, extension, (oldChunk.state == ChunkState::Mapped) ? (oldChunk.cpuPtr + newSize + newChunk.size) : nullptr, oldChunk.state));
return newChunk.virtualAddress;
// Case 2: newChunk starts inside this chunk but runs past its end
} else if (chunk->virtualAddress + chunk->size > newChunk.virtualAddress) {
// Trim this chunk so that it ends where newChunk begins, this can leave it with a size of zero
// NOTE(review): this trim and the erasures below are not undone when the function breaks out and throws further down
chunk->size = newChunk.virtualAddress - chunk->virtualAddress;
// Deletes all chunks that are within the chunk being inserted and split the final one
auto tailChunk{std::next(chunk)};
while (tailChunk != chunkEnd) {
if (tailChunk->virtualAddress + tailChunk->size >= newChunk.virtualAddress + newChunk.size)
break;
tailChunk = chunks.erase(tailChunk);
// erase() invalidates the cached end iterator so it has to be refreshed
chunkEnd = chunks.end();
}
// The given chunk is too large to fit into existing chunks
if (tailChunk == chunkEnd)
break;
// Move the start of the tail chunk forward to the end of newChunk, keeping its CPU pointer in step when it is mapped
u64 chunkSliceOffset{newChunk.virtualAddress + newChunk.size - tailChunk->virtualAddress};
tailChunk->virtualAddress += chunkSliceOffset;
tailChunk->size -= chunkSliceOffset;
if (tailChunk->state == ChunkState::Mapped)
tailChunk->cpuPtr += chunkSliceOffset;
// If the size of the head chunk is zero then we can directly replace it with our new one rather than inserting it
auto headChunk{std::prev(tailChunk)};
if (headChunk->size == 0)
*headChunk = newChunk;
else
chunks.insert(std::next(headChunk), newChunk);
return newChunk.virtualAddress;
}
}
throw exception("Failed to insert chunk into GPU address space!");
}
/**
 * @brief Marks an automatically chosen unmapped region as reserved
 * @param size The size of the region, rounded up to the GPU page size
 * @param alignment The alignment that is passed on to the chunk search
 * @return The base virtual address of the reservation or 0 when no suitable chunk exists
 */
u64 GraphicsMemoryManager::ReserveSpace(u64 size, u64 alignment) {
    size = util::AlignUp(size, GpuPageSize);

    std::unique_lock lock(mutex);
    auto candidate{FindChunk(ChunkState::Unmapped, size, alignment)};
    if (candidate) [[likely]] {
        // Reuse the base address of the chunk that was found but only take as much of it as was asked for
        ChunkDescriptor reservation{*candidate};
        reservation.size = size;
        reservation.state = ChunkState::Reserved;
        return InsertChunk(reservation);
    }

    return 0;
}
/**
 * @brief Marks the region at a caller supplied address as reserved
 * @param virtualAddress The base of the region, has to be aligned to the GPU page size
 * @param size The size of the region, rounded up to the GPU page size
 * @return The base virtual address of the reservation or 0 when the address is misaligned
 */
u64 GraphicsMemoryManager::ReserveFixed(u64 virtualAddress, u64 size) {
    if (util::IsAligned(virtualAddress, GpuPageSize)) [[likely]] {
        ChunkDescriptor reservation(virtualAddress, util::AlignUp(size, GpuPageSize), nullptr, ChunkState::Reserved);

        std::unique_lock lock(mutex);
        return InsertChunk(reservation);
    }

    return 0;
}
/**
 * @brief Maps CPU memory into an automatically chosen unmapped region
 * @param cpuPtr The CPU side memory that backs the mapping
 * @param size The size of the mapping, rounded up to the GPU page size
 * @return The base virtual address of the mapping or 0 when no suitable chunk exists
 */
u64 GraphicsMemoryManager::MapAllocate(u8 *cpuPtr, u64 size) {
    size = util::AlignUp(size, GpuPageSize);

    std::unique_lock lock(mutex);
    auto candidate{FindChunk(ChunkState::Unmapped, size)};
    if (candidate) [[likely]] {
        // Reuse the base address of the chunk that was found but only take as much of it as was asked for
        ChunkDescriptor mapping{*candidate};
        mapping.cpuPtr = cpuPtr;
        mapping.size = size;
        mapping.state = ChunkState::Mapped;
        return InsertChunk(mapping);
    }

    return 0;
}
/**
 * @brief Maps CPU memory at a caller supplied virtual address
 * @param virtualAddress The base of the mapping, has to be aligned to the GPU page size
 * @param cpuPtr The CPU side memory that backs the mapping
 * @param size The size of the mapping, rounded up to the GPU page size
 * @return The base virtual address of the mapping or 0 when the address is misaligned
 */
u64 GraphicsMemoryManager::MapFixed(u64 virtualAddress, u8 *cpuPtr, u64 size) {
    if (util::IsAligned(virtualAddress, GpuPageSize)) [[likely]] {
        ChunkDescriptor mapping(virtualAddress, util::AlignUp(size, GpuPageSize), cpuPtr, ChunkState::Mapped);

        std::unique_lock lock(mutex);
        return InsertChunk(mapping);
    }

    return 0;
}
/**
 * @brief Returns a region to the unmapped state by inserting an unmapped chunk over it
 * @param virtualAddress The base of the region, has to be aligned to the GPU page size
 * @param size The size of the region, used as-is without any rounding
 * @return false when the address is misaligned or the insertion threw, true otherwise
 */
bool GraphicsMemoryManager::Unmap(u64 virtualAddress, u64 size) {
    if (!util::IsAligned(virtualAddress, GpuPageSize)) [[unlikely]]
        return false;

    const ChunkDescriptor hole(virtualAddress, size, nullptr, ChunkState::Unmapped);
    try {
        std::unique_lock lock(mutex);
        InsertChunk(hole);
        return true;
    } catch (const std::exception &) {
        // A failed insertion is reported through the return value rather than propagated
        return false;
    }
}
/**
 * @brief Copies a region of the GPU virtual address space into CPU memory
 * @param destination The buffer that receives `size` bytes
 * @param virtualAddress The GPU virtual address to start reading from
 * @param size The amount of bytes to read
 * @throws exception When any part of the region isn't covered by a mapped chunk
 */
void GraphicsMemoryManager::Read(u8 *destination, u64 virtualAddress, u64 size) {
    std::shared_lock lock(mutex);

    // upper_bound yields the first chunk that starts *after* the address, so the chunk holding the address is the one before it
    auto chunk{std::upper_bound(chunks.begin(), chunks.end(), virtualAddress, [](const u64 address, const ChunkDescriptor &chunk) -> bool {
        return address < chunk.virtualAddress;
    })};

    // Every chunk starts after the address, there is nothing to step back to
    if (chunk == chunks.begin())
        throw exception("Failed to read region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, size);

    chunk--;

    // Validate the chunk that actually holds the address, this also rejects addresses past the end of the last chunk
    u64 chunkOffset{virtualAddress - chunk->virtualAddress};
    if (chunk->state != ChunkState::Mapped || chunkOffset >= chunk->size)
        throw exception("Failed to read region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, size);

    u64 initialSize{size};
    u8 *source{chunk->cpuPtr + chunkOffset};
    u64 sourceSize{std::min(chunk->size - chunkOffset, size)};

    // A continuous region in the GPU address space may be made up of several discontinuous regions in physical memory so we have to iterate over all chunks
    while (size) {
        std::memcpy(destination + (initialSize - size), source, sourceSize);

        size -= sourceSize;
        if (size) {
            // Report the size that was requested rather than the amount that is still outstanding
            if (++chunk == chunks.end() || chunk->state != ChunkState::Mapped)
                throw exception("Failed to read region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, initialSize);

            source = chunk->cpuPtr;
            sourceSize = std::min(chunk->size, size);
        }
    }
}
/**
 * @brief Copies CPU memory into a region of the GPU virtual address space
 * @param source The buffer that supplies `size` bytes
 * @param virtualAddress The GPU virtual address to start writing to
 * @param size The amount of bytes to write
 * @throws exception When any part of the region isn't covered by a mapped chunk
 */
void GraphicsMemoryManager::Write(u8 *source, u64 virtualAddress, u64 size) {
    std::shared_lock lock(mutex);

    // upper_bound yields the first chunk that starts *after* the address, so the chunk holding the address is the one before it
    auto chunk{std::upper_bound(chunks.begin(), chunks.end(), virtualAddress, [](const u64 address, const ChunkDescriptor &chunk) -> bool {
        return address < chunk.virtualAddress;
    })};

    // Every chunk starts after the address, there is nothing to step back to
    if (chunk == chunks.begin())
        throw exception("Failed to write region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, size);

    chunk--;

    // Validate the chunk that actually holds the address, this also rejects addresses past the end of the last chunk
    u64 chunkOffset{virtualAddress - chunk->virtualAddress};
    if (chunk->state != ChunkState::Mapped || chunkOffset >= chunk->size)
        throw exception("Failed to write region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, size);

    u64 initialSize{size};
    u8 *destination{chunk->cpuPtr + chunkOffset};
    u64 destinationSize{std::min(chunk->size - chunkOffset, size)};

    // A continuous region in the GPU address space may be made up of several discontinuous regions in physical memory so we have to iterate over all chunks
    while (size) {
        std::memcpy(destination, source + (initialSize - size), destinationSize);

        size -= destinationSize;
        if (size) {
            // Report the size that was requested rather than the amount that is still outstanding
            if (++chunk == chunks.end() || chunk->state != ChunkState::Mapped)
                throw exception("Failed to write region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, initialSize);

            destination = chunk->cpuPtr;
            destinationSize = std::min(chunk->size, size);
        }
    }
}
}

View File

@ -1,140 +0,0 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <common.h>
namespace skyline::soc::gmmu {
/**
* @brief The state that a chunk of the GPU virtual address space can be in
*/
enum class ChunkState {
Unmapped, //!< The chunk is unmapped
Reserved, //!< The chunk is reserved
Mapped //!< The chunk is mapped and a CPU side address is present
};
/**
 * @brief Describes a contiguous range of the GPU virtual address space together with its state and backing CPU memory
 */
struct ChunkDescriptor {
    u64 virtualAddress; //!< The address of the chunk in the virtual address space
    u64 size; //!< The size of the chunk in bytes
    u8 *cpuPtr; //!< A pointer to the chunk in the application's address space (if mapped)
    ChunkState state;

    ChunkDescriptor(u64 virtualAddress, u64 size, u8 *cpuPtr, ChunkState state) : virtualAddress(virtualAddress), size(size), cpuPtr(cpuPtr), state(state) {}

    /**
     * @return If the given chunk can be contained wholly within this chunk
     * @note This only compares address ranges and doesn't modify the descriptor, so it can be called on const descriptors
     */
    bool CanContain(const ChunkDescriptor &chunk) const {
        return (chunk.virtualAddress >= virtualAddress) && ((size + virtualAddress) >= (chunk.size + chunk.virtualAddress));
    }
};
/**
* @brief The GraphicsMemoryManager class handles mapping between a Maxwell GPU virtual address space and an application's address space and is meant to roughly emulate the GMMU on the X1
* @note This is not accurate to the X1 as it would have an SMMU between the GMMU and physical memory but we don't emulate this abstraction at the moment
*/
class GraphicsMemoryManager {
private:
const DeviceState &state;
std::vector<ChunkDescriptor> chunks; //!< Every chunk of the address space, InsertChunk keeps them ordered by virtual address
std::shared_mutex mutex; //!< Locked exclusively by the functions that alter the chunk list and shared by Read/Write
/**
* @brief Finds a chunk in the virtual address space that meets the given requirements
* @note mutex MUST be locked when calling this
* @param desiredState The state of the chunk to find
* @param size The minimum size of the chunk to find
* @param alignment The minimum alignment of the chunk to find
* @return The first applicable chunk
*/
std::optional<ChunkDescriptor> FindChunk(ChunkState desiredState, u64 size, u64 alignment = 0);
/**
* @brief Inserts a chunk into the chunk list, resizing and splitting as necessary
* @note mutex MUST be locked when calling this
* @param newChunk The chunk to insert
* @return The base virtual address of the inserted chunk
*/
u64 InsertChunk(const ChunkDescriptor &newChunk);
public:
GraphicsMemoryManager(const DeviceState &state);
/**
* @brief Reserves a region of the virtual address space so it will not be chosen automatically when mapping
* @param size The size of the region to reserve
* @param alignment The alignment of the region to reserve
* @return The base virtual address of the reserved region, or 0 if no suitable region was found
*/
u64 ReserveSpace(u64 size, u64 alignment);
/**
* @brief Reserves a fixed region of the virtual address space so it will not be chosen automatically when mapping
* @param virtualAddress The virtual base address of the region to allocate
* @param size The size of the region to allocate
* @return The base virtual address of the reserved region, or 0 if the address isn't aligned to the GPU page size
*/
u64 ReserveFixed(u64 virtualAddress, u64 size);
/**
* @brief Maps a CPU memory region into an automatically chosen region of the virtual address space
* @param cpuPtr A pointer to the region to be mapped into the virtual address space
* @param size The size of the region to map
* @return The base virtual address of the mapped region, or 0 if no suitable region was found
*/
u64 MapAllocate(u8 *cpuPtr, u64 size);
/**
* @brief Maps a CPU memory region to a fixed region in the virtual address space
* @param virtualAddress The target virtual address of the region
* @param cpuPtr A pointer to the region to be mapped into the virtual address space
* @param size The size of the region to map
* @return The base virtual address of the mapped region, or 0 if the address isn't aligned to the GPU page size
*/
u64 MapFixed(u64 virtualAddress, u8 *cpuPtr, u64 size);
/**
* @brief Unmaps all chunks in the given region from the virtual address space
* @return Whether the operation succeeded
*/
bool Unmap(u64 virtualAddress, u64 size);
/**
* @brief Copies a region of the virtual address space into the supplied buffer
* @note An exception is thrown if the region isn't backed by mapped chunks
*/
void Read(u8 *destination, u64 virtualAddress, u64 size);
/**
* @brief Reads in a span from a region of the virtual address space
*/
template<typename T>
void Read(span <T> destination, u64 virtualAddress) {
Read(reinterpret_cast<u8 *>(destination.data()), virtualAddress, destination.size_bytes());
}
/**
* @brief Reads in an object from a region of the virtual address space
* @tparam T The type of object to return
*/
template<typename T>
T Read(u64 virtualAddress) {
T obj;
Read(reinterpret_cast<u8 *>(&obj), virtualAddress, sizeof(T));
return obj;
}
/**
* @brief Copies the supplied buffer into a region of the virtual address space
* @note An exception is thrown if the region isn't backed by mapped chunks
*/
void Write(u8 *source, u64 virtualAddress, u64 size);
/**
* @brief Writes out a span to a region of the virtual address space
*/
template<typename T>
void Write(span <T> source, u64 virtualAddress) {
Write(reinterpret_cast<u8 *>(source.data()), virtualAddress, source.size_bytes());
}
/**
* @brief Writes out an object to a region of the virtual address space
*/
template<typename T>
void Write(T source, u64 virtualAddress) {
Write(reinterpret_cast<u8 *>(&source), virtualAddress, sizeof(T));
}
};
}