From eb25f600335d3dc3a09a83b2d23427a164217da3 Mon Sep 17 00:00:00 2001 From: Billy Laws <blaws05@gmail.com> Date: Fri, 8 Oct 2021 20:25:21 +0100 Subject: [PATCH] Implement multichannel support for GPU Allows the execution of multiple channels at the same time, with locking being performed on the host GPU scheduler layer. Address spaces can be bound to one or more channels. --- app/CMakeLists.txt | 4 +- .../gpu/interconnect/graphics_context.h | 8 +-- .../main/cpp/skyline/services/common/result.h | 1 + .../services/nvdrv/devices/nvdevice.cpp | 6 ++- .../skyline/services/nvdrv/devices/nvdevice.h | 7 ++- .../services/nvdrv/devices/nvhost/as_gpu.cpp | 51 ++++++++++++++----- .../services/nvdrv/devices/nvhost/as_gpu.h | 6 ++- .../services/nvdrv/devices/nvhost/ctrl.cpp | 2 +- .../services/nvdrv/devices/nvhost/ctrl.h | 2 +- .../nvdrv/devices/nvhost/ctrl_gpu.cpp | 4 +- .../services/nvdrv/devices/nvhost/ctrl_gpu.h | 2 +- .../nvdrv/devices/nvhost/gpu_channel.cpp | 32 +++++++++--- .../nvdrv/devices/nvhost/gpu_channel.h | 15 ++++-- .../skyline/services/nvdrv/devices/nvmap.cpp | 2 +- .../skyline/services/nvdrv/devices/nvmap.h | 2 +- .../cpp/skyline/services/nvdrv/driver.cpp | 17 +++++-- .../main/cpp/skyline/services/nvdrv/driver.h | 13 ++++- app/src/main/cpp/skyline/soc.h | 5 +- app/src/main/cpp/skyline/soc/gm20b.cpp | 15 ------ .../main/cpp/skyline/soc/gm20b/channel.cpp | 17 +++++++ .../skyline/soc/{gm20b.h => gm20b/channel.h} | 22 ++++---- .../skyline/soc/gm20b/engines/maxwell_3d.cpp | 9 ++-- .../skyline/soc/gm20b/engines/maxwell_3d.h | 8 ++- app/src/main/cpp/skyline/soc/gm20b/gmmu.h | 6 ++- app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp | 41 +++++++-------- app/src/main/cpp/skyline/soc/gm20b/gpfifo.h | 21 ++++---- 26 files changed, 209 insertions(+), 109 deletions(-) delete mode 100644 app/src/main/cpp/skyline/soc/gm20b.cpp create mode 100644 app/src/main/cpp/skyline/soc/gm20b/channel.cpp rename app/src/main/cpp/skyline/soc/{gm20b.h => gm20b/channel.h} (58%) diff --git 
a/app/CMakeLists.txt b/app/CMakeLists.txt index 97748fb5..e3e09a20 100644 --- a/app/CMakeLists.txt +++ b/app/CMakeLists.txt @@ -103,12 +103,12 @@ add_library(skyline SHARED ${source_DIR}/skyline/gpu/texture/texture.cpp ${source_DIR}/skyline/gpu/presentation_engine.cpp ${source_DIR}/skyline/gpu/interconnect/command_executor.cpp - ${source_DIR}/skyline/soc/gm20b.cpp ${source_DIR}/skyline/soc/host1x/syncpoint.cpp + ${source_DIR}/skyline/soc/gm20b/channel.cpp ${source_DIR}/skyline/soc/gm20b/gpfifo.cpp + ${source_DIR}/skyline/soc/gm20b/gmmu.cpp ${source_DIR}/skyline/soc/gm20b/engines/maxwell_3d.cpp ${source_DIR}/skyline/soc/gm20b/engines/maxwell/macro_interpreter.cpp - ${source_DIR}/skyline/soc/gm20b/gmmu.cpp ${source_DIR}/skyline/input/npad.cpp ${source_DIR}/skyline/input/npad_device.cpp ${source_DIR}/skyline/input/touch.cpp diff --git a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h index 9795a5fd..923c027a 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h @@ -4,8 +4,10 @@ #pragma once #include <gpu/texture/format.h> +#include <soc/gm20b/channel.h> #include <soc/gm20b/gmmu.h> #include <soc/gm20b/engines/maxwell/types.h> + #include "command_executor.h" namespace skyline::gpu::interconnect { @@ -18,7 +20,7 @@ namespace skyline::gpu::interconnect { class GraphicsContext { private: GPU &gpu; - soc::gm20b::GMMU &gmmu; + soc::gm20b::ChannelContext &channelCtx; gpu::interconnect::CommandExecutor &executor; struct RenderTarget { @@ -50,7 +52,7 @@ namespace skyline::gpu::interconnect { public: - GraphicsContext(GPU &gpu, soc::gm20b::GMMU &gmmu, gpu::interconnect::CommandExecutor &executor) : gpu(gpu), gmmu(gmmu), executor(executor) { + GraphicsContext(GPU &gpu, soc::gm20b::ChannelContext &channelCtx, gpu::interconnect::CommandExecutor &executor) : gpu(gpu), channelCtx(channelCtx), executor(executor) { 
scissors.fill(DefaultScissor); } @@ -182,7 +184,7 @@ namespace skyline::gpu::interconnect { if (renderTarget.guest.mappings.empty()) { auto size{std::max<u64>(renderTarget.guest.layerStride * (renderTarget.guest.layerCount - renderTarget.guest.baseArrayLayer), renderTarget.guest.format->GetSize(renderTarget.guest.dimensions))}; - auto mappings{gmmu.TranslateRange(renderTarget.gpuAddress, size)}; + auto mappings{channelCtx.asCtx->gmmu.TranslateRange(renderTarget.gpuAddress, size)}; renderTarget.guest.mappings.assign(mappings.begin(), mappings.end()); } diff --git a/app/src/main/cpp/skyline/services/common/result.h b/app/src/main/cpp/skyline/services/common/result.h index 6e221450..54bef066 100644 --- a/app/src/main/cpp/skyline/services/common/result.h +++ b/app/src/main/cpp/skyline/services/common/result.h @@ -11,6 +11,7 @@ namespace skyline::service { NotPermitted = 1, // EPERM TryAgain = 11, // EAGAIN Busy = 16, // EBUSY + FileExists = 17, // EEXIST InvalidArgument = 22, // EINVAL InappropriateIoctlForDevice = 25, // ENOTTY FunctionNotImplemented = 38, // ENOSYS diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvdevice.cpp b/app/src/main/cpp/skyline/services/nvdrv/devices/nvdevice.cpp index 1c2aca70..6e42601f 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvdevice.cpp +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvdevice.cpp @@ -5,7 +5,11 @@ #include "nvdevice.h" namespace skyline::service::nvdrv::device { - NvDevice::NvDevice(const DeviceState &state, Core &core, const SessionContext &ctx) : state(state), core(core), ctx(ctx) {} + NvDevice::NvDevice(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx) : + state(state), + driver(driver), + core(core), + ctx(ctx) {} const std::string &NvDevice::GetName() { if (name.empty()) { diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvdevice.h b/app/src/main/cpp/skyline/services/nvdrv/devices/nvdevice.h index 60e0c87e..1a39b8f3 100644 --- 
a/app/src/main/cpp/skyline/services/nvdrv/devices/nvdevice.h +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvdevice.h @@ -11,6 +11,10 @@ #include "deserialisation/types.h" +namespace skyline::service::nvdrv { + class Driver; +} + namespace skyline::service::nvdrv::device { using namespace kernel; using namespace deserialisation; @@ -24,11 +28,12 @@ namespace skyline::service::nvdrv::device { protected: const DeviceState &state; + Driver &driver; Core &core; SessionContext ctx; public: - NvDevice(const DeviceState &state, Core &core, const SessionContext &ctx); + NvDevice(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx); virtual ~NvDevice() = default; diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.cpp b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.cpp index 3b294273..2ddd5d72 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.cpp +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.cpp @@ -3,7 +3,10 @@ #include <common/address_space.inc> #include <soc.h> +#include <soc/gm20b/gmmu.h> +#include <services/nvdrv/driver.h> #include <services/nvdrv/devices/deserialisation/deserialisation.h> +#include "gpu_channel.h" #include "as_gpu.h" namespace skyline { @@ -14,10 +17,31 @@ namespace skyline { namespace skyline::service::nvdrv::device::nvhost { using GMMU = soc::gm20b::GMMU; - AsGpu::AsGpu(const DeviceState &state, Core &core, const SessionContext &ctx) : NvDevice(state, core, ctx) {} + AsGpu::AsGpu(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx) : NvDevice(state, driver, core, ctx) {} PosixResult AsGpu::BindChannel(In<FileDescriptor> channelFd) { - // TODO: support once multiple address spaces are supported + std::scoped_lock lock(mutex); + + if (!vm.initialised) + return PosixResult::InvalidArgument; + + try { + // No lock on driver.deviceMutex here: Driver::Ioctl* already holds it in shared mode across this handler and re-locking a std::shared_mutex on the owning thread is UB (assumes BindChannel is only reached through Driver::Ioctl*) + auto &gpuCh{dynamic_cast<GpuChannel 
&>(*driver.devices.at(channelFd))}; + + std::scoped_lock channelLock(gpuCh.channelMutex); + + if (gpuCh.asCtx) { + state.logger->Warn("Attempting to bind multiple ASes to a single GPU channel"); + return PosixResult::InvalidArgument; + } + + gpuCh.asCtx = asCtx; + } catch (const std::exception &e) { // std::out_of_range for an unknown FD, std::bad_cast for an FD that isn't a GpuChannel + state.logger->Warn("Attempting to bind AS to an invalid channel: {}", channelFd); + return PosixResult::InvalidArgument; + } + return PosixResult::Success; } @@ -53,7 +77,7 @@ namespace skyline::service::nvdrv::device::nvhost { u64 size{static_cast<u64>(pages) * pageSize}; if (flags.sparse) - state.soc->gm20b.gmmu.Map(offset, GMMU::SparsePlaceholderAddress(), size, {true}); + asCtx->gmmu.Map(offset, GMMU::SparsePlaceholderAddress(), size, {true}); allocationMap[offset] = { .size = size, @@ -77,9 +101,9 @@ namespace skyline::service::nvdrv::device::nvhost { // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state // Only FreeSpace can unmap them fully if (mapping->sparseAlloc) - state.soc->gm20b.gmmu.Map(offset, GMMU::SparsePlaceholderAddress(), mapping->size, {true}); + asCtx->gmmu.Map(offset, GMMU::SparsePlaceholderAddress(), mapping->size, {true}); else - state.soc->gm20b.gmmu.Unmap(offset, mapping->size); + asCtx->gmmu.Unmap(offset, mapping->size); mappingMap.erase(offset); } @@ -103,7 +127,7 @@ namespace skyline::service::nvdrv::device::nvhost { // Unset sparse flag if required if (allocation.sparse) - state.soc->gm20b.gmmu.Unmap(offset, allocation.size); + asCtx->gmmu.Unmap(offset, allocation.size); auto &allocator{pageSize == VM::PageSize ? vm.smallPageAllocator : vm.bigPageAllocator}; u32 pageSizeBits{pageSize == VM::PageSize ? 
VM::PageSizeBits : vm.bigPageSizeBits}; @@ -138,9 +162,9 @@ namespace skyline::service::nvdrv::device::nvhost { // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state // Only FreeSpace can unmap them fully if (mapping->sparseAlloc) - state.soc->gm20b.gmmu.Map(offset, GMMU::SparsePlaceholderAddress(), mapping->size, {true}); + asCtx->gmmu.Map(offset, GMMU::SparsePlaceholderAddress(), mapping->size, {true}); else - state.soc->gm20b.gmmu.Unmap(offset, mapping->size); + asCtx->gmmu.Unmap(offset, mapping->size); mappingMap.erase(offset); } catch (const std::out_of_range &e) { @@ -172,7 +196,7 @@ namespace skyline::service::nvdrv::device::nvhost { u64 gpuAddress{offset + bufferOffset}; u8 *cpuPtr{mapping->ptr + bufferOffset}; - state.soc->gm20b.gmmu.Map(gpuAddress, cpuPtr, mappingSize); + asCtx->gmmu.Map(gpuAddress, cpuPtr, mappingSize); return PosixResult::Success; } catch (const std::out_of_range &e) { @@ -194,7 +218,7 @@ namespace skyline::service::nvdrv::device::nvhost { if (alloc-- == allocationMap.begin() || (offset - alloc->first) + size > alloc->second.size) throw exception("Cannot perform a fixed mapping into an unallocated region!"); - state.soc->gm20b.gmmu.Map(offset, cpuPtr, size); + asCtx->gmmu.Map(offset, cpuPtr, size); auto mapping{std::make_shared<Mapping>(cpuPtr, offset, size, true, false, alloc->second.sparse)}; alloc->second.mappings.push_back(mapping); @@ -214,7 +238,7 @@ namespace skyline::service::nvdrv::device::nvhost { u32 pageSizeBits{bigPage ? 
vm.bigPageSizeBits : VM::PageSizeBits}; offset = static_cast<u64>(allocator->Allocate(util::AlignUp(size, pageSize) >> pageSizeBits)) << pageSizeBits; - state.soc->gm20b.gmmu.Map(offset, cpuPtr, size); + asCtx->gmmu.Map(offset, cpuPtr, size); auto mapping{std::make_shared<Mapping>(cpuPtr, offset, size, false, bigPage, false)}; mappingMap[offset] = mapping; @@ -292,6 +316,7 @@ namespace skyline::service::nvdrv::device::nvhost { u64 endBigPages{(vm.vaRangeEnd - vm.vaRangeSplit) >> vm.bigPageSizeBits}; vm.bigPageAllocator = std::make_unique<VM::Allocator>(startBigPages, endBigPages); + asCtx = std::make_shared<soc::gm20b::AddressSpaceContext>(); vm.initialised = true; return PosixResult::Success; @@ -320,7 +345,7 @@ namespace skyline::service::nvdrv::device::nvhost { } if (!entry.handle) { - state.soc->gm20b.gmmu.Map(virtAddr, soc::gm20b::GMMU::SparsePlaceholderAddress(), size, {true}); + asCtx->gmmu.Map(virtAddr, GMMU::SparsePlaceholderAddress(), size, {true}); } else { auto h{core.nvMap.GetHandle(entry.handle)}; if (!h) @@ -328,7 +353,7 @@ namespace skyline::service::nvdrv::device::nvhost { u8 *cpuPtr{reinterpret_cast<u8 *>(h->address + (static_cast<u64>(entry.handleOffsetBigPages) << vm.bigPageSizeBits))}; - state.soc->gm20b.gmmu.Map(virtAddr, cpuPtr, size); + asCtx->gmmu.Map(virtAddr, cpuPtr, size); } } diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.h b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.h index 00529fe7..26c7ff00 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.h +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.h @@ -4,7 +4,7 @@ #pragma once #include <common/address_space.h> - +#include <soc/gm20b/gmmu.h> #include <services/nvdrv/devices/nvdevice.h> namespace skyline::service::nvdrv::device::nvhost { @@ -65,6 +65,8 @@ namespace skyline::service::nvdrv::device::nvhost { bool initialised{}; } vm; + std::shared_ptr<soc::gm20b::AddressSpaceContext> asCtx; + void 
FreeMappingLocked(u64 offset); public: @@ -95,7 +97,7 @@ namespace skyline::service::nvdrv::device::nvhost { }; static_assert(sizeof(RemapEntry) == 0x14); - AsGpu(const DeviceState &state, Core &core, const SessionContext &ctx); + AsGpu(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx); /** * @brief Binds this address space to a channel diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl.cpp b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl.cpp index bb1798da..2d095534 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl.cpp +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl.cpp @@ -34,7 +34,7 @@ namespace skyline::service::nvdrv::device::nvhost { state == SyncpointEvent::State::Signalling; } - Ctrl::Ctrl(const DeviceState &state, Core &core, const SessionContext &ctx) : NvDevice(state, core, ctx) {} + Ctrl::Ctrl(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx) : NvDevice(state, driver, core, ctx) {} u32 Ctrl::FindFreeSyncpointEvent(u32 syncpointId) { u32 eventSlot{SyncpointEventCount}; //!< Holds the slot of the last populated event in the event array diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl.h b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl.h index 51c357a7..d596571f 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl.h +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl.h @@ -96,7 +96,7 @@ namespace skyline::service::nvdrv::device::nvhost { PosixResult SyncpointFreeEventLocked(In<u32> slot); public: - Ctrl(const DeviceState &state, Core &core, const SessionContext &ctx); + Ctrl(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx); /** * @brief Clears a syncpoint event diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl_gpu.cpp b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl_gpu.cpp index 
f78810a1..09f93bf5 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl_gpu.cpp +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl_gpu.cpp @@ -5,8 +5,8 @@ #include "ctrl_gpu.h" namespace skyline::service::nvdrv::device::nvhost { - CtrlGpu::CtrlGpu(const DeviceState &state, Core &core, const SessionContext &ctx) : - NvDevice(state, core, ctx), + CtrlGpu::CtrlGpu(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx) : + NvDevice(state, driver, core, ctx), errorNotifierEvent(std::make_shared<type::KEvent>(state, false)), unknownEvent(std::make_shared<type::KEvent>(state, false)) {} diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl_gpu.h b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl_gpu.h index 2c202c6b..ff93b41b 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl_gpu.h +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl_gpu.h @@ -73,7 +73,7 @@ namespace skyline::service::nvdrv::device::nvhost { u32 subregionCount{0x10}; }; - CtrlGpu(const DeviceState &state, Core &core, const SessionContext &ctx); + CtrlGpu(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx); /** * @brief Returns the zcull context size diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.cpp b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.cpp index 29b652fe..2bebb602 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.cpp +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.cpp @@ -6,8 +6,8 @@ #include "gpu_channel.h" namespace skyline::service::nvdrv::device::nvhost { - GpuChannel::GpuChannel(const DeviceState &state, Core &core, const SessionContext &ctx) : - NvDevice(state, core, ctx), + GpuChannel::GpuChannel(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx) : + NvDevice(state, driver, core, ctx), 
smExceptionBreakpointIntReportEvent(std::make_shared<type::KEvent>(state, false)), smExceptionBreakpointPauseReportEvent(std::make_shared<type::KEvent>(state, false)), errorNotifierEvent(std::make_shared<type::KEvent>(state, false)) { @@ -39,16 +39,20 @@ namespace skyline::service::nvdrv::device::nvhost { if (flags.incrementWithValue) return PosixResult::InvalidArgument; - if (core.syncpointManager.IsFenceSignalled(fence)) + if (!core.syncpointManager.IsFenceSignalled(fence)) throw exception("Waiting on a fence through SubmitGpfifo is unimplemented"); } - state.soc->gm20b.gpfifo.Push(gpEntries.subspan(0, numEntries)); + { + std::scoped_lock lock(channelMutex); - fence.id = channelSyncpoint; + channelCtx->gpfifo.Push(gpEntries.subspan(0, numEntries)); - u32 increment{(flags.fenceIncrement ? 2 : 0) + (flags.incrementWithValue ? fence.threshold : 0)}; - fence.threshold = core.syncpointManager.IncrementSyncpointMaxExt(channelSyncpoint, increment); + fence.id = channelSyncpoint; + + u32 increment{(flags.fenceIncrement ? 2 : 0) + (flags.incrementWithValue ? 
fence.threshold : 0)}; + fence.threshold = core.syncpointManager.IncrementSyncpointMaxExt(channelSyncpoint, increment); + } if (flags.fenceIncrement) throw exception("Incrementing a fence through SubmitGpfifo is unimplemented"); @@ -84,7 +88,19 @@ namespace skyline::service::nvdrv::device::nvhost { PosixResult GpuChannel::AllocGpfifoEx2(In<u32> numEntries, In<u32> numJobs, In<u32> flags, Out<Fence> fence) { state.logger->Debug("numEntries: {}, numJobs: {}, flags: 0x{:X}", numEntries, numJobs, flags); - state.soc->gm20b.gpfifo.Initialize(numEntries); + + std::scoped_lock lock(channelMutex); + if (!asCtx) { + state.logger->Warn("Trying to allocate a channel without a bound address space"); + return PosixResult::InvalidArgument; + } + + if (channelCtx) { + state.logger->Warn("Trying to allocate a channel twice!"); + return PosixResult::FileExists; + } + + channelCtx = std::make_unique<soc::gm20b::ChannelContext>(state, asCtx, numEntries); fence = core.syncpointManager.GetSyncpointFence(channelSyncpoint); diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.h b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.h index b9a807f3..a8c71bad 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.h +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.h @@ -3,23 +3,32 @@ #pragma once -#include <soc/gm20b/gpfifo.h> #include <services/common/fence.h> +#include <soc/gm20b/engines/maxwell_3d.h> // TODO: remove +#include <soc/gm20b/channel.h> #include "services/nvdrv/devices/nvdevice.h" namespace skyline::service::nvdrv::device::nvhost { + class AsGpu; + /** * @brief nvhost::GpuChannel is used to create and submit commands to channels which are effectively GPU processes * @url https://switchbrew.org/wiki/NV_services#Channels */ class GpuChannel : public NvDevice { private: - u32 channelSyncpoint{}; + u32 channelSyncpoint{}; //!< The syncpoint for submissions allocated to this channel in 
`AllocGpfifo` u32 channelUserData{}; + std::mutex channelMutex; std::shared_ptr<type::KEvent> smExceptionBreakpointIntReportEvent; std::shared_ptr<type::KEvent> smExceptionBreakpointPauseReportEvent; std::shared_ptr<type::KEvent> errorNotifierEvent; + std::shared_ptr<soc::gm20b::AddressSpaceContext> asCtx; + std::unique_ptr<soc::gm20b::ChannelContext> channelCtx; + + friend AsGpu; + public: /** * @brief A bitfield of the flags that can be supplied for a specific GPFIFO submission @@ -37,7 +46,7 @@ namespace skyline::service::nvdrv::device::nvhost { u32 raw; }; - GpuChannel(const DeviceState &state, Core &core, const SessionContext &ctx); + GpuChannel(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx); /** * @brief Sets the nvmap handle id to be used for channel submits (does nothing for GPU channels) diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvmap.cpp b/app/src/main/cpp/skyline/services/nvdrv/devices/nvmap.cpp index 7a0bfe47..274cf9ed 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvmap.cpp +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvmap.cpp @@ -6,7 +6,7 @@ #include "nvmap.h" namespace skyline::service::nvdrv::device { - NvMap::NvMap(const DeviceState &state, Core &core, const SessionContext &ctx) : NvDevice(state, core, ctx) {} + NvMap::NvMap(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx) : NvDevice(state, driver, core, ctx) {} PosixResult NvMap::Create(In<u32> size, Out<NvMapCore::Handle::Id> handle) { auto handleDesc{core.nvMap.CreateHandle(util::AlignUp(size, PAGE_SIZE))}; diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvmap.h b/app/src/main/cpp/skyline/services/nvdrv/devices/nvmap.h index 7846e317..74fe4557 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvmap.h +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvmap.h @@ -23,7 +23,7 @@ namespace skyline::service::nvdrv::device { IsSharedMemMapped = 6 }; - NvMap(const 
DeviceState &state, Core &core, const SessionContext &ctx); + NvMap(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx); /** * @brief Creates an nvmap handle for the given size diff --git a/app/src/main/cpp/skyline/services/nvdrv/driver.cpp b/app/src/main/cpp/skyline/services/nvdrv/driver.cpp index 010c737f..0f551f44 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/driver.cpp +++ b/app/src/main/cpp/skyline/services/nvdrv/driver.cpp @@ -23,10 +23,13 @@ namespace skyline::service::nvdrv { break; \ } - #define DEVICE_CASE(path, object) \ - case util::Hash(path): \ - devices.emplace(fd, std::make_unique<device::object>(state, core, ctx)); \ - return NvResult::Success; + #define DEVICE_CASE(path, object) \ + case util::Hash(path): \ + { \ + std::unique_lock lock(deviceMutex); \ + devices.emplace(fd, std::make_unique<device::object>(state, *this, core, ctx)); \ + return NvResult::Success; \ + } DEVICE_SWITCH( DEVICE_CASE("/dev/nvmap", NvMap) @@ -69,13 +72,13 @@ namespace skyline::service::nvdrv { default: throw exception("Unhandled POSIX result: {}!", static_cast<i32>(result)); } - } NvResult Driver::Ioctl(u32 fd, IoctlDescriptor cmd, span<u8> buffer) { state.logger->Debug("fd: {}, cmd: 0x{:X}, device: {}", fd, cmd.raw, devices.at(fd)->GetName()); try { + std::shared_lock lock(deviceMutex); return ConvertResult(devices.at(fd)->Ioctl(cmd, buffer)); } catch (const std::out_of_range &) { throw exception("Ioctl was called with invalid file descriptor: {}", fd); @@ -86,6 +89,7 @@ namespace skyline::service::nvdrv { state.logger->Debug("fd: {}, cmd: 0x{:X}, device: {}", fd, cmd.raw, devices.at(fd)->GetName()); try { + std::shared_lock lock(deviceMutex); return ConvertResult(devices.at(fd)->Ioctl2(cmd, buffer, inlineBuffer)); } catch (const std::out_of_range &) { throw exception("Ioctl2 was called with invalid file descriptor: 0x{:X}", fd); @@ -96,6 +100,7 @@ namespace skyline::service::nvdrv { state.logger->Debug("fd: {}, cmd: 0x{:X}, device: 
{}", fd, cmd.raw, devices.at(fd)->GetName()); try { + std::shared_lock lock(deviceMutex); return ConvertResult(devices.at(fd)->Ioctl3(cmd, buffer, inlineBuffer)); } catch (const std::out_of_range &) { throw exception("Ioctl3 was called with invalid file descriptor: {}", fd); @@ -104,6 +109,7 @@ namespace skyline::service::nvdrv { void Driver::CloseDevice(u32 fd) { try { + std::unique_lock lock(deviceMutex); devices.erase(fd); } catch (const std::out_of_range &) { state.logger->Warn("Trying to close non-existent file descriptor: {}"); @@ -114,6 +120,7 @@ namespace skyline::service::nvdrv { state.logger->Debug("fd: {}, eventId: 0x{:X}, device: {}", fd, eventId, devices.at(fd)->GetName()); try { + std::shared_lock lock(deviceMutex); return devices.at(fd)->QueryEvent(eventId); } catch (const std::exception &) { throw exception("QueryEvent was called with invalid file descriptor: {}", fd); diff --git a/app/src/main/cpp/skyline/services/nvdrv/driver.h b/app/src/main/cpp/skyline/services/nvdrv/driver.h index 2e827fc3..c3d2a4bb 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/driver.h +++ b/app/src/main/cpp/skyline/services/nvdrv/driver.h @@ -4,16 +4,27 @@ #pragma once #include <common.h> +#include <kernel/types/KEvent.h> #include "types.h" -#include "devices/nvdevice.h" #include "core/core.h" +#include "devices/nvdevice.h" namespace skyline::service::nvdrv { + namespace device { + namespace nvhost { + class AsGpu; + } + } + class Driver { private: const DeviceState &state; + + std::shared_mutex deviceMutex; //!< Protects access to `devices` std::unordered_map<FileDescriptor, std::unique_ptr<device::NvDevice>> devices; + friend device::nvhost::AsGpu; // For channel address space binding + public: Core core; //!< The core global state object of nvdrv that is accessed by devices diff --git a/app/src/main/cpp/skyline/soc.h b/app/src/main/cpp/skyline/soc.h index 717bb321..32c13d59 100644 --- a/app/src/main/cpp/skyline/soc.h +++ b/app/src/main/cpp/skyline/soc.h @@ -4,7 +4,7 
@@ #pragma once #include "soc/host1x.h" -#include "soc/gm20b.h" +#include "soc/gm20b/gpfifo.h" namespace skyline::soc { /** @@ -14,8 +14,7 @@ namespace skyline::soc { class SOC { public: host1x::Host1X host1x; - gm20b::GM20B gm20b; - SOC(const DeviceState &state) : gm20b(state) {} + SOC(const DeviceState &state) {} }; } diff --git a/app/src/main/cpp/skyline/soc/gm20b.cpp b/app/src/main/cpp/skyline/soc/gm20b.cpp deleted file mode 100644 index 206b8e8a..00000000 --- a/app/src/main/cpp/skyline/soc/gm20b.cpp +++ /dev/null @@ -1,15 +0,0 @@ -// SPDX-License-Identifier: MPL-2.0 -// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) - -#include "gm20b.h" - -namespace skyline::soc::gm20b { - GM20B::GM20B(const DeviceState &state) : - fermi2D(state), - keplerMemory(state), - maxwell3D(state, gmmu, executor), - maxwellCompute(state), - maxwellDma(state), - gpfifo(state), - executor(state) {} -} diff --git a/app/src/main/cpp/skyline/soc/gm20b/channel.cpp b/app/src/main/cpp/skyline/soc/gm20b/channel.cpp new file mode 100644 index 00000000..73622af4 --- /dev/null +++ b/app/src/main/cpp/skyline/soc/gm20b/channel.cpp @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#include "engines/maxwell_3d.h" //TODO: remove +#include "channel.h" + +namespace skyline::soc::gm20b { + ChannelContext::ChannelContext(const DeviceState &state, std::shared_ptr<AddressSpaceContext> asCtx, size_t numEntries) : + fermi2D(state), + keplerMemory(state), + maxwell3D(std::make_unique<engine::maxwell3d::Maxwell3D>(state, *this, executor)), + maxwellCompute(state), + maxwellDma(state), + gpfifo(state, *this, numEntries), + executor(state), + asCtx(std::move(asCtx)){} +} diff --git a/app/src/main/cpp/skyline/soc/gm20b.h b/app/src/main/cpp/skyline/soc/gm20b/channel.h similarity index 58% rename from app/src/main/cpp/skyline/soc/gm20b.h rename to app/src/main/cpp/skyline/soc/gm20b/channel.h 
index 73058738..e6d9b4db 100644 --- a/app/src/main/cpp/skyline/soc/gm20b.h +++ b/app/src/main/cpp/skyline/soc/gm20b/channel.h @@ -4,26 +4,30 @@ #pragma once #include <gpu/interconnect/command_executor.h> -#include "gm20b/engines/maxwell_3d.h" -#include "gm20b/gpfifo.h" -#include "gm20b/gmmu.h" +#include "engines/engine.h" +#include "gpfifo.h" namespace skyline::soc::gm20b { + namespace engine::maxwell3d { + class Maxwell3D; + } + + struct AddressSpaceContext; + /** * @brief The GPU block in the X1, it contains all GPU engines required for accelerating graphics operations * @note We omit parts of components related to external access such as the grhost, all accesses to the external components are done directly */ - class GM20B { - public: - GMMU gmmu; + struct ChannelContext { + std::shared_ptr<AddressSpaceContext> asCtx; gpu::interconnect::CommandExecutor executor; engine::Engine fermi2D; - engine::maxwell3d::Maxwell3D maxwell3D; + std::unique_ptr<engine::maxwell3d::Maxwell3D> maxwell3D; //!< TODO: fix this once graphics context is moved into a cpp file engine::Engine maxwellCompute; engine::Engine maxwellDma; engine::Engine keplerMemory; - GPFIFO gpfifo; + ChannelGpfifo gpfifo; - GM20B(const DeviceState &state); + ChannelContext(const DeviceState &state, std::shared_ptr<AddressSpaceContext> asCtx, size_t numEntries); }; } diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp index c869e86e..66884a15 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp @@ -3,10 +3,11 @@ // Copyright © 2018-2020 fincs (https://github.com/devkitPro/deko3d) #include <boost/preprocessor/repeat.hpp> +#include "maxwell_3d.h" #include <soc.h> namespace skyline::soc::gm20b::engine::maxwell3d { - Maxwell3D::Maxwell3D(const DeviceState &state, GMMU &gmmu, gpu::interconnect::CommandExecutor &executor) : Engine(state), 
macroInterpreter(*this), context(*state.gpu, gmmu, executor) { + Maxwell3D::Maxwell3D(const DeviceState &state, ChannelContext &channelCtx, gpu::interconnect::CommandExecutor &executor) : Engine(state), macroInterpreter(*this), context(*state.gpu, channelCtx, executor), channelCtx(channelCtx) { ResetRegs(); } @@ -244,7 +245,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { MAXWELL3D_CASE(syncpointAction, { state.logger->Debug("Increment syncpoint: {}", static_cast<u16>(syncpointAction.id)); - state.soc->gm20b.executor.Execute(); + channelCtx.executor.Execute(); state.soc->host1x.syncpoints.at(syncpointAction.id).Increment(); }) @@ -307,7 +308,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { switch (registers.semaphore.info.structureSize) { case type::SemaphoreInfo::StructureSize::OneWord: - state.soc->gm20b.gmmu.Write<u32>(registers.semaphore.address.Pack(), static_cast<u32>(result)); + channelCtx.asCtx->gmmu.Write<u32>(registers.semaphore.address.Pack(), static_cast<u32>(result)); break; case type::SemaphoreInfo::StructureSize::FourWords: { @@ -318,7 +319,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { u64 nsTime{util::GetTimeNs()}; u64 timestamp{(nsTime / NsToTickDenominator) * NsToTickNumerator + ((nsTime % NsToTickDenominator) * NsToTickNumerator) / NsToTickDenominator}; - state.soc->gm20b.gmmu.Write<FourWordResult>(registers.semaphore.address.Pack(), FourWordResult{result, timestamp}); + channelCtx.asCtx->gmmu.Write<FourWordResult>(registers.semaphore.address.Pack(), FourWordResult{result, timestamp}); break; } } diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h index 6a43f637..01862c26 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h @@ -8,6 +8,10 @@ #include "engine.h" #include "maxwell/macro_interpreter.h" +namespace skyline::soc::gm20b { + struct ChannelContext; +} + namespace 
skyline::soc::gm20b::engine::maxwell3d { /** * @brief The Maxwell 3D engine handles processing 3D graphics @@ -245,9 +249,11 @@ namespace skyline::soc::gm20b::engine::maxwell3d { Registers registers{}; Registers shadowRegisters{}; //!< A shadow-copy of the registers, their function is controlled by the 'shadowRamControl' register + ChannelContext &channelCtx; + std::array<u32, 0x2000> macroCode{}; //!< Stores GPU macros, writes to it will wraparound on overflow - Maxwell3D(const DeviceState &state, GMMU &gmmu, gpu::interconnect::CommandExecutor &executor); + Maxwell3D(const DeviceState &state, ChannelContext &channelCtx, gpu::interconnect::CommandExecutor &executor); /** * @brief Resets the Maxwell 3D registers to their default values diff --git a/app/src/main/cpp/skyline/soc/gm20b/gmmu.h b/app/src/main/cpp/skyline/soc/gm20b/gmmu.h index 00eb02a5..9eb3cadd 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/gmmu.h +++ b/app/src/main/cpp/skyline/soc/gm20b/gmmu.h @@ -10,8 +10,12 @@ namespace skyline::soc::gm20b { /** * @brief The GMMU (Graphics Memory Management Unit) class handles mapping between a Maxwell GPU virtual address space and an application's address space and is meant to roughly emulate the GMMU on the X1 - * @note This is not accurate to the X1 as it would have an SMMU between the GMMU and physical memory but we don't emulate this abstraction at the moment + * @note This is not accurate to the X1 as it would have an SMMU between the GMMU and physical memory but we don't need to emulate this abstraction * @note The GMMU is implemented entirely as a template specialization over FlatMemoryManager */ using GMMU = FlatMemoryManager<u64, 0, GmmuAddressSpaceBits>; + + struct AddressSpaceContext { + GMMU gmmu; + }; } diff --git a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp index bf32f1c4..2d895f5f 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp @@ -6,6 
+6,7 @@ #include <kernel/types/KProcess.h> #include <soc.h> #include <os.h> +#include "engines/maxwell_3d.h" namespace skyline::soc::gm20b { /** @@ -58,7 +59,14 @@ namespace skyline::soc::gm20b { }; static_assert(sizeof(PushBufferMethodHeader) == sizeof(u32)); - void GPFIFO::Send(u32 method, u32 argument, u32 subChannel, bool lastCall) { + ChannelGpfifo::ChannelGpfifo(const DeviceState &state, ChannelContext &channelCtx, size_t numEntries) : + state(state), + gpfifoEngine(state), + channelCtx(channelCtx), + gpEntries(numEntries), + thread(std::thread(&ChannelGpfifo::Run, this)) {} + + void ChannelGpfifo::Send(u32 method, u32 argument, u32 subChannel, bool lastCall) { constexpr u32 ThreeDSubChannel{0}; constexpr u32 ComputeSubChannel{1}; constexpr u32 Inline2MemorySubChannel{2}; @@ -72,19 +80,19 @@ namespace skyline::soc::gm20b { } else { switch (subChannel) { case ThreeDSubChannel: - state.soc->gm20b.maxwell3D.CallMethod(method, argument, lastCall); + channelCtx.maxwell3D->CallMethod(method, argument, lastCall); break; case ComputeSubChannel: - state.soc->gm20b.maxwellCompute.CallMethod(method, argument, lastCall); + channelCtx.maxwellCompute.CallMethod(method, argument, lastCall); break; case Inline2MemorySubChannel: - state.soc->gm20b.keplerMemory.CallMethod(method, argument, lastCall); + channelCtx.keplerMemory.CallMethod(method, argument, lastCall); break; case TwoDSubChannel: - state.soc->gm20b.fermi2D.CallMethod(method, argument, lastCall); + channelCtx.fermi2D.CallMethod(method, argument, lastCall); break; case CopySubChannel: - state.soc->gm20b.maxwellDma.CallMethod(method, argument, lastCall); + channelCtx.maxwellDma.CallMethod(method, argument, lastCall); break; default: throw exception("Tried to call into a software subchannel: {}!", subChannel); @@ -92,7 +100,7 @@ namespace skyline::soc::gm20b { } } - void GPFIFO::Process(GpEntry gpEntry) { + void ChannelGpfifo::Process(GpEntry gpEntry) { if (!gpEntry.size) { // This is a GPFIFO control entry, all 
control entries have a zero length and contain no pushbuffers switch (gpEntry.opcode) { @@ -105,7 +113,7 @@ namespace skyline::soc::gm20b { } pushBufferData.resize(gpEntry.size); - state.soc->gm20b.gmmu.Read<u32>(pushBufferData, gpEntry.Address()); + channelCtx.asCtx->gmmu.Read<u32>(pushBufferData, gpEntry.Address()); for (auto entry{pushBufferData.begin()}; entry != pushBufferData.end(); entry++) { // An entry containing all zeroes is a NOP, skip over it @@ -142,18 +150,11 @@ namespace skyline::soc::gm20b { } } - void GPFIFO::Initialize(size_t numBuffers) { - if (pushBuffers) - throw exception("GPFIFO Initialization cannot be done multiple times"); - pushBuffers.emplace(numBuffers); - thread = std::thread(&GPFIFO::Run, this); - } - - void GPFIFO::Run() { + void ChannelGpfifo::Run() { pthread_setname_np(pthread_self(), "GPFIFO"); try { signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, signal::ExceptionalSignalHandler); - pushBuffers->Process([this](GpEntry gpEntry) { + gpEntries.Process([this](GpEntry gpEntry) { state.logger->Debug("Processing pushbuffer: 0x{:X}", gpEntry.Address()); Process(gpEntry); }); @@ -170,11 +171,11 @@ namespace skyline::soc::gm20b { } } - void GPFIFO::Push(span<GpEntry> entries) { - pushBuffers->Append(entries); + void ChannelGpfifo::Push(span<GpEntry> entries) { + gpEntries.Append(entries); } - GPFIFO::~GPFIFO() { + ChannelGpfifo::~ChannelGpfifo() { if (thread.joinable()) { pthread_kill(thread.native_handle(), SIGINT); thread.join(); diff --git a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h index 318fed61..931c0e16 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h +++ b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h @@ -7,6 +7,8 @@ #include "engines/gpfifo.h" namespace skyline::soc::gm20b { + struct ChannelContext; + /** * @brief A GPFIFO entry as submitted through 'SubmitGpfifo' * @url https://nvidia.github.io/open-gpu-doc/manuals/volta/gv100/dev_pbdma.ref.txt @@ 
-73,15 +75,16 @@ namespace skyline::soc::gm20b { static_assert(sizeof(GpEntry) == sizeof(u64)); /** - * @brief The GPFIFO class handles creating pushbuffers from GP entries and then processing them + * @brief The ChannelGpfifo class handles creating pushbuffers from GP entries and then processing them for a single channel + * @note A single ChannelGpfifo thread exists per channel with a single shared mutex in `GPFIFO` to enforce that only one channel can run at a time * @note This class doesn't perfectly map to any particular hardware component on the X1, it does a mix of the GPU Host PBDMA (With and handling the GPFIFO entries * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt#L62 */ - class GPFIFO { + class ChannelGpfifo { const DeviceState &state; + ChannelContext &channelCtx; engine::GPFIFO gpfifoEngine; //!< The engine for processing GPFIFO method calls - std::array<engine::Engine*, 8> subchannels; - std::optional<CircularQueue<GpEntry>> pushBuffers; + CircularQueue<GpEntry> gpEntries; std::thread thread; //!< The thread that manages processing of pushbuffers std::vector<u32> pushBufferData; //!< Persistent vector storing pushbuffer data to avoid constant reallocations @@ -96,14 +99,12 @@ namespace skyline::soc::gm20b { void Process(GpEntry gpEntry); public: - GPFIFO(const DeviceState &state) : state(state), gpfifoEngine(state) {} - - ~GPFIFO(); - /** - * @param numBuffers The amount of push-buffers to allocate in the circular buffer + * @param numEntries The number of gpEntries to allocate space for in the FIFO */ - void Initialize(size_t numBuffers); + ChannelGpfifo(const DeviceState &state, ChannelContext &channelCtx, size_t numEntries); + + ~ChannelGpfifo(); /** * @brief Executes all pending entries in the FIFO