From b7d0f2fafabad3a940aa863dbc2dc083a2c86993 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Wed, 13 Oct 2021 21:46:30 +0100 Subject: [PATCH] Implement support for pushbuffer methods split across multiple GpEntries These are used heavily in OpenGL games, which now, together with the previous syncpoint changes, work perfectly. The implementation is somewhat novel: rather than using a per-class state machine for all methods, we only use it for those that are known to be split across GpEntry boundaries. As a result, only a single bounds check is added to the hot path of contiguous method execution and the performance loss is negligible. --- .../services/nvdrv/devices/nvhost/as_gpu.h | 4 +- .../nvdrv/devices/nvhost/gpu_channel.h | 12 +-- app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp | 85 ++++++++++++++++--- app/src/main/cpp/skyline/soc/gm20b/gpfifo.h | 24 +++++- 4 files changed, 106 insertions(+), 19 deletions(-) diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.h b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.h index a9a64062..1487b2de 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.h +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.h @@ -62,12 +62,12 @@ namespace skyline::service::nvdrv::device::nvhost { using Allocator = FlatAllocator; std::unique_ptr bigPageAllocator; - std::shared_ptr smallPageAllocator; // Shared as this is also used by nvhost::GpuChannel + std::shared_ptr smallPageAllocator; //! 
Shared as this is also used by nvhost::GpuChannel bool initialised{}; } vm; - std::shared_ptr asCtx; + std::shared_ptr asCtx; //!< The guest GPU AS context that is associated with each particular instance friend GpuChannel; diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.h b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.h index 5d35e7a1..b5f3461c 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.h +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.h @@ -23,14 +23,14 @@ namespace skyline::service::nvdrv::device::nvhost { std::shared_ptr smExceptionBreakpointPauseReportEvent; std::shared_ptr errorNotifierEvent; - std::shared_ptr asCtx; - std::shared_ptr asAllocator; - std::unique_ptr channelCtx; + std::shared_ptr asCtx; //!< The guest GPU AS context submits from this channel are bound to + std::shared_ptr asAllocator; //!< The small page allocator context for the AS that's bound to this channel, used to allocate space for `pushBufferMemory` + std::unique_ptr channelCtx; //!< The entire guest GPU context specific to this channel - u64 pushBufferAddr{}; - size_t pushBufferMemoryOffset{}; - std::vector pushBufferMemory; + u64 pushBufferAddr{}; //!< The GPU address `pushBufferMemory` is mapped to + size_t pushBufferMemoryOffset{}; //!< The current offset for which to write new pushbuffer method data into for post-increment and pre-wait + std::vector pushBufferMemory; //!< Mapped into the guest GPU As and used to store method data for pre/post increment commands friend AsGpu; diff --git a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp index cc3cb865..23c580d5 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp @@ -115,28 +115,92 @@ namespace skyline::soc::gm20b { pushBufferData.resize(gpEntry.size); channelCtx.asCtx->gmmu.Read(pushBufferData, gpEntry.Address()); - 
for (auto entry{pushBufferData.begin()}; entry != pushBufferData.end(); entry++) { + // There will be at least one entry here + auto entry{pushBufferData.begin()}; + + // Executes the current split method, returning once execution is finished or the current GpEntry has reached its end + auto resumeSplitMethod{[&](){ + switch (resumeState.state) { + case MethodResumeState::State::Inc: + while (entry != pushBufferData.end() && resumeState.remaining) + Send(resumeState.address++, *(entry++), resumeState.subChannel, --resumeState.remaining == 0); + + break; + case MethodResumeState::State::OneInc: + Send(resumeState.address++, *(entry++), resumeState.subChannel, --resumeState.remaining == 0); + + // After the first increment OneInc methods work the same as a NonInc method, this is needed so they can resume correctly if they are broken up by multiple GpEntries + resumeState.state = MethodResumeState::State::NonInc; + [[fallthrough]]; + case MethodResumeState::State::NonInc: + while (entry != pushBufferData.end() && resumeState.remaining) + Send(resumeState.address, *(entry++), resumeState.subChannel, --resumeState.remaining == 0); + + break; + } + }}; + + // We've a method from a previous GpEntry that needs resuming + if (resumeState.remaining) + resumeSplitMethod(); + + // Process more methods if the entries are still not all used up after handling resuming + for (; entry != pushBufferData.end(); entry++) { // An entry containing all zeroes is a NOP, skip over it if (*entry == 0) continue; PushBufferMethodHeader methodHeader{.raw = *entry}; + + // Needed in order to check for methods split across multiple GpEntries + auto remainingEntries{std::distance(entry, pushBufferData.end()) - 1}; + + // Handles storing state and initial execution for methods that are split across multiple GpEntries + auto startSplitMethod{[&](auto methodState) { + resumeState = { + .remaining = methodHeader.methodCount, + .address = methodHeader.methodAddress, + .subChannel = 
methodHeader.methodSubChannel, + .state = methodState + }; + + // Skip over method header as `resumeSplitMethod` doesn't expect it to be there + entry++; + + resumeSplitMethod(); + }}; + switch (methodHeader.secOp) { case PushBufferMethodHeader::SecOp::IncMethod: - for (u32 i{}; i < methodHeader.methodCount; i++) - Send(methodHeader.methodAddress + i, *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1); - break; + if (remainingEntries >= methodHeader.methodCount) { + for (u32 i{}; i < methodHeader.methodCount; i++) + Send(methodHeader.methodAddress + i, *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1); + break; + } else { + startSplitMethod(MethodResumeState::State::Inc); + return; + } case PushBufferMethodHeader::SecOp::NonIncMethod: - for (u32 i{}; i < methodHeader.methodCount; i++) - Send(methodHeader.methodAddress, *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1); - break; + if (remainingEntries >= methodHeader.methodCount) { + for (u32 i{}; i < methodHeader.methodCount; i++) + Send(methodHeader.methodAddress, *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1); + break; + } else { + startSplitMethod(MethodResumeState::State::NonInc); + return; + } case PushBufferMethodHeader::SecOp::OneInc: - for (u32 i{}; i < methodHeader.methodCount; i++) - Send(methodHeader.methodAddress + !!i, *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1); - break; + if (remainingEntries >= methodHeader.methodCount) { + for (u32 i{}; i < methodHeader.methodCount; i++) + Send(methodHeader.methodAddress + (i ? 
1 : 0), *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1); + break; + } else { + startSplitMethod(MethodResumeState::State::OneInc); + return; + } case PushBufferMethodHeader::SecOp::ImmdDataMethod: Send(methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel, true); break; @@ -154,6 +218,7 @@ namespace skyline::soc::gm20b { pthread_setname_np(pthread_self(), "GPFIFO"); try { signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, signal::ExceptionalSignalHandler); + gpEntries.Process([this](GpEntry gpEntry) { state.logger->Debug("Processing pushbuffer: 0x{:X}, Size: 0x{:X}", gpEntry.Address(), +gpEntry.size); Process(gpEntry); diff --git a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h index c8222c97..00994e07 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h +++ b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h @@ -87,6 +87,7 @@ namespace skyline::soc::gm20b { * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt#L62 */ class ChannelGpfifo { + private: const DeviceState &state; ChannelContext &channelCtx; engine::GPFIFO gpfifoEngine; //!< The engine for processing GPFIFO method calls @@ -94,11 +95,32 @@ namespace skyline::soc::gm20b { std::thread thread; //!< The thread that manages processing of pushbuffers std::vector pushBufferData; //!< Persistent vector storing pushbuffer data to avoid constant reallocations + /** + * @brief Holds the required state in order to resume a method started from one call to `Process` in another + * @note This is needed as games (especially OpenGL ones) can split method entries over multiple GpEntries + */ + struct MethodResumeState { + u32 remaining; //!< The number of entries left to handle until the method is finished + u32 address; //!< The method address in the GPU block specified by `subchannel` that is the target of the command + u8 
subChannel; + + /** + * @brief This is a simplified version of the full method type enum + */ + enum class State : u8 { + NonInc, + Inc, + OneInc //!< Will be switched to NonInc after the first call + } state; //!< The type of method to resume + } resumeState{}; + + /** * @brief Sends a method call to the GPU hardware */ void Send(u32 method, u32 argument, u32 subchannel, bool lastCall); + /** * @brief Processes the pushbuffer contained within the given GpEntry, calling methods as needed */ @@ -118,7 +140,7 @@ namespace skyline::soc::gm20b { void Run(); /** - * @brief Pushes a list of entries to the FIFO, these commands will be executed on calls to 'Step' + * @brief Pushes a list of entries to the FIFO, these commands will be executed on calls to 'Process' */ void Push(span entries);