diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.h b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.h index a9a64062..1487b2de 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.h +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.h @@ -62,12 +62,12 @@ namespace skyline::service::nvdrv::device::nvhost { using Allocator = FlatAllocator; std::unique_ptr bigPageAllocator; - std::shared_ptr smallPageAllocator; // Shared as this is also used by nvhost::GpuChannel + std::shared_ptr smallPageAllocator; //!< Shared as this is also used by nvhost::GpuChannel bool initialised{}; } vm; - std::shared_ptr asCtx; + std::shared_ptr asCtx; //!< The guest GPU AS context that is associated with each particular instance friend GpuChannel; diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.h b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.h index 5d35e7a1..b5f3461c 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.h +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.h @@ -23,14 +23,14 @@ namespace skyline::service::nvdrv::device::nvhost { std::shared_ptr smExceptionBreakpointPauseReportEvent; std::shared_ptr errorNotifierEvent; - std::shared_ptr asCtx; - std::shared_ptr asAllocator; - std::unique_ptr channelCtx; + std::shared_ptr asCtx; //!< The guest GPU AS context that submits from this channel are bound to + std::shared_ptr asAllocator; //!< The small page allocator context for the AS that's bound to this channel, used to allocate space for `pushBufferMemory` + std::unique_ptr channelCtx; //!< The entire guest GPU context specific to this channel - u64 pushBufferAddr{}; - size_t pushBufferMemoryOffset{}; - std::vector pushBufferMemory; + u64 pushBufferAddr{}; //!< The GPU address `pushBufferMemory` is mapped to + size_t pushBufferMemoryOffset{}; //!< The current offset at which to write new pushbuffer 
method data for post-increment and pre-wait + std::vector pushBufferMemory; //!< Mapped into the guest GPU AS and used to store method data for pre/post increment commands friend AsGpu; diff --git a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp index cc3cb865..23c580d5 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp @@ -115,28 +115,96 @@ namespace skyline::soc::gm20b { pushBufferData.resize(gpEntry.size); channelCtx.asCtx->gmmu.Read(pushBufferData, gpEntry.Address()); - for (auto entry{pushBufferData.begin()}; entry != pushBufferData.end(); entry++) { + // There will be at least one entry here + auto entry{pushBufferData.begin()}; + + // Executes the current split method, returning once execution is finished or the current GpEntry has reached its end + auto resumeSplitMethod{[&](){ + switch (resumeState.state) { + case MethodResumeState::State::Inc: + while (entry != pushBufferData.end() && resumeState.remaining) + Send(resumeState.address++, *(entry++), resumeState.subChannel, --resumeState.remaining == 0); + + break; + case MethodResumeState::State::OneInc: + // The header can be the final entry of a GpEntry, leaving no argument to consume until the next GpEntry is processed + if (entry == pushBufferData.end()) + break; + + Send(resumeState.address++, *(entry++), resumeState.subChannel, --resumeState.remaining == 0); + + // After the first increment OneInc methods work the same as a NonInc method, this is needed so they can resume correctly if they are broken up by multiple GpEntries + resumeState.state = MethodResumeState::State::NonInc; + [[fallthrough]]; + case MethodResumeState::State::NonInc: + while (entry != pushBufferData.end() && resumeState.remaining) + Send(resumeState.address, *(entry++), resumeState.subChannel, --resumeState.remaining == 0); + + break; + } + }}; + + // We've a method from a previous GpEntry that needs resuming + if (resumeState.remaining) + resumeSplitMethod(); + + // Process more methods if the entries are still not all used up after handling resuming + for (; entry != 
pushBufferData.end(); entry++) { // An entry containing all zeroes is a NOP, skip over it if (*entry == 0) continue; PushBufferMethodHeader methodHeader{.raw = *entry}; + + // Needed in order to check for methods split across multiple GpEntries + auto remainingEntries{std::distance(entry, pushBufferData.end()) - 1}; + + // Handles storing state and initial execution for methods that are split across multiple GpEntries + auto startSplitMethod{[&](auto methodState) { + resumeState = { + .remaining = methodHeader.methodCount, + .address = methodHeader.methodAddress, + .subChannel = methodHeader.methodSubChannel, + .state = methodState + }; + + // Skip over method header as `resumeSplitMethod` doesn't expect it to be there + entry++; + + resumeSplitMethod(); + }}; + switch (methodHeader.secOp) { case PushBufferMethodHeader::SecOp::IncMethod: - for (u32 i{}; i < methodHeader.methodCount; i++) - Send(methodHeader.methodAddress + i, *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1); - break; + if (remainingEntries >= methodHeader.methodCount) { + for (u32 i{}; i < methodHeader.methodCount; i++) + Send(methodHeader.methodAddress + i, *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1); + break; + } else { + startSplitMethod(MethodResumeState::State::Inc); + return; + } case PushBufferMethodHeader::SecOp::NonIncMethod: - for (u32 i{}; i < methodHeader.methodCount; i++) - Send(methodHeader.methodAddress, *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1); - break; + if (remainingEntries >= methodHeader.methodCount) { + for (u32 i{}; i < methodHeader.methodCount; i++) + Send(methodHeader.methodAddress, *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1); + break; + } else { + startSplitMethod(MethodResumeState::State::NonInc); + return; + } case PushBufferMethodHeader::SecOp::OneInc: - for (u32 i{}; i < methodHeader.methodCount; i++) - Send(methodHeader.methodAddress + !!i, 
*++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1); - break; + if (remainingEntries >= methodHeader.methodCount) { + for (u32 i{}; i < methodHeader.methodCount; i++) + Send(methodHeader.methodAddress + (i ? 1 : 0), *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1); + break; + } else { + startSplitMethod(MethodResumeState::State::OneInc); + return; + } case PushBufferMethodHeader::SecOp::ImmdDataMethod: Send(methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel, true); break; @@ -154,6 +222,7 @@ namespace skyline::soc::gm20b { pthread_setname_np(pthread_self(), "GPFIFO"); try { signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, signal::ExceptionalSignalHandler); + gpEntries.Process([this](GpEntry gpEntry) { state.logger->Debug("Processing pushbuffer: 0x{:X}, Size: 0x{:X}", gpEntry.Address(), +gpEntry.size); Process(gpEntry); diff --git a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h index c8222c97..00994e07 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h +++ b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h @@ -87,6 +87,7 @@ namespace skyline::soc::gm20b { * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt#L62 */ class ChannelGpfifo { + private: const DeviceState &state; ChannelContext &channelCtx; engine::GPFIFO gpfifoEngine; //!< The engine for processing GPFIFO method calls @@ -94,11 +95,32 @@ namespace skyline::soc::gm20b { std::thread thread; //!< The thread that manages processing of pushbuffers std::vector pushBufferData; //!< Persistent vector storing pushbuffer data to avoid constant reallocations + /** + * @brief Holds the required state in order to resume a method started from one call to `Process` in another + * @note This is needed as games (especially OpenGL ones) can split method entries over multiple GpEntries + */ + 
struct MethodResumeState { + u32 remaining; //!< The number of entries left to handle until the method is finished + u32 address; //!< The method address in the GPU block specified by `subChannel` that is the target of the command + u8 subChannel; //!< The subchannel that is passed to `Send` together with `address` when the method is resumed + + /** + * @brief This is a simplified version of the full method type enum + */ + enum class State : u8 { + NonInc, + Inc, + OneInc //!< Will be switched to NonInc after the first call + } state; //!< The type of method to resume + } resumeState{}; + + /** * @brief Sends a method call to the GPU hardware */ void Send(u32 method, u32 argument, u32 subchannel, bool lastCall); + /** * @brief Processes the pushbuffer contained within the given GpEntry, calling methods as needed */ @@ -118,7 +140,7 @@ namespace skyline::soc::gm20b { void Run(); /** - * @brief Pushes a list of entries to the FIFO, these commands will be executed on calls to 'Step' + * @brief Pushes a list of entries to the FIFO, these commands will be executed on calls to 'Process' */ void Push(span entries);