Heavily optimise GPFIFO command dispatch to reduce redundant checks

Previously, for methods with count > 1, the subchannel and engine were looked up for every part of the method rather than only once at the start. Every call also had to be checked to see whether it touched a macro or GPFIFO method. Fix this by performing those checks outside of the main dispatch loop, using templated helper lambdas to avoid repeating lots of code. Maxwell3D is the only subchannel with a fast path for now, but more can be added later if needed.
2025-07-25 19:21:34 +03:00 · 2022-02-22 20:37:31 +00:00 · 2022-02-22 20:37:31 +00:00 · 7e16c1f989
commit 7e16c1f989
parent b4927d0138
2 changed files with 117 additions and 51 deletions
--- a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp
@ -56,6 +56,26 @@ namespace skyline::soc::gm20b {
            u32 _pad2_ : 29;
            SecOp secOp : 3;
        };
        /**
         * @brief Checks if a method is 'pure' i.e. does not touch macro or GPFIFO methods
         */
        bool Pure() const {
            u16 size{[&]() -> u16  {
                switch (secOp) {
                    case SecOp::NonIncMethod:
                    case SecOp::ImmdDataMethod:
                        return 0;
                    case SecOp::OneInc:
                        return 1;
                    default:
                        return methodCount;
                }
            }()};
            u16 end{static_cast<u16>(methodAddress + size)};
            return end < engine::EngineMethodsEnd && methodAddress >= engine::GPFIFO::RegisterCount;
        }
    };
    static_assert(sizeof(PushBufferMethodHeader) == sizeof(u32));
@ -66,20 +86,11 @@ namespace skyline::soc::gm20b {
        gpEntries(numEntries),
        thread(std::thread(&ChannelGpfifo::Run, this)) {}
-    void ChannelGpfifo::Send(u32 method, u32 argument, SubchannelId subChannel, bool lastCall) {
+    void ChannelGpfifo::SendFull(u32 method, u32 argument, SubchannelId subChannel, bool lastCall) {
        Logger::Debug("Called GPU method - method: 0x{:X} argument: 0x{:X} subchannel: 0x{:X} last: {}", method, argument, subChannel, lastCall);
        if (method < engine::GPFIFO::RegisterCount) {
            gpfifoEngine.CallMethod(method, argument);
        } else if (method < engine::EngineMethodsEnd) { [[likely]]
-            switch (subChannel) {
+            SendPure(method, argument, subChannel);
                case SubchannelId::ThreeD:
                    channelCtx.maxwell3D->CallMethod(method, argument);
                    break;
                default:
                    Logger::Warn("Called method 0x{:X} in unimplemented engine 0x{:X}, args: 0x{:X}", method, subChannel, argument);
                    break;
            }
        } else {
            switch (subChannel) {
                case SubchannelId::ThreeD:
@ -96,6 +107,17 @@ namespace skyline::soc::gm20b {
        }
    }
    // Forwards a method call straight to the engine bound to the given subchannel, no GPFIFO or macro range checks are done here
    void ChannelGpfifo::SendPure(u32 method, u32 argument, SubchannelId subChannel) {
        if (subChannel == SubchannelId::ThreeD) {
            channelCtx.maxwell3D->CallMethod(method, argument);
            return;
        }

        // Every other subchannel is only reported, the call itself is dropped
        Logger::Warn("Called method 0x{:X} in unimplemented engine 0x{:X}, args: 0x{:X}", method, subChannel, argument);
    }
    void ChannelGpfifo::Process(GpEntry gpEntry) {
        if (!gpEntry.size) {
            // This is a GPFIFO control entry, all control entries have a zero length and contain no pushbuffers
@ -119,18 +141,18 @@ namespace skyline::soc::gm20b {
            switch (resumeState.state) {
                case MethodResumeState::State::Inc:
                    while (entry != pushBufferData.end() && resumeState.remaining)
-                        Send(resumeState.address++, *(entry++), resumeState.subChannel, --resumeState.remaining == 0);
+                        SendFull(resumeState.address++, *(entry++), resumeState.subChannel, --resumeState.remaining == 0);
                    break;
                case MethodResumeState::State::OneInc:
-                    Send(resumeState.address++, *(entry++), resumeState.subChannel, --resumeState.remaining == 0);
+                    SendFull(resumeState.address++, *(entry++), resumeState.subChannel, --resumeState.remaining == 0);
                    // After the first increment OneInc methods work the same as a NonInc method, this is needed so they can resume correctly if they are broken up by multiple GpEntries
                    resumeState.state = MethodResumeState::State::NonInc;
                    [[fallthrough]];
                case MethodResumeState::State::NonInc:
                    while (entry != pushBufferData.end() && resumeState.remaining)
-                        Send(resumeState.address, *(entry++), resumeState.subChannel, --resumeState.remaining == 0);
+                        SendFull(resumeState.address, *(entry++), resumeState.subChannel, --resumeState.remaining == 0);
                    break;
            }
@ -149,7 +171,7 @@ namespace skyline::soc::gm20b {
            PushBufferMethodHeader methodHeader{.raw = *entry};
            // Needed in order to check for methods split across multiple GpEntries
-            auto remainingEntries{std::distance(entry, pushBufferData.end()) - 1};
+            ssize_t remainingEntries{std::distance(entry, pushBufferData.end()) - 1};
            // Handles storing state and initial execution for methods that are split across multiple GpEntries
            auto startSplitMethod{[&](auto methodState) {
@ -166,47 +188,86 @@ namespace skyline::soc::gm20b {
                resumeSplitMethod();
            }};
            /**
             * @brief Handles execution of a specific method type as specified by the State template parameter
             * @tparam ThreeDOnly Whether to skip subchannel method handling and send all method calls to the 3D engine
             * @tparam State The method type to execute, this selects how the method address advances between calls
             * @return If the method had to be started as a split method because not all of its arguments are in the current GpEntry
             */
            auto dispatchCalls{[&]<bool ThreeDOnly, MethodResumeState::State State> () {
                /**
                 * @brief Gets the offset to apply to the method address for a given dispatch loop index
                 * @note Selected at compile time with a total else branch so that every instantiation has a return path
                 */
                auto methodOffset{[] ([[maybe_unused]] u32 index) -> u32 {
                    if constexpr (State == MethodResumeState::State::Inc)
                        return index; // Address advances on every call
                    else if constexpr (State == MethodResumeState::State::OneInc)
                        return index ? 1 : 0; // Address advances once, after the first call
                    else
                        return 0; // NonInc, the address never advances
                }};

                // Fewer arguments remain in this GpEntry than the method needs, hand over to the split method handling
                if (remainingEntries < methodHeader.methodCount) {
                    startSplitMethod(State);
                    return true;
                }

                if (methodHeader.Pure()) [[likely]] {
                    // Fast path: the whole method stays clear of GPFIFO and macro methods so no per-call range checks are needed
                    for (u32 i{}; i < methodHeader.methodCount; i++) {
                        if constexpr (ThreeDOnly)
                            channelCtx.maxwell3D->CallMethod(methodHeader.methodAddress + methodOffset(i), *++entry);
                        else
                            SendPure(methodHeader.methodAddress + methodOffset(i), *++entry, methodHeader.methodSubChannel);
                    }
                } else {
                    // Slow path for methods that touch GPFIFO or macros
                    for (u32 i{}; i < methodHeader.methodCount; i++)
                        SendFull(methodHeader.methodAddress + methodOffset(i), *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1);
                }

                return false;
            }};
            /**
             * @brief Handles execution of a single method
             * @tparam ThreeDOnly Whether to skip subchannel method handling and send all method calls to the 3D engine
             * @return If this was the final method in the current GpEntry
             */
            auto processMethod{[&] <bool ThreeDOnly> () -> bool {
                switch (methodHeader.secOp) {
                    case PushBufferMethodHeader::SecOp::IncMethod:
-                    if (remainingEntries >= methodHeader.methodCount) {
+                        return dispatchCalls.operator()<ThreeDOnly, MethodResumeState::State::Inc>();
                        for (u32 i{}; i < methodHeader.methodCount; i++)
                            Send(methodHeader.methodAddress + i, *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1);
                        break;
                    } else {
                        startSplitMethod(MethodResumeState::State::Inc);
                        return;
                    }
                    case PushBufferMethodHeader::SecOp::NonIncMethod:
-                    if (remainingEntries >= methodHeader.methodCount) {
+                        return dispatchCalls.operator()<ThreeDOnly, MethodResumeState::State::NonInc>();
                        for (u32 i{}; i < methodHeader.methodCount; i++)
                            Send(methodHeader.methodAddress, *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1);
                        break;
                    } else {
                        startSplitMethod(MethodResumeState::State::NonInc);
                        return;
                    }
                    case PushBufferMethodHeader::SecOp::OneInc:
-                    if (remainingEntries >= methodHeader.methodCount) {
+                        return dispatchCalls.operator()<ThreeDOnly, MethodResumeState::State::OneInc>();
                        for (u32 i{}; i < methodHeader.methodCount; i++)
                            Send(methodHeader.methodAddress + (i ? 1 : 0), *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1);
                        break;
                    } else {
                        startSplitMethod(MethodResumeState::State::OneInc);
                        return;
                    }
                    case PushBufferMethodHeader::SecOp::ImmdDataMethod:
-                    Send(methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel, true);
+                        if (methodHeader.Pure()) {
-                    break;
+                            if constexpr (ThreeDOnly)
-
+                                channelCtx.maxwell3D->CallMethod(methodHeader.methodAddress, methodHeader.immdData);
                            else
                                SendPure(methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel);
                        } else {
                            SendFull(methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel, true);
                        }
                        return false;
                    case PushBufferMethodHeader::SecOp::EndPbSegment:
-                    return;
+                        return true;
                    default:
                        throw exception("Unsupported pushbuffer method SecOp: {}", static_cast<u8>(methodHeader.secOp));
                }
            }};
            bool hitEnd{[&]() {
                if (methodHeader.methodSubChannel == SubchannelId::ThreeD) [[likely]]
                    return processMethod.operator()<true>();
                else
                    return processMethod.operator()<false>();
            }()};
            if (hitEnd)
                return;
        }
    }
--- a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h
+++ b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h
@ -130,9 +130,14 @@ namespace skyline::soc::gm20b {
        std::thread thread; //!< The thread that manages processing of pushbuffers
        /**
-         * @brief Sends a method call to the GPU hardware
+         * @brief Sends a method call to the appropriate subchannel and handles macro and GPFIFO methods
         */
-        void Send(u32 method, u32 argument, SubchannelId subchannel, bool lastCall);
+        void SendFull(u32 method, u32 argument, SubchannelId subchannel, bool lastCall);
        /**
         * @brief Sends a method call to the appropriate subchannel, macro and GPFIFO methods are not handled
         */
        void SendPure(u32 method, u32 argument, SubchannelId subchannel);
        /**
         * @brief Processes the pushbuffer contained within the given GpEntry, calling methods as needed