From fc017e1e9500ba4db9de9a8a73b3936dc08ff79d Mon Sep 17 00:00:00 2001
From: Billy Laws <blaws05@gmail.com>
Date: Mon, 11 Oct 2021 20:08:42 +0100
Subject: [PATCH] Implement pre-wait and post-increment syncpoint operations in
 submit

Both OpenGL and Vulkan games request these syncpoint operations through the
submit call itself rather than including them inside the main command buffer.
---
 app/CMakeLists.txt                            |   1 +
 .../main/cpp/skyline/common/circular_queue.h  |  10 +-
 .../services/nvdrv/devices/nvhost/as_gpu.cpp  |  28 ++---
 .../services/nvdrv/devices/nvhost/as_gpu.h    |   8 +-
 .../nvdrv/devices/nvhost/gpu_channel.cpp      | 103 ++++++++++++++++--
 .../nvdrv/devices/nvhost/gpu_channel.h        |  11 +-
 .../cpp/skyline/soc/gm20b/engines/gpfifo.cpp  |  50 +++++++++
 .../cpp/skyline/soc/gm20b/engines/gpfifo.h    |  58 +++++-----
 .../skyline/soc/gm20b/engines/maxwell_3d.cpp  |   2 +-
 app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp |   8 +-
 app/src/main/cpp/skyline/soc/gm20b/gpfifo.h   |  15 ++-
 11 files changed, 224 insertions(+), 70 deletions(-)
 create mode 100644 app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.cpp
diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt
index e3e09a20..f8b2c1f5 100644
--- a/app/CMakeLists.txt
+++ b/app/CMakeLists.txt
@@ -107,6 +107,7 @@ add_library(skyline SHARED
         ${source_DIR}/skyline/soc/gm20b/channel.cpp
         ${source_DIR}/skyline/soc/gm20b/gpfifo.cpp
         ${source_DIR}/skyline/soc/gm20b/gmmu.cpp
+        ${source_DIR}/skyline/soc/gm20b/engines/gpfifo.cpp
         ${source_DIR}/skyline/soc/gm20b/engines/maxwell_3d.cpp
         ${source_DIR}/skyline/soc/gm20b/engines/maxwell/macro_interpreter.cpp
         ${source_DIR}/skyline/input/npad.cpp
diff --git a/app/src/main/cpp/skyline/common/circular_queue.h b/app/src/main/cpp/skyline/common/circular_queue.h
index 42c5d0de..1c030f05 100644
--- a/app/src/main/cpp/skyline/common/circular_queue.h
+++ b/app/src/main/cpp/skyline/common/circular_queue.h
@@ -76,12 +76,14 @@ namespace skyline {
 
         void Push(const Type &item) {
             std::unique_lock lock(productionMutex);
-            end = (end == reinterpret_cast<Type *>(vector.end().base()) - 1) ? reinterpret_cast<Type *>(vector.begin().base()) : end;
-            if (start == end + 1) {
+            auto next{end + 1};
+            next = (next == reinterpret_cast<Type *>(vector.end().base())) ? reinterpret_cast<Type *>(vector.begin().base()) : next;
+            if (next == start) {
                 std::unique_lock consumeLock(consumptionMutex);
-                consumeCondition.wait(consumeLock, [=]() { return start != end + 1; });
+                consumeCondition.wait(consumeLock, [=]() { return next != start; });
             }
-            *end = item;
+            *next = item;
+            end = next;
             produceCondition.notify_one();
         }
 
diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.cpp b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.cpp
index 2ddd5d72..39871e16 100644
--- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.cpp
+++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.cpp
@@ -37,6 +37,7 @@ namespace skyline::service::nvdrv::device::nvhost {
             }
             
             gpuCh.asCtx = asCtx;
+            gpuCh.asAllocator = vm.smallPageAllocator;
         } catch (const std::out_of_range &e) {
             state.logger->Warn("Attempting to bind AS to an invalid channel: {}", channelFd);
             return PosixResult::InvalidArgument;
@@ -62,17 +63,12 @@ namespace skyline::service::nvdrv::device::nvhost {
 
         u32 pageSizeBits{pageSize == VM::PageSize ? VM::PageSizeBits : vm.bigPageSizeBits};
 
-        auto &allocator{[&] () -> auto & {
-            if (pageSize == VM::PageSize)
-                return vm.smallPageAllocator;
-            else
-                return vm.bigPageAllocator;
-        }()};
+        auto &allocator{pageSize == VM::PageSize ? *vm.smallPageAllocator : *vm.bigPageAllocator};
 
         if (flags.fixed)
-            allocator->AllocateFixed(offset >> pageSizeBits, pages);
+            allocator.AllocateFixed(offset >> pageSizeBits, pages);
         else
-            offset = static_cast<u64>(allocator->Allocate(pages)) << pageSizeBits;
+            offset = static_cast<u64>(allocator.Allocate(pages)) << pageSizeBits;
 
         u64 size{static_cast<u64>(pages) * pageSize};
 
@@ -92,10 +88,10 @@ namespace skyline::service::nvdrv::device::nvhost {
         auto mapping{mappingMap.at(offset)};
 
         if (!mapping->fixed) {
-            auto &allocator{mapping->bigPage ? vm.bigPageAllocator : vm.smallPageAllocator};
+            auto &allocator{mapping->bigPage ? *vm.bigPageAllocator : *vm.smallPageAllocator};
             u32 pageSizeBits{mapping->bigPage ? vm.bigPageSizeBits : VM::PageSizeBits};
 
-            allocator->Free(mapping->offset >> pageSizeBits, mapping->size >> pageSizeBits);
+            allocator.Free(mapping->offset >> pageSizeBits, mapping->size >> pageSizeBits);
         }
 
         // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
@@ -129,10 +125,10 @@ namespace skyline::service::nvdrv::device::nvhost {
             if (allocation.sparse)
                 asCtx->gmmu.Unmap(offset, allocation.size);
 
-            auto &allocator{pageSize == VM::PageSize ? vm.smallPageAllocator : vm.bigPageAllocator};
+            auto &allocator{pageSize == VM::PageSize ? *vm.smallPageAllocator : *vm.bigPageAllocator};
             u32 pageSizeBits{pageSize == VM::PageSize ? VM::PageSizeBits : vm.bigPageSizeBits};
 
-            allocator->Free(offset >> pageSizeBits, allocation.size >> pageSizeBits);
+            allocator.Free(offset >> pageSizeBits, allocation.size >> pageSizeBits);
             allocationMap.erase(offset);
         } catch (const std::out_of_range &e) {
             return PosixResult::InvalidArgument;
@@ -153,10 +149,10 @@ namespace skyline::service::nvdrv::device::nvhost {
             auto mapping{mappingMap.at(offset)};
 
             if (!mapping->fixed) {
-                auto &allocator{mapping->bigPage ? vm.bigPageAllocator : vm.smallPageAllocator};
+                auto &allocator{mapping->bigPage ? *vm.bigPageAllocator : *vm.smallPageAllocator};
                 u32 pageSizeBits{mapping->bigPage ? vm.bigPageSizeBits : VM::PageSizeBits};
 
-                allocator->Free(mapping->offset >> pageSizeBits, mapping->size >> pageSizeBits);
+                allocator.Free(mapping->offset >> pageSizeBits, mapping->size >> pageSizeBits);
             }
 
             // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
@@ -233,11 +229,11 @@ namespace skyline::service::nvdrv::device::nvhost {
                     throw exception("Invalid handle alignment: 0x{:X}", h->align);
             }()};
 
-            auto &allocator{bigPage ? vm.bigPageAllocator : vm.smallPageAllocator};
+            auto &allocator{bigPage ? *vm.bigPageAllocator : *vm.smallPageAllocator};
             u32 pageSize{bigPage ? vm.bigPageSize : VM::PageSize};
             u32 pageSizeBits{bigPage ? vm.bigPageSizeBits : VM::PageSizeBits};
 
-            offset = static_cast<u64>(allocator->Allocate(util::AlignUp(size, pageSize) >> pageSizeBits)) << pageSizeBits;
+            offset = static_cast<u64>(allocator.Allocate(util::AlignUp(size, pageSize) >> pageSizeBits)) << pageSizeBits;
             asCtx->gmmu.Map(offset, cpuPtr, size);
 
             auto mapping{std::make_shared<Mapping>(cpuPtr, offset, size, false, bigPage, false)};
diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.h b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.h
index 26c7ff00..a9a64062 100644
--- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.h
+++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.h
@@ -8,6 +8,8 @@
 #include <services/nvdrv/devices/nvdevice.h>
 
 namespace skyline::service::nvdrv::device::nvhost {
+    class GpuChannel;
+
     /**
      * @brief nvhost::AsGpu (/dev/nvhost-as-gpu) is used to access a GPU virtual address space
      * @url https://switchbrew.org/wiki/NV_services#.2Fdev.2Fnvhost-as-gpu
@@ -59,14 +61,16 @@ namespace skyline::service::nvdrv::device::nvhost {
 
             using Allocator = FlatAllocator<u32, 0, 32>;
 
-            std::unique_ptr<Allocator> bigPageAllocator{};
-            std::unique_ptr<Allocator> smallPageAllocator{};
+            std::unique_ptr<Allocator> bigPageAllocator;
+            std::shared_ptr<Allocator> smallPageAllocator; // Shared as this is also used by nvhost::GpuChannel
 
             bool initialised{};
         } vm;
 
         std::shared_ptr<soc::gm20b::AddressSpaceContext> asCtx;
 
+        friend GpuChannel;
+
         void FreeMappingLocked(u64 offset);
 
       public:
diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.cpp b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.cpp
index 2bebb602..fd579ca8 100644
--- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.cpp
+++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.cpp
@@ -14,6 +14,57 @@ namespace skyline::service::nvdrv::device::nvhost {
         channelSyncpoint = core.syncpointManager.AllocateSyncpoint(false);
     }
 
+    static constexpr size_t SyncpointWaitCmdLen{4};
+    // Fills the first SyncpointWaitCmdLen words of mem with a GPFIFO command that waits on fence
+    static void AddSyncpointWaitCmd(span<u32> mem, Fence fence) {
+        // Method headers addressing gpfifo.regs.syncpoint.payload (0x1C) and the register after it (0x1D)
+        constexpr u32 PayloadMethod{0x2001001C};
+        constexpr u32 ActionMethod{0x2001001D};
+
+        // .index = fence.id, .operation = SyncpointOperation::Wait and
+        // .waitSwitch = SyncpointWaitSwitch::En packed into a single register value
+        const auto waitAction{(fence.id << 8) | 0x10};
+
+        // gpfifo.regs.syncpoint.payload = fence.threshold
+        mem[0] = PayloadMethod;
+        mem[1] = fence.threshold;
+
+        // Writing the packed value to gpfifo.regs.syncpoint is what triggers the wait
+        mem[2] = ActionMethod;
+        mem[3] = waitAction;
+    }
+
+    static constexpr size_t SyncpointIncrCmdMaxLen{8};
+    // Writes a syncpoint increment command for fence.id into mem, preceded by a WFI when wfi is set
+    static void AddSyncpointIncrCmd(span<u32> mem, Fence fence, bool wfi) {
+        size_t offset{};
+
+        if (wfi) {
+            // gpfifo.regs.wfi.scope = WfiScope::CurrentScgType, then WFI is triggered
+            mem[offset++] = 0x2001001E;
+            mem[offset++] = 0;
+        }
+
+        // gpfifo.regs.syncpoint.payload = 0
+        mem[offset++] = 0x2001001C;
+        mem[offset++] = 0;
+
+        // gpfifo.regs.syncpoint = { .index = fence.id, .operation = SyncpointOperation::Incr }
+        // Then increment is triggered
+        mem[offset++] = 0x2001001D;
+        mem[offset++] = (fence.id << 8) | 0x1;
+
+        // Repeat twice, likely due to HW bugs
+        mem[offset++] = 0x2001001D;
+        mem[offset++] = (fence.id << 8) | 0x1;
+
+        // The caller submits all of mem, but without a WFI only 6 words are written above; zero the
+        // rest (the value fresh push buffer memory holds) so stale words left by an earlier command
+        // in this region, e.g. an old increment after the offset wrapped around, aren't executed again
+        while (offset < mem.size())
+            mem[offset++] = 0;
+    }
+
     PosixResult GpuChannel::SetNvmapFd(In<core::NvMap::Handle::Id> id) {
         state.logger->Debug("id: {}", id);
         return PosixResult::Success;
@@ -35,28 +86,44 @@ namespace skyline::service::nvdrv::device::nvhost {
         if (numEntries > gpEntries.size())
             throw exception("GpEntry size mismatch!");
 
+        std::scoped_lock lock(channelMutex);
+
         if (flags.fenceWait) {
             if (flags.incrementWithValue)
                 return PosixResult::InvalidArgument;
 
-            if (!core.syncpointManager.IsFenceSignalled(fence))
-                throw exception("Waiting on a fence through SubmitGpfifo is unimplemented");
+            if (!core.syncpointManager.IsFenceSignalled(fence)) {
+                // Wraparound
+                if (pushBufferMemoryOffset + SyncpointWaitCmdLen >= pushBufferMemory.size())
+                    pushBufferMemoryOffset = 0;
+
+                AddSyncpointWaitCmd(span(pushBufferMemory).subspan(pushBufferMemoryOffset, SyncpointWaitCmdLen), fence);
+                channelCtx->gpfifo.Push(soc::gm20b::GpEntry(pushBufferAddr + pushBufferMemoryOffset * sizeof(u32), SyncpointWaitCmdLen));
+
+                // Increment offset
+                pushBufferMemoryOffset += SyncpointWaitCmdLen;
+            }
         }
 
-        {
-            std::scoped_lock lock(channelMutex);
+        channelCtx->gpfifo.Push(gpEntries.subspan(0, numEntries));
 
-            channelCtx->gpfifo.Push(gpEntries.subspan(0, numEntries));
+        fence.id = channelSyncpoint;
 
-            fence.id = channelSyncpoint;
+        u32 increment{(flags.fenceIncrement ? 2 : 0) + (flags.incrementWithValue ? fence.threshold : 0)};
+        fence.threshold = core.syncpointManager.IncrementSyncpointMaxExt(channelSyncpoint, increment);
 
-            u32 increment{(flags.fenceIncrement ? 2 : 0) + (flags.incrementWithValue ? fence.threshold : 0)};
-            fence.threshold = core.syncpointManager.IncrementSyncpointMaxExt(channelSyncpoint, increment);
+        if (flags.fenceIncrement) {
+            // Wraparound
+            if (pushBufferMemoryOffset + SyncpointIncrCmdMaxLen >= pushBufferMemory.size())
+                pushBufferMemoryOffset = 0;
+
+            AddSyncpointIncrCmd(span(pushBufferMemory).subspan(pushBufferMemoryOffset, SyncpointIncrCmdMaxLen), fence, !flags.suppressWfi);
+            channelCtx->gpfifo.Push(soc::gm20b::GpEntry(pushBufferAddr + pushBufferMemoryOffset * sizeof(u32), SyncpointIncrCmdMaxLen));
+
+            // Increment offset
+            pushBufferMemoryOffset += SyncpointIncrCmdMaxLen;
         }
 
-        if (flags.fenceIncrement)
-            throw exception("Incrementing a fence through SubmitGpfifo is unimplemented");
-
         flags.raw = 0;
 
         return PosixResult::Success;
@@ -90,7 +157,7 @@ namespace skyline::service::nvdrv::device::nvhost {
         state.logger->Debug("numEntries: {}, numJobs: {}, flags: 0x{:X}", numEntries, numJobs, flags);
 
         std::scoped_lock lock(channelMutex);
-        if (!asCtx) {
+        if (!asCtx || !asAllocator) {
             state.logger->Warn("Trying to allocate a channel without a bound address space");
             return PosixResult::InvalidArgument;
         }
@@ -104,6 +171,18 @@ namespace skyline::service::nvdrv::device::nvhost {
 
         fence = core.syncpointManager.GetSyncpointFence(channelSyncpoint);
 
+        // Allocate space for one wait and incr for each entry, though we're not likely to hit this in practice
+        size_t pushBufferWords{numEntries * SyncpointIncrCmdMaxLen + numEntries * SyncpointWaitCmdLen};
+        size_t pushBufferSize{pushBufferWords * sizeof(u32)};
+
+        pushBufferMemory.resize(pushBufferWords);
+
+        // Allocate pages in the GPU AS, the page count must come from the size in bytes (not in words) so that the entire range mapped below is reserved
+        pushBufferAddr = static_cast<u64>(asAllocator->Allocate((static_cast<u32>(pushBufferSize) >> AsGpu::VM::PageSizeBits) + 1)) << AsGpu::VM::PageSizeBits;
+
+        // Map onto the GPU
+        asCtx->gmmu.Map(pushBufferAddr, reinterpret_cast<u8 *>(pushBufferMemory.data()), pushBufferSize);
+
         return PosixResult::Success;
     }
 
diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.h b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.h
index a8c71bad..5d35e7a1 100644
--- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.h
+++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.h
@@ -6,11 +6,10 @@
 #include <services/common/fence.h>
 #include <soc/gm20b/engines/maxwell_3d.h> // TODO: remove
 #include <soc/gm20b/channel.h>
-#include "services/nvdrv/devices/nvdevice.h"
+#include <services/nvdrv/devices/nvdevice.h>
+#include "as_gpu.h"
 
 namespace skyline::service::nvdrv::device::nvhost {
-    class AsGpu;
-
     /**
      * @brief nvhost::GpuChannel is used to create and submit commands to channels which are effectively GPU processes
      * @url https://switchbrew.org/wiki/NV_services#Channels
@@ -25,8 +24,14 @@ namespace skyline::service::nvdrv::device::nvhost {
         std::shared_ptr<type::KEvent> errorNotifierEvent;
 
         std::shared_ptr<soc::gm20b::AddressSpaceContext> asCtx;
+        std::shared_ptr<AsGpu::VM::Allocator> asAllocator;
         std::unique_ptr<soc::gm20b::ChannelContext> channelCtx;
 
+
+        u64 pushBufferAddr{};
+        size_t pushBufferMemoryOffset{};
+        std::vector<u32> pushBufferMemory;
+
         friend AsGpu;
 
       public:
diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.cpp
new file mode 100644
index 00000000..9b15a33b
--- /dev/null
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.cpp
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: MPL-2.0
+// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
+
+#include <soc.h>
+#include <soc/gm20b/channel.h>
+#include "gpfifo.h"
+
+namespace skyline::soc::gm20b::engine {
+    GPFIFO::GPFIFO(const DeviceState &state, ChannelContext &channelCtx) : Engine(state), channelCtx(channelCtx) {}
+
+    // Stores argument in registers.raw[method], then runs the side effect of the written register if it has one
+    // Only syncpoint.action has one: it increments, or waits on, the syncpoint selected by action.index
+    void GPFIFO::CallMethod(u32 method, u32 argument, bool lastCall) {
+        state.logger->Debug("Called method in GPFIFO: 0x{:X} args: 0x{:X}", method, argument);
+
+        registers.raw[method] = argument;
+
+        #define GPFIFO_OFFSET(field) U32_OFFSET(Registers, field)
+        #define GPFIFO_STRUCT_OFFSET(field, member) GPFIFO_OFFSET(field) + U32_OFFSET(typeof(Registers::field), member)
+
+        #define GPFIFO_CASE_BASE(fieldName, fieldAccessor, offset, content) case offset: { \
+            auto fieldName{util::BitCast<typeof(registers.fieldAccessor)>(argument)};      \
+            content                                                                        \
+            return;                                                                        \
+        }
+        #define GPFIFO_CASE(field, content) GPFIFO_CASE_BASE(field, field, GPFIFO_OFFSET(field), content)
+        #define GPFIFO_STRUCT_CASE(field, member, content) GPFIFO_CASE_BASE(member, field.member, GPFIFO_STRUCT_OFFSET(field, member), content)
+
+        switch (method) {
+            GPFIFO_STRUCT_CASE(syncpoint, action, {
+                if (action.operation == Registers::SyncpointOperation::Incr) {
+                    state.logger->Debug("Increment syncpoint: {}", +action.index);
+                    channelCtx.executor.Execute();
+                    state.soc->host1x.syncpoints.at(action.index).Increment();
+                } else if (action.operation == Registers::SyncpointOperation::Wait) {
+                    state.logger->Debug("Wait syncpoint: {}, thresh: {}", +action.index, registers.syncpoint.payload);
+
+                    // Wait forever for another channel to increment
+                    state.soc->host1x.syncpoints.at(action.index).Wait(registers.syncpoint.payload, std::chrono::steady_clock::duration::max());
+                }
+            })
+        }
+
+        #undef GPFIFO_STRUCT_CASE
+        #undef GPFIFO_CASE
+        #undef GPFIFO_CASE_BASE
+        #undef GPFIFO_STRUCT_OFFSET
+        #undef GPFIFO_OFFSET
+    }
+}
diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.h b/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.h
index 1b36163e..9febd253 100644
--- a/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.h
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.h
@@ -5,6 +5,10 @@
 
 #include "engine.h"
 
+namespace skyline::soc::gm20b {
+    struct ChannelContext;
+}
+
 namespace skyline::soc::gm20b::engine {
     /**
     * @brief The GPFIFO engine handles managing macros and semaphores
@@ -93,24 +97,24 @@ namespace skyline::soc::gm20b::engine {
                     u16 nvClass : 16;
                     u8 engine : 5;
                     u16 _pad_ : 11;
-                } setObject;
+                } setObject; // 0x0
 
-                u32 illegal;
-                u32 nop;
-                u32 _pad0_;
+                u32 illegal; // 0x1
+                u32 nop; // 0x2
+                u32 _pad0_; // 0x3
 
                 struct {
                     struct {
                         u32 offsetUpper : 8;
                         u32 _pad0_ : 24;
-                    };
+                    }; // 0x4
 
                     struct {
                         u8 _pad1_ : 2;
                         u32 offsetLower : 30;
-                    };
+                    }; // 0x5
 
-                    u32 payload;
+                    u32 payload; // 0x6
 
                     struct {
                         SemaphoreOperation operation : 5;
@@ -123,54 +127,52 @@ namespace skyline::soc::gm20b::engine {
                         u8 _pad5_ : 2;
                         SemaphoreReduction reduction : 4;
                         SemaphoreFormat format : 1;
-                    };
+                    }; // 0x7
                 } semaphore;
 
-                u32 nonStallInterrupt;
-                u32 fbFlush;
-                u32 _pad1_[2];
-                u32 memOpC;
-                u32 memOpD;
-                u32 _pad2_[6];
-                u32 setReference;
-                u32 _pad3_[7];
+                u32 nonStallInterrupt; // 0x8
+                u32 fbFlush; // 0x9
+                u32 _pad1_[2]; // 0xA
+                u32 memOpC; // 0xC
+                u32 memOpD; // 0xD
+                u32 _pad2_[6]; // 0xE
+                u32 setReference; // 0x14
+                u32 _pad3_[7]; // 0x15
 
                 struct {
-                    u32 payload;
+                    u32 payload; // 0x1C
 
                     struct {
                         SyncpointOperation operation : 1;
                         u8 _pad0_ : 3;
-                        SyncpointWaitSwitch waitSwitch : 1;
+                        SyncpointWaitSwitch waitSwitch : 1; //!< If the PBDMA unit can switch to a different timeslice group (TSG) while waiting on a syncpoint
                         u8 _pad1_ : 3;
                         u16 index : 12;
                         u16 _pad2_ : 12;
-                    };
+                    } action; // 0x1D
                 } syncpoint;
 
                 struct {
                     WfiScope scope : 1;
                     u32 _pad_ : 31;
-                } wfi;
+                } wfi; // 0x1E
 
-                u32 crcCheck;
+                u32 crcCheck; // 0x1F
 
                 struct {
                     YieldOp op : 2;
                     u32 _pad_ : 30;
-                } yield;
+                } yield; // 0x20
             };
         } registers{};
         static_assert(sizeof(Registers) == (RegisterCount * sizeof(u32)));
         #pragma pack(pop)
 
+        ChannelContext &channelCtx;
+
       public:
-        GPFIFO(const DeviceState &state) : Engine(state) {}
+        GPFIFO(const DeviceState &state, ChannelContext &channelCtx);
 
-        void CallMethod(u32 method, u32 argument, bool lastCall) {
-            state.logger->Debug("Called method in GPFIFO: 0x{:X} args: 0x{:X}", method, argument);
-
-            registers.raw[method] = argument;
-        };
+        void CallMethod(u32 method, u32 argument, bool lastCall);
     };
 }
diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp
index 66884a15..7e3c795b 100644
--- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp
@@ -244,7 +244,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
             })
 
             MAXWELL3D_CASE(syncpointAction, {
-                state.logger->Debug("Increment syncpoint: {}", static_cast<u16>(syncpointAction.id));
+                state.logger->Debug("Increment syncpoint: {}", +syncpointAction.id);
                 channelCtx.executor.Execute();
                 state.soc->host1x.syncpoints.at(syncpointAction.id).Increment();
             })
diff --git a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp
index 2d895f5f..cc3cb865 100644
--- a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp
@@ -61,7 +61,7 @@ namespace skyline::soc::gm20b {
 
     ChannelGpfifo::ChannelGpfifo(const DeviceState &state, ChannelContext &channelCtx, size_t numEntries) :
         state(state),
-        gpfifoEngine(state),
+        gpfifoEngine(state, channelCtx),
         channelCtx(channelCtx),
         gpEntries(numEntries),
         thread(std::thread(&ChannelGpfifo::Run, this)) {}
@@ -155,7 +155,7 @@ namespace skyline::soc::gm20b {
         try {
             signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, signal::ExceptionalSignalHandler);
             gpEntries.Process([this](GpEntry gpEntry) {
-                state.logger->Debug("Processing pushbuffer: 0x{:X}", gpEntry.Address());
+                state.logger->Debug("Processing pushbuffer: 0x{:X}, Size: 0x{:X}", gpEntry.Address(), +gpEntry.size);
                 Process(gpEntry);
             });
         } catch (const signal::SignalException &e) {
@@ -175,6 +175,10 @@ namespace skyline::soc::gm20b {
         gpEntries.Append(entries);
     }
 
+    void ChannelGpfifo::Push(GpEntry entry) { // Single-entry counterpart of the span overload
+        gpEntries.Push(entry); // Forwards to gpEntries' single-item Push, where the span overload uses Append
+    }
+
     ChannelGpfifo::~ChannelGpfifo() {
         if (thread.joinable()) {
             pthread_kill(thread.native_handle(), SIGINT);
diff --git a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h
index 931c0e16..c8222c97 100644
--- a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h
+++ b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h
@@ -21,7 +21,7 @@ namespace skyline::soc::gm20b {
         };
 
         union {
-            u32 entry0;
+            u32 entry0{};
 
             struct {
                 Fetch fetch : 1;
@@ -53,7 +53,7 @@ namespace skyline::soc::gm20b {
         };
 
         union {
-            u32 entry1;
+            u32 entry1{};
 
             struct {
                 union {
@@ -68,6 +68,12 @@ namespace skyline::soc::gm20b {
             };
         };
 
+        constexpr GpEntry(u64 gpuAddress, u32 pSize) { // Builds an entry referring to pSize units of pushbuffer at gpuAddress
+            getHi = static_cast<u8>(gpuAddress >> 32); // Bits 32-39 of the address
+            get = static_cast<u32>(gpuAddress >> 2); // Address with its low two bits dropped, Address() undoes this with `<< 2`
+            size = pSize; // NOTE(review): nvhost::GpuChannel passes a count of u32 words here - confirm the field's unit
+        }
+
         constexpr u64 Address() const {
             return (static_cast<u64>(getHi) << 32) | (static_cast<u64>(get) << 2);
         }
@@ -115,5 +121,10 @@ namespace skyline::soc::gm20b {
          * @brief Pushes a list of entries to the FIFO, these commands will be executed on calls to 'Step'
          */
         void Push(span<GpEntry> entries);
+
+        /**
+         * @brief Pushes a single entry to the FIFO; its commands will be executed on calls to 'Process'
+         */
+        void Push(GpEntry entries);
     };
 }