diff --git a/app/src/main/cpp/skyline/kernel/memory.cpp b/app/src/main/cpp/skyline/kernel/memory.cpp
index 103fe711..066490e7 100644
--- a/app/src/main/cpp/skyline/kernel/memory.cpp
+++ b/app/src/main/cpp/skyline/kernel/memory.cpp
@@ -7,6 +7,8 @@
 namespace skyline::kernel {
     MemoryManager::MemoryManager(const DeviceState &state) : state(state) {}
 
+    constexpr size_t RegionAlignment{1ULL << 21}; //!< The minimum alignment of a HOS memory region
+
     void MemoryManager::InitializeVmm(memory::AddressSpaceType type) {
         switch (type) {
             case memory::AddressSpaceType::AddressSpace32Bit:
@@ -22,7 +24,7 @@ namespace skyline::kernel {
             case memory::AddressSpaceType::AddressSpace39Bit: {
                 addressSpace.address = 0;
                 addressSpace.size = 1UL << 39;
-                base.size = 0x78000000 + 0x1000000000 + 0x180000000 + 0x80000000 + 0x1000000000;
+                base.size = 0x78000000 + 0x1000000000 + 0x180000000 + 0x80000000 + 0x1000000000; // Code region size is an assumed maximum here
                 break;
             }
 
@@ -41,7 +43,7 @@ namespace skyline::kernel {
             }
 
             start = util::HexStringToInt<u64>(std::string_view(maps.data() + maps.find_first_of('-', line) + 1, sizeof(u64) * 2));
-            alignedStart = util::AlignUp(start, 1ULL << 21);
+            alignedStart = util::AlignUp(start, RegionAlignment);
             if (alignedStart + base.size > addressSpace.size)
                 break;
         } while ((line = maps.find_first_of('\n', line)) != std::string::npos && line++);
@@ -49,7 +51,7 @@ namespace skyline::kernel {
         if (!base.address)
             throw exception("Cannot find a suitable carveout for the guest address space");
 
-        mmap(reinterpret_cast<void*>(base.address), base.size, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+        mmap(reinterpret_cast<void *>(base.address), base.size, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
 
         chunks = {ChunkDescriptor{
             .ptr = reinterpret_cast<u8 *>(addressSpace.address),
@@ -60,6 +62,9 @@ namespace skyline::kernel {
 
     void MemoryManager::InitializeRegions(u8 *codeStart, u64 size) {
         u64 address{reinterpret_cast<u64>(codeStart)};
+        if (!util::IsAligned(address, RegionAlignment))
+            throw exception("Non-aligned code region was used to initialize regions: 0x{:X} - 0x{:X}", codeStart, codeStart + size);
+
         switch (addressSpace.size) {
             case 1UL << 36: {
                 code.address = base.address;
@@ -79,7 +84,7 @@ namespace skyline::kernel {
 
             case 1UL << 39: {
                 code.address = base.address;
-                code.size = 0x78000000;
+                code.size = util::AlignUp(size, RegionAlignment);
                 alias.address = code.address + code.size;
                 alias.size = 0x1000000000;
                 heap.address = alias.address + alias.size;
@@ -95,6 +100,12 @@ namespace skyline::kernel {
                 throw exception("Regions initialized without VMM initialization");
         }
 
+        auto newSize{code.size + alias.size + stack.size + heap.size + tlsIo.size}; // Total size actually occupied by the guest regions
+        if (newSize > base.size)
+            throw exception("Region size has exceeded pre-allocated area: 0x{:X}/0x{:X}", newSize, base.size);
+        if (newSize != base.size)
+            munmap(reinterpret_cast<u8 *>(base.address) + newSize, base.size - newSize); // Release the unused tail of the reservation: [base + newSize, base + base.size)
+
         if (size > code.size)
             throw exception("Code region ({}) is smaller than mapped code size ({})", code.size, size);
 
diff --git a/app/src/main/cpp/skyline/kernel/svc.cpp b/app/src/main/cpp/skyline/kernel/svc.cpp
index 6fcd827a..d0315301 100644
--- a/app/src/main/cpp/skyline/kernel/svc.cpp
+++ b/app/src/main/cpp/skyline/kernel/svc.cpp
@@ -678,11 +678,11 @@ namespace skyline::kernel::svc {
                 break;
 
             case constant::infoState::AddressSpaceBaseAddr:
-                out = state.process->memory.addressSpace.address;
+                out = state.process->memory.base.address;
                 break;
 
             case constant::infoState::AddressSpaceSize:
-                out = state.process->memory.addressSpace.size;
+                out = state.process->memory.base.size;
                 break;
 
             case constant::infoState::StackRegionBaseAddr:
diff --git a/app/src/main/cpp/skyline/kernel/types/KSharedMemory.cpp b/app/src/main/cpp/skyline/kernel/types/KSharedMemory.cpp
index 24abd938..d93f7371 100644
--- a/app/src/main/cpp/skyline/kernel/types/KSharedMemory.cpp
+++ b/app/src/main/cpp/skyline/kernel/types/KSharedMemory.cpp
@@ -26,7 +26,7 @@ namespace skyline::kernel::type {
         if (ptr && !util::PageAligned(ptr))
             throw exception("KSharedMemory was mapped to a non-page-aligned address: 0x{:X}", ptr);
 
-        guest.ptr = reinterpret_cast<u8 *>(mmap(ptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | (ptr ? MAP_FIXED_NOREPLACE : 0), fd, 0));
+        guest.ptr = reinterpret_cast<u8 *>(mmap(ptr, size, permission.Get(), MAP_SHARED | (ptr ? MAP_FIXED : 0), fd, 0));
         if (guest.ptr == MAP_FAILED)
             throw exception("An error occurred while mapping shared memory in guest");
         guest.size = size;
diff --git a/app/src/main/cpp/skyline/loader/loader.cpp b/app/src/main/cpp/skyline/loader/loader.cpp
index cb9856d4..c7492b71 100644
--- a/app/src/main/cpp/skyline/loader/loader.cpp
+++ b/app/src/main/cpp/skyline/loader/loader.cpp
@@ -21,30 +21,25 @@ namespace skyline::loader {
         if (!util::PageAligned(executable.text.offset) || !util::PageAligned(executable.ro.offset) || !util::PageAligned(executable.data.offset))
             throw exception("LoadProcessData: Section offsets are not aligned with page size: 0x{:X}, 0x{:X}, 0x{:X}", executable.text.offset, executable.ro.offset, executable.data.offset);
 
-        // The data section will always be the last section in memory, so put the patch section after it
-        u64 patchOffset{executable.data.offset + dataSize};
-        std::vector<u32> patch = state.nce->PatchCode(executable.text.contents, reinterpret_cast<u64>(base), patchOffset);
+        auto patch{state.nce->GetPatchData(executable.text.contents)};
 
-        u64 patchSize{patch.size() * sizeof(u32)};
-        u64 padding{util::AlignUp(patchSize, PAGE_SIZE) - patchSize};
+        process->NewHandle<kernel::type::KPrivateMemory>(base, patch.size, memory::Permission{false, false, false}, memory::states::Reserved); // ---
+        state.logger->Debug("Successfully mapped section .patch @ 0x{:X}, Size = 0x{:X}", base, patch.size);
 
-        process->NewHandle<kernel::type::KPrivateMemory>(base + executable.text.offset, textSize, memory::Permission{true, false, true}, memory::states::CodeStatic); // R-X
-        state.logger->Debug("Successfully mapped section .text @ {}, Size = 0x{:X}", base + executable.text.offset, textSize);
+        process->NewHandle<kernel::type::KPrivateMemory>(base + patch.size + executable.text.offset, textSize, memory::Permission{true, false, true}, memory::states::CodeStatic); // R-X
+        state.logger->Debug("Successfully mapped section .text @ 0x{:X}, Size = 0x{:X}", base + patch.size + executable.text.offset, textSize);
 
-        process->NewHandle<kernel::type::KPrivateMemory>(base + executable.ro.offset, roSize, memory::Permission{true, false, false}, memory::states::CodeReadOnly); // R--
-        state.logger->Debug("Successfully mapped section .rodata @ {}, Size = 0x{:X}", base + executable.ro.offset, roSize);
+        process->NewHandle<kernel::type::KPrivateMemory>(base + patch.size + executable.ro.offset, roSize, memory::Permission{true, false, false}, memory::states::CodeReadOnly); // R--
+        state.logger->Debug("Successfully mapped section .rodata @ 0x{:X}, Size = 0x{:X}", base + patch.size + executable.ro.offset, roSize);
 
-        process->NewHandle<kernel::type::KPrivateMemory>(base + executable.data.offset, dataSize, memory::Permission{true, true, false}, memory::states::CodeMutable); // RW-
-        state.logger->Debug("Successfully mapped section .data @ {}, Size = 0x{:X}", base + executable.data.offset, dataSize);
+        process->NewHandle<kernel::type::KPrivateMemory>(base + patch.size + executable.data.offset, dataSize, memory::Permission{true, true, false}, memory::states::CodeMutable); // RW-
+        state.logger->Debug("Successfully mapped section .data + .bss @ 0x{:X}, Size = 0x{:X}", base + patch.size + executable.data.offset, dataSize);
 
-        process->NewHandle<kernel::type::KPrivateMemory>(base + patchOffset, patchSize + padding, memory::Permission{true, true, true}, memory::states::CodeMutable); // RWX
-        state.logger->Debug("Successfully mapped section .patch @ {}, Size = 0x{:X}", base + patchOffset, patchSize + padding);
+        state.nce->PatchCode(executable.text.contents, reinterpret_cast<u32*>(base), patch.size, patch.offsets);
+        std::memcpy(base + patch.size + executable.text.offset, executable.text.contents.data(), textSize);
+        std::memcpy(base + patch.size + executable.ro.offset, executable.ro.contents.data(), roSize);
+        std::memcpy(base + patch.size + executable.data.offset, executable.data.contents.data(), dataSize - executable.bssSize);
 
-        std::memcpy(base + executable.text.offset, executable.text.contents.data(), textSize);
-        std::memcpy(base + executable.ro.offset, executable.ro.contents.data(), roSize);
-        std::memcpy(base + executable.data.offset, executable.data.contents.data(), dataSize - executable.bssSize);
-        std::memcpy(base + patchOffset, patch.data(), patchSize);
-
-        return {base, patchOffset + patchSize + padding, base};
+        return {base, patch.size + textSize + roSize + dataSize, base + patch.size};
     }
 }
diff --git a/app/src/main/cpp/skyline/nce.cpp b/app/src/main/cpp/skyline/nce.cpp
index 0831e702..cf255727 100644
--- a/app/src/main/cpp/skyline/nce.cpp
+++ b/app/src/main/cpp/skyline/nce.cpp
@@ -96,153 +96,185 @@ namespace skyline::nce {
         }
     }
 
-    std::vector<u32> NCE::PatchCode(std::vector<u8> &code, u64 baseAddress, i64 patchBase) {
-        constexpr u32 TpidrEl0{0x5E82};      // ID of TPIDR_EL0 in MRS
-        constexpr u32 TpidrroEl0{0x5E83};    // ID of TPIDRRO_EL0 in MRS
-        constexpr u32 CntfrqEl0{0x5F00};     // ID of CNTFRQ_EL0 in MRS
-        constexpr u32 CntpctEl0{0x5F01};     // ID of CNTPCT_EL0 in MRS
-        constexpr u32 CntvctEl0{0x5F02};     // ID of CNTVCT_EL0 in MRS
-        constexpr u32 TegraX1Freq{19200000}; // The clock frequency of the Tegra X1 (19.2 MHz)
-        constexpr size_t MainSvcTrampolineSize{17};
+    constexpr u8 MainSvcTrampolineSize{17}; // Size of the main SVC trampoline function in u32 units
+    constexpr u32 TpidrEl0{0x5E82};         // ID of TPIDR_EL0 in MRS
+    constexpr u32 TpidrroEl0{0x5E83};       // ID of TPIDRRO_EL0 in MRS
+    constexpr u32 CntfrqEl0{0x5F00};        // ID of CNTFRQ_EL0 in MRS
+    constexpr u32 CntpctEl0{0x5F01};        // ID of CNTPCT_EL0 in MRS
+    constexpr u32 CntvctEl0{0x5F02};        // ID of CNTVCT_EL0 in MRS
+    constexpr u32 TegraX1Freq{19200000};    // The clock frequency of the Tegra X1 (19.2 MHz)
 
-        size_t index{};
-        std::vector<u32> patch(guest::SaveCtxSize + guest::LoadCtxSize + MainSvcTrampolineSize);
+    NCE::PatchData NCE::GetPatchData(const std::vector<u8> &text) { // Scans .text and returns the page-aligned .patch size in bytes plus the indices of instructions that need patching
+        size_t size{guest::SaveCtxSize + guest::LoadCtxSize + MainSvcTrampolineSize}; // Fixed part of .patch (SaveCtx, main SVC trampoline, LoadCtx), counted in u32 units
+        std::vector<size_t> offsets; // Indices (in u32 units from the start of .text) of every instruction PatchCode must visit
 
-        std::memcpy(patch.data(), reinterpret_cast<void *>(&guest::SaveCtx), guest::SaveCtxSize * sizeof(u32));
-        index += guest::SaveCtxSize;
+        u64 frequency;
+        asm("MRS %0, CNTFRQ_EL0" : "=r"(frequency));
+        bool rescaleClock{frequency != TegraX1Freq}; // Counter reads only need trampolines when the host frequency differs from the Tegra X1's
+
+        auto start{reinterpret_cast<const u32 *>(text.data())}, end{reinterpret_cast<const u32 *>(text.data() + text.size())};
+        for (const u32 *instruction{start}; instruction < end; instruction++) {
+            auto svc{*reinterpret_cast<const instr::Svc *>(instruction)}; // The same word is viewed as each candidate instruction type, Verify() tells which one it is
+            auto mrs{*reinterpret_cast<const instr::Mrs *>(instruction)};
+            auto msr{*reinterpret_cast<const instr::Msr *>(instruction)};
+
+            if (svc.Verify()) {
+                size += 7; // Per-SVC trampoline emitted by PatchCode is 7 instructions
+                offsets.push_back(instruction - start);
+            } else if (mrs.Verify()) {
+                if (mrs.srcReg == TpidrroEl0 || mrs.srcReg == TpidrEl0) {
+                    size += ((mrs.destReg != regs::X0) ? 6 : 3); // 3 instructions, plus 3 more when X0 has to be saved, copied out and restored as a scratch register
+                    offsets.push_back(instruction - start);
+                } else {
+                    if (rescaleClock) {
+                        if (mrs.srcReg == CntpctEl0) {
+                            size += guest::RescaleClockSize + 3; // RescaleClock routine followed by LDR, ADD and the return branch
+                            offsets.push_back(instruction - start);
+                        } else if (mrs.srcReg == CntfrqEl0) {
+                            size += 3; // Register moves loading the Tegra X1 frequency followed by the return branch
+                            offsets.push_back(instruction - start);
+                        }
+                    } else if (mrs.srcReg == CntpctEl0) {
+                        offsets.push_back(instruction - start); // Rewritten in place by PatchCode, so no .patch space is reserved
+                    }
+                }
+            } else if (msr.Verify() && msr.destReg == TpidrEl0) {
+                size += 6; // TLS store trampoline emitted by PatchCode is 6 instructions
+                offsets.push_back(instruction - start);
+            }
+        }
+        return {util::AlignUp(size * sizeof(u32), PAGE_SIZE), offsets}; // Convert the instruction count to bytes and round up to a whole page
+    }
+
+    void NCE::PatchCode(std::vector<u8> &text, u32 *patch, size_t patchSize, const std::vector<size_t> &offsets) {
+        u32 *start{patch};
+        u32 *end{patch + (patchSize / sizeof(u32))};
+
+        std::memcpy(patch, reinterpret_cast<void *>(&guest::SaveCtx), guest::SaveCtxSize * sizeof(u32));
+        patch += guest::SaveCtxSize;
 
         {
             /* Main SVC Trampoline */
-
             /* Store LR in 16B of pre-allocated stack */
-            patch[index++] = 0xF90007FE; // STR LR, [SP, #8]
+            *patch++ = 0xF90007FE; // STR LR, [SP, #8]
 
             /* Replace Skyline TLS with host TLS */
-            patch[index++] = 0xD53BD041; // MRS X1, TPIDR_EL0
-            patch[index++] = 0xF9415022; // LDR X2, [X1, #0x2A0] (ThreadContext::hostTpidrEl0)
+            *patch++ = 0xD53BD041; // MRS X1, TPIDR_EL0
+            *patch++ = 0xF9415022; // LDR X2, [X1, #0x2A0] (ThreadContext::hostTpidrEl0)
 
             /* Replace guest stack with host stack */
-            patch[index++] = 0xD51BD042; // MSR TPIDR_EL0, X2
-            patch[index++] = 0x910003E2; // MOV X2, SP
-            patch[index++] = 0xF9415423; // LDR X3, [X1, #0x2A8] (ThreadContext::hostSp)
-            patch[index++] = 0x9100007F; // MOV SP, X3
+            *patch++ = 0xD51BD042; // MSR TPIDR_EL0, X2
+            *patch++ = 0x910003E2; // MOV X2, SP
+            *patch++ = 0xF9415423; // LDR X3, [X1, #0x2A8] (ThreadContext::hostSp)
+            *patch++ = 0x9100007F; // MOV SP, X3
 
             /* Store Skyline TLS + guest SP on stack */
-            patch[index++] = 0xA9BF0BE1; // STP X1, X2, [SP, #-16]!
+            *patch++ = 0xA9BF0BE1; // STP X1, X2, [SP, #-16]!
 
             /* Jump to SvcHandler */
             for (const auto &mov : instr::MoveRegister(regs::X2, reinterpret_cast<u64>(&NCE::SvcHandler)))
                 if (mov)
-                    patch[index++] = mov;
-            patch[index++] = 0xD63F0040; // BLR X2
+                    *patch++ = mov;
+            *patch++ = 0xD63F0040; // BLR X2
 
             /* Restore Skyline TLS + guest SP */
-            patch[index++] = 0xA8C10BE1; // LDP X1, X2, [SP], #16
-            patch[index++] = 0xD51BD041; // MSR TPIDR_EL0, X1
-            patch[index++] = 0x9100005F; // MOV SP, X2
+            *patch++ = 0xA8C10BE1; // LDP X1, X2, [SP], #16
+            *patch++ = 0xD51BD041; // MSR TPIDR_EL0, X1
+            *patch++ = 0x9100005F; // MOV SP, X2
 
             /* Restore LR and Return */
-            patch[index++] = 0xF94007FE; // LDR LR, [SP, #8]
-            patch[index++] = 0xD65F03C0; // RET
+            *patch++ = 0xF94007FE; // LDR LR, [SP, #8]
+            *patch++ = 0xD65F03C0; // RET
         }
 
-        std::memcpy(patch.data() + index, reinterpret_cast<void *>(&guest::LoadCtx), guest::LoadCtxSize * sizeof(u32));
-        index += guest::LoadCtxSize;
+        std::memcpy(patch, reinterpret_cast<void *>(&guest::LoadCtx), guest::LoadCtxSize * sizeof(u32));
+        patch += guest::LoadCtxSize;
 
         u64 frequency;
         asm("MRS %0, CNTFRQ_EL0" : "=r"(frequency));
+        bool rescaleClock{frequency != TegraX1Freq};
 
-        i64 patchOffset{patchBase / i64(sizeof(u32))};
-        u32 *start{reinterpret_cast<u32 *>(code.data())};
-        u32 *end{start + (code.size() / sizeof(u32))};
-        for (u32 *instruction{start}; instruction < end; instruction++) {
+        for (auto offset : offsets) {
+            u32 *instruction{reinterpret_cast<u32 *>(text.data()) + offset};
             auto svc{*reinterpret_cast<instr::Svc *>(instruction)};
             auto mrs{*reinterpret_cast<instr::Mrs *>(instruction)};
             auto msr{*reinterpret_cast<instr::Msr *>(instruction)};
 
             if (svc.Verify()) {
                 /* Per-SVC Trampoline */
-                patch.resize(patch.size() + 7);
-
                 /* Rewrite SVC with B to trampoline */
-                *instruction = instr::B(patchOffset + index).raw;
+                *instruction = instr::B((end - patch) + offset, true).raw;
 
                 /* Save Context */
-                patch[index++] = 0xF81F0FFE; // STR LR, [SP, #-16]!
-                patch[index] = instr::BL(-index).raw;
-                index++;
+                *patch++ = 0xF81F0FFE; // STR LR, [SP, #-16]!
+                *patch = instr::BL(start - patch).raw;
+                patch++;
 
                 /* Jump to main SVC trampoline */
-                patch[index++] = instr::Movz(regs::W0, static_cast<u16>(svc.value)).raw;
-                patch[index] = instr::BL(guest::SaveCtxSize - index).raw;
-                index++;
+                *patch++ = instr::Movz(regs::W0, static_cast<u16>(svc.value)).raw;
+                *patch = instr::BL((start - patch) + guest::SaveCtxSize).raw;
+                patch++;
 
                 /* Restore Context and Return */
-                patch[index] = instr::BL(guest::SaveCtxSize + MainSvcTrampolineSize - index).raw;
-                index++;
-                patch[index++] = 0xF84107FE; // LDR LR, [SP], #16
-                patch[index] = instr::B(-(patchOffset + index - 1)).raw;
-                index++;
+                *patch = instr::BL((start - patch) + guest::SaveCtxSize + MainSvcTrampolineSize).raw;
+                patch++;
+                *patch++ = 0xF84107FE; // LDR LR, [SP], #16
+                *patch = instr::B((end - patch) + offset + 1).raw;
+                patch++;
             } else if (mrs.Verify()) {
                 if (mrs.srcReg == TpidrroEl0 || mrs.srcReg == TpidrEl0) {
                     /* Emulated TLS Register Load */
-                    patch.resize(patch.size() + ((mrs.destReg != regs::X0) ? 6 : 3));
-
                     /* Rewrite MRS with B to trampoline */
-                    *instruction = instr::B(patchOffset + index).raw;
+                    *instruction = instr::B((end - patch) + offset, true).raw;
 
                     /* Allocate Scratch Register */
                     if (mrs.destReg != regs::X0)
-                        patch[index++] = 0xF81F0FE0; // STR X0, [SP, #-16]!
+                        *patch++ = 0xF81F0FE0; // STR X0, [SP, #-16]!
 
                     /* Retrieve emulated TLS register from ThreadContext */
-                    patch[index++] = 0xD53BD040; // MRS X0, TPIDR_EL0
+                    *patch++ = 0xD53BD040; // MRS X0, TPIDR_EL0
                     if (mrs.srcReg == TpidrroEl0)
-                        patch[index++] = 0xF9415800; // LDR X0, [X0, #0x2B0] (ThreadContext::tpidrroEl0)
+                        *patch++ = 0xF9415800; // LDR X0, [X0, #0x2B0] (ThreadContext::tpidrroEl0)
                     else
-                        patch[index++] = 0xF9415C00; // LDR X0, [X0, #0x2B8] (ThreadContext::tpidrEl0)
+                        *patch++ = 0xF9415C00; // LDR X0, [X0, #0x2B8] (ThreadContext::tpidrEl0)
 
                     /* Restore Scratch Register and Return */
                     if (mrs.destReg != regs::X0) {
-                        patch[index++] = instr::Mov(regs::X(mrs.destReg), regs::X0).raw;
-                        patch[index++] = 0xF84107E0; // LDR X0, [SP], #16
+                        *patch++ = instr::Mov(regs::X(mrs.destReg), regs::X0).raw;
+                        *patch++ = 0xF84107E0; // LDR X0, [SP], #16
                     }
-                    patch[index] = instr::B(-(patchOffset + index - 1)).raw;
-                    index++;
+                    *patch = instr::B((end - patch) + offset + 1).raw;
+                    patch++;
                 } else {
-                    if (frequency != TegraX1Freq) {
+                    if (rescaleClock) {
                         if (mrs.srcReg == CntpctEl0) {
                             /* Physical Counter Load Emulation (With Rescaling) */
-                            patch.resize(patch.size() + guest::RescaleClockSize + 3);
-
                             /* Rewrite MRS with B to trampoline */
-                            *instruction = instr::B(patchOffset + index).raw;
+                            *instruction = instr::B((end - patch) + offset, true).raw;
 
                             /* Rescale host clock */
-                            std::memcpy(patch.data() + index, reinterpret_cast<void *>(&guest::RescaleClock), guest::RescaleClockSize);
-                            index += guest::RescaleClockSize;
+                            std::memcpy(patch, reinterpret_cast<void *>(&guest::RescaleClock), guest::RescaleClockSize * sizeof(u32)); // RescaleClockSize is in u32 units (see the pointer advance below), memcpy needs bytes
+                            patch += guest::RescaleClockSize;
 
                             /* Load result from stack into destination register */
                             instr::Ldr ldr(0xF94003E0); // LDR XOUT, [SP]
                             ldr.destReg = mrs.destReg;
-                            patch[index++] = ldr.raw;
+                            *patch++ = ldr.raw;
 
                             /* Free 32B stack allocation by RescaleClock and Return */
-                            patch[index++] = {0x910083FF}; // ADD SP, SP, #32
-                            patch[index] = instr::B(-(patchOffset + index - 1)).raw;
-                            index++;
+                            *patch++ = {0x910083FF}; // ADD SP, SP, #32
+                            *patch = instr::B((end - patch) + offset + 1).raw;
+                            patch++;
                         } else if (mrs.srcReg == CntfrqEl0) {
                             /* Physical Counter Frequency Load Emulation */
-                            patch.resize(patch.size() + 3);
-
                             /* Rewrite MRS with B to trampoline */
-                            *instruction = instr::B(patchOffset + index).raw;
+                            *instruction = instr::B((end - patch) + offset, true).raw;
 
                             /* Write back Tegra X1 Counter Frequency and Return */
                             for (const auto &mov : instr::MoveRegister(regs::X(mrs.destReg), TegraX1Freq))
-                                patch[index++] = mov;
-                            patch[index] = instr::B(-(patchOffset + index - 1)).raw;
-                            index++;
+                                *patch++ = mov;
+                            *patch = instr::B((end - patch) + offset + 1).raw;
+                            patch++;
                         }
                     } else if (mrs.srcReg == CntpctEl0) {
                         /* Physical Counter Load Emulation (Without Rescaling) */
@@ -250,31 +282,25 @@ namespace skyline::nce {
                         *instruction = instr::Mrs(CntvctEl0, regs::X(mrs.destReg)).raw;
                     }
                 }
-            } else if (msr.Verify()) {
-                if (msr.destReg == TpidrEl0) {
-                    /* Emulated TLS Register Store */
-                    patch.resize(patch.size() + 6);
+            } else if (msr.Verify() && msr.destReg == TpidrEl0) {
+                /* Emulated TLS Register Store */
+                /* Rewrite MSR with B to trampoline */
+                *instruction = instr::B((end - patch) + offset, true).raw;
 
-                    /* Rewrite MSR with B to trampoline */
-                    *instruction = instr::B(patchOffset + index).raw;
+                /* Allocate Scratch Registers */
+                bool x0x1{msr.srcReg != regs::X0 && msr.srcReg != regs::X1}; // X0/X1 are the scratch pair unless the MSR source register is one of them, in which case X2/X3 are used
+                *patch++ = x0x1 ? 0xA9BF07E0 : 0xA9BF0FE2; // STP X(0/2), X(1/3), [SP, #-16]!
 
-                    /* Allocate Scratch Registers */
-                    bool x0x1{mrs.srcReg != regs::X0 && mrs.srcReg != regs::X1};
-                    patch[index++] = x0x1 ? 0xA9BF07E0 : 0xA9BF0FE2; // STP X(0/2), X(1/3), [SP, #-16]!
+                /* Store new TLS value into ThreadContext */
+                *patch++ = x0x1 ? 0xD53BD040 : 0xD53BD042; // MRS X(0/2), TPIDR_EL0
+                *patch++ = instr::Mov(x0x1 ? regs::X1 : regs::X3, regs::X(msr.srcReg)).raw;
+                *patch++ = x0x1 ? 0xF9015C01 : 0xF9015C43; // STR X(1/3), [X(0/2), #0x2B8] (ThreadContext::tpidrEl0)
 
-                    /* Store new TLS value into ThreadContext */
-                    patch[index++] = x0x1 ? 0xD53BD040 : 0xD53BD042; // MRS X(0/2), TPIDR_EL0
-                    patch[index++] = instr::Mov(x0x1 ? regs::X1 : regs::X3, regs::X(msr.srcReg)).raw;
-                    patch[index++] = x0x1 ? 0xF9015C01 : 0xF9015C03; // STR X(1/3), [X0, #0x4B8] (ThreadContext::tpidrEl0)
-
-                    /* Restore Scratch Registers and Return */
-                    patch[index++] = x0x1 ? 0xA8C107E0 : 0xA8C10FE2; // LDP X(0/2), X(1/3), [SP], #16
-                    patch[index] = instr::B(-(patchOffset + index - 1)).raw;
-                    index++;
-                }
+                /* Restore Scratch Registers and Return */
+                *patch++ = x0x1 ? 0xA8C107E0 : 0xA8C10FE2; // LDP X(0/2), X(1/3), [SP], #16
+                *patch = instr::B((end - patch) + offset + 1).raw;
+                patch++;
             }
-            patchOffset--;
         }
-        return patch;
     }
 }
diff --git a/app/src/main/cpp/skyline/nce.h b/app/src/main/cpp/skyline/nce.h
index ecc45992..e023b8cb 100644
--- a/app/src/main/cpp/skyline/nce.h
+++ b/app/src/main/cpp/skyline/nce.h
@@ -23,11 +23,17 @@ namespace skyline::nce {
 
         void Execute();
 
+        struct PatchData {
+            size_t size; //!< Size of the .patch section
+            std::vector<size_t> offsets; //!< Offsets in .text of instructions that need to be patched
+        };
+
+        static PatchData GetPatchData(const std::vector<u8> &text);
+
         /**
-         * @brief Generates a patch section for the supplied code
-         * @param baseAddress The address at which the code is mapped
-         * @param patchBase The offset of the patch section from the base address
+         * @brief Writes the .patch section and mutates the code accordingly
+         * @param patch A pointer to the .patch section which should be exactly patchSize in size and located before the .text section
          */
-        std::vector<u32> PatchCode(std::vector<u8> &code, u64 baseAddress, i64 patchBase);
+        static void PatchCode(std::vector<u8> &text, u32* patch, size_t patchSize, const std::vector<size_t>& offsets);
     };
 }
diff --git a/app/src/main/cpp/skyline/nce/instructions.h b/app/src/main/cpp/skyline/nce/instructions.h
index 04db78c5..ddb0cc8b 100644
--- a/app/src/main/cpp/skyline/nce/instructions.h
+++ b/app/src/main/cpp/skyline/nce/instructions.h
@@ -114,8 +114,8 @@ namespace skyline::nce {
             /**
              * @param offset The relative offset to branch to (In 32-bit units)
              */
-            constexpr B(i32 offset) {
-                this->offset = offset;
+            constexpr B(i64 offset, bool negate = false) {
+                this->offset = negate ? -offset : offset;
                 sig = 0x5;
             }
 
@@ -307,6 +307,8 @@ namespace skyline::nce {
                         instruction = instr::Movz(destination, offsetValue, offset).raw;
                         zeroed = true;
                     }
+                } else {
+                    instruction = 0;
                 }
                 offset++;
             }