Move .patch to start of executable (Pre-Patching)

This commit is contained in:
◱ PixelyIon 2020-10-15 13:40:13 +05:30 committed by ◱ PixelyIon
parent 369bd469f6
commit 6f2cd41470
7 changed files with 168 additions and 128 deletions

View File

@ -7,6 +7,8 @@
namespace skyline::kernel {
// Initializes the `state` member from the supplied device state; the constructor body is empty, the address space and regions are populated by InitializeVmm and InitializeRegions
MemoryManager::MemoryManager(const DeviceState &state) : state(state) {}
constexpr size_t RegionAlignment{1ULL << 21}; //!< The minimum alignment of a HOS memory region (2 MiB)
void MemoryManager::InitializeVmm(memory::AddressSpaceType type) {
switch (type) {
case memory::AddressSpaceType::AddressSpace32Bit:
@ -22,7 +24,7 @@ namespace skyline::kernel {
case memory::AddressSpaceType::AddressSpace39Bit: {
addressSpace.address = 0;
addressSpace.size = 1UL << 39;
base.size = 0x78000000 + 0x1000000000 + 0x180000000 + 0x80000000 + 0x1000000000;
base.size = 0x78000000 + 0x1000000000 + 0x180000000 + 0x80000000 + 0x1000000000; // Code region size is an assumed maximum here
break;
}
@ -41,7 +43,7 @@ namespace skyline::kernel {
}
start = util::HexStringToInt<u64>(std::string_view(maps.data() + maps.find_first_of('-', line) + 1, sizeof(u64) * 2));
alignedStart = util::AlignUp(start, 1ULL << 21);
alignedStart = util::AlignUp(start, RegionAlignment);
if (alignedStart + base.size > addressSpace.size)
break;
} while ((line = maps.find_first_of('\n', line)) != std::string::npos && line++);
@ -60,6 +62,9 @@ namespace skyline::kernel {
void MemoryManager::InitializeRegions(u8 *codeStart, u64 size) {
u64 address{reinterpret_cast<u64>(codeStart)};
if (!util::IsAligned(address, RegionAlignment))
throw exception("Non-aligned code region was used to initialize regions: 0x{:X} - 0x{:X}", codeStart, codeStart + size);
switch (addressSpace.size) {
case 1UL << 36: {
code.address = base.address;
@ -79,7 +84,7 @@ namespace skyline::kernel {
case 1UL << 39: {
code.address = base.address;
code.size = 0x78000000;
code.size = util::AlignUp(size, RegionAlignment);
alias.address = code.address + code.size;
alias.size = 0x1000000000;
heap.address = alias.address + alias.size;
@ -95,6 +100,12 @@ namespace skyline::kernel {
throw exception("Regions initialized without VMM initialization");
}
// Total size required by all regions, computed from the region sizes assigned above
auto newSize{code.size + alias.size + stack.size + heap.size + tlsIo.size};
if (newSize > base.size)
    throw exception("Region size has exceeded pre-allocated area: 0x{:X}/0x{:X}", newSize, base.size);
// Release the unused tail of the pre-allocated area, i.e. [base.address + newSize, base.address + base.size)
// The address must be the end of the used portion and the length the (positive) difference, base.size is always larger here
if (newSize != base.size)
    munmap(reinterpret_cast<u8 *>(base.address) + newSize, base.size - newSize);
if (size > code.size)
throw exception("Code region ({}) is smaller than mapped code size ({})", code.size, size);

View File

@ -678,11 +678,11 @@ namespace skyline::kernel::svc {
break;
case constant::infoState::AddressSpaceBaseAddr:
out = state.process->memory.addressSpace.address;
out = state.process->memory.base.address;
break;
case constant::infoState::AddressSpaceSize:
out = state.process->memory.addressSpace.size;
out = state.process->memory.base.size;
break;
case constant::infoState::StackRegionBaseAddr:

View File

@ -26,7 +26,7 @@ namespace skyline::kernel::type {
if (ptr && !util::PageAligned(ptr))
throw exception("KSharedMemory was mapped to a non-page-aligned address: 0x{:X}", ptr);
guest.ptr = reinterpret_cast<u8 *>(mmap(ptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | (ptr ? MAP_FIXED_NOREPLACE : 0), fd, 0));
guest.ptr = reinterpret_cast<u8 *>(mmap(ptr, size, permission.Get(), MAP_SHARED | (ptr ? MAP_FIXED : 0), fd, 0));
if (guest.ptr == MAP_FAILED)
throw exception("An error occurred while mapping shared memory in guest");
guest.size = size;

View File

@ -21,30 +21,25 @@ namespace skyline::loader {
if (!util::PageAligned(executable.text.offset) || !util::PageAligned(executable.ro.offset) || !util::PageAligned(executable.data.offset))
throw exception("LoadProcessData: Section offsets are not aligned with page size: 0x{:X}, 0x{:X}, 0x{:X}", executable.text.offset, executable.ro.offset, executable.data.offset);
// The data section will always be the last section in memory, so put the patch section after it
u64 patchOffset{executable.data.offset + dataSize};
std::vector<u32> patch = state.nce->PatchCode(executable.text.contents, reinterpret_cast<u64>(base), patchOffset);
auto patch{state.nce->GetPatchData(executable.text.contents)};
u64 patchSize{patch.size() * sizeof(u32)};
u64 padding{util::AlignUp(patchSize, PAGE_SIZE) - patchSize};
process->NewHandle<kernel::type::KPrivateMemory>(base, patch.size, memory::Permission{false, false, false}, memory::states::Reserved); // ---
state.logger->Debug("Successfully mapped section .patch @ 0x{:X}, Size = 0x{:X}", base, patch.size);
process->NewHandle<kernel::type::KPrivateMemory>(base + executable.text.offset, textSize, memory::Permission{true, false, true}, memory::states::CodeStatic); // R-X
state.logger->Debug("Successfully mapped section .text @ {}, Size = 0x{:X}", base + executable.text.offset, textSize);
process->NewHandle<kernel::type::KPrivateMemory>(base + patch.size + executable.text.offset, textSize, memory::Permission{true, false, true}, memory::states::CodeStatic); // R-X
state.logger->Debug("Successfully mapped section .text @ 0x{:X}, Size = 0x{:X}", base + patch.size + executable.text.offset, textSize);
process->NewHandle<kernel::type::KPrivateMemory>(base + executable.ro.offset, roSize, memory::Permission{true, false, false}, memory::states::CodeReadOnly); // R--
state.logger->Debug("Successfully mapped section .rodata @ {}, Size = 0x{:X}", base + executable.ro.offset, roSize);
process->NewHandle<kernel::type::KPrivateMemory>(base + patch.size + executable.ro.offset, roSize, memory::Permission{true, false, false}, memory::states::CodeReadOnly); // R--
state.logger->Debug("Successfully mapped section .rodata @ 0x{:X}, Size = 0x{:X}", base + patch.size + executable.ro.offset, roSize);
process->NewHandle<kernel::type::KPrivateMemory>(base + executable.data.offset, dataSize, memory::Permission{true, true, false}, memory::states::CodeMutable); // RW-
state.logger->Debug("Successfully mapped section .data @ {}, Size = 0x{:X}", base + executable.data.offset, dataSize);
process->NewHandle<kernel::type::KPrivateMemory>(base + patch.size + executable.data.offset, dataSize, memory::Permission{true, true, false}, memory::states::CodeMutable); // RW-
state.logger->Debug("Successfully mapped section .data + .bss @ 0x{:X}, Size = 0x{:X}", base + patch.size + executable.data.offset, dataSize);
process->NewHandle<kernel::type::KPrivateMemory>(base + patchOffset, patchSize + padding, memory::Permission{true, true, true}, memory::states::CodeMutable); // RWX
state.logger->Debug("Successfully mapped section .patch @ {}, Size = 0x{:X}", base + patchOffset, patchSize + padding);
state.nce->PatchCode(executable.text.contents, reinterpret_cast<u32*>(base), patch.size, patch.offsets);
std::memcpy(base + patch.size + executable.text.offset, executable.text.contents.data(), textSize);
std::memcpy(base + patch.size + executable.ro.offset, executable.ro.contents.data(), roSize);
std::memcpy(base + patch.size + executable.data.offset, executable.data.contents.data(), dataSize - executable.bssSize);
std::memcpy(base + executable.text.offset, executable.text.contents.data(), textSize);
std::memcpy(base + executable.ro.offset, executable.ro.contents.data(), roSize);
std::memcpy(base + executable.data.offset, executable.data.contents.data(), dataSize - executable.bssSize);
std::memcpy(base + patchOffset, patch.data(), patchSize);
return {base, patchOffset + patchSize + padding, base};
return {base, patch.size + textSize + roSize + dataSize, base + patch.size};
}
}

View File

@ -96,153 +96,185 @@ namespace skyline::nce {
}
}
std::vector<u32> NCE::PatchCode(std::vector<u8> &code, u64 baseAddress, i64 patchBase) {
constexpr u8 MainSvcTrampolineSize{17}; // Size of the main SVC trampoline function in u32 units
constexpr u32 TpidrEl0{0x5E82}; // ID of TPIDR_EL0 in MRS (also compared against the MSR destination register)
constexpr u32 TpidrroEl0{0x5E83}; // ID of TPIDRRO_EL0 in MRS
constexpr u32 CntfrqEl0{0x5F00}; // ID of CNTFRQ_EL0 in MRS
constexpr u32 CntpctEl0{0x5F01}; // ID of CNTPCT_EL0 in MRS
constexpr u32 CntvctEl0{0x5F02}; // ID of CNTVCT_EL0 in MRS
constexpr u32 TegraX1Freq{19200000}; // The clock frequency of the Tegra X1 (19.2 MHz)
constexpr size_t MainSvcTrampolineSize{17}; // Size of the main SVC trampoline in u32 units, it is summed with SaveCtxSize and LoadCtxSize to size the start of the .patch section
size_t index{};
std::vector<u32> patch(guest::SaveCtxSize + guest::LoadCtxSize + MainSvcTrampolineSize);
/**
 * @brief Scans .text for instructions that have to be redirected into the .patch section and computes how large that section needs to be
 * @param text The contents of the .text section, it is read as a sequence of 32-bit instructions
 * @return The size of the .patch section in bytes (aligned up to PAGE_SIZE) and the offsets (in u32 units from the start of .text) of all instructions to patch
 * @note Trailing bytes which do not form a whole 32-bit instruction are not scanned
 */
NCE::PatchData NCE::GetPatchData(const std::vector<u8> &text) {
    // The section always starts with SaveCtx, the main SVC trampoline and LoadCtx; all sizes here are in u32 units
    size_t size{guest::SaveCtxSize + guest::LoadCtxSize + MainSvcTrampolineSize};
    std::vector<size_t> offsets;

    u64 frequency;
    asm("MRS %0, CNTFRQ_EL0" : "=r"(frequency));
    bool rescaleClock{frequency != TegraX1Freq};

    auto start{reinterpret_cast<const u32 *>(text.data())};
    // Derive the end from the amount of whole instructions so a size that isn't a multiple of 4 can't cause a read past the buffer
    auto end{start + (text.size() / sizeof(u32))};
    for (const u32 *instruction{start}; instruction < end; instruction++) {
        auto svc{*reinterpret_cast<const instr::Svc *>(instruction)};
        auto mrs{*reinterpret_cast<const instr::Mrs *>(instruction)};
        auto msr{*reinterpret_cast<const instr::Msr *>(instruction)};

        if (svc.Verify()) {
            // Per-SVC trampoline emitted by PatchCode is 7 instructions
            size += 7;
            offsets.push_back(instruction - start);
        } else if (mrs.Verify()) {
            if (mrs.srcReg == TpidrroEl0 || mrs.srcReg == TpidrEl0) {
                // A scratch register (X0) has to be saved, moved and restored unless the destination is X0 itself
                size += ((mrs.destReg != regs::X0) ? 6 : 3);
                offsets.push_back(instruction - start);
            } else {
                if (rescaleClock) {
                    if (mrs.srcReg == CntpctEl0) {
                        size += guest::RescaleClockSize + 3;
                        offsets.push_back(instruction - start);
                    } else if (mrs.srcReg == CntfrqEl0) {
                        size += 3;
                        offsets.push_back(instruction - start);
                    }
                } else if (mrs.srcReg == CntpctEl0) {
                    // Rewritten in-place by PatchCode so it needs an offset but no space in .patch
                    offsets.push_back(instruction - start);
                }
            }
        } else if (msr.Verify() && msr.destReg == TpidrEl0) {
            size += 6;
            offsets.push_back(instruction - start);
        }
    }

    return {util::AlignUp(size * sizeof(u32), PAGE_SIZE), offsets};
}
void NCE::PatchCode(std::vector<u8> &text, u32 *patch, size_t patchSize, const std::vector<size_t> &offsets) {
u32 *start{patch};
u32 *end{patch + (patchSize / sizeof(u32))};
std::memcpy(patch, reinterpret_cast<void *>(&guest::SaveCtx), guest::SaveCtxSize * sizeof(u32));
patch += guest::SaveCtxSize;
{
/* Main SVC Trampoline */
/* Store LR in 16B of pre-allocated stack */
patch[index++] = 0xF90007FE; // STR LR, [SP, #8]
*patch++ = 0xF90007FE; // STR LR, [SP, #8]
/* Replace Skyline TLS with host TLS */
patch[index++] = 0xD53BD041; // MRS X1, TPIDR_EL0
patch[index++] = 0xF9415022; // LDR X2, [X1, #0x2A0] (ThreadContext::hostTpidrEl0)
*patch++ = 0xD53BD041; // MRS X1, TPIDR_EL0
*patch++ = 0xF9415022; // LDR X2, [X1, #0x2A0] (ThreadContext::hostTpidrEl0)
/* Replace guest stack with host stack */
patch[index++] = 0xD51BD042; // MSR TPIDR_EL0, X2
patch[index++] = 0x910003E2; // MOV X2, SP
patch[index++] = 0xF9415423; // LDR X3, [X1, #0x2A8] (ThreadContext::hostSp)
patch[index++] = 0x9100007F; // MOV SP, X3
*patch++ = 0xD51BD042; // MSR TPIDR_EL0, X2
*patch++ = 0x910003E2; // MOV X2, SP
*patch++ = 0xF9415423; // LDR X3, [X1, #0x2A8] (ThreadContext::hostSp)
*patch++ = 0x9100007F; // MOV SP, X3
/* Store Skyline TLS + guest SP on stack */
patch[index++] = 0xA9BF0BE1; // STP X1, X2, [SP, #-16]!
*patch++ = 0xA9BF0BE1; // STP X1, X2, [SP, #-16]!
/* Jump to SvcHandler */
for (const auto &mov : instr::MoveRegister(regs::X2, reinterpret_cast<u64>(&NCE::SvcHandler)))
if (mov)
patch[index++] = mov;
patch[index++] = 0xD63F0040; // BLR X2
*patch++ = mov;
*patch++ = 0xD63F0040; // BLR X2
/* Restore Skyline TLS + guest SP */
patch[index++] = 0xA8C10BE1; // LDP X1, X2, [SP], #16
patch[index++] = 0xD51BD041; // MSR TPIDR_EL0, X1
patch[index++] = 0x9100005F; // MOV SP, X2
*patch++ = 0xA8C10BE1; // LDP X1, X2, [SP], #16
*patch++ = 0xD51BD041; // MSR TPIDR_EL0, X1
*patch++ = 0x9100005F; // MOV SP, X2
/* Restore LR and Return */
patch[index++] = 0xF94007FE; // LDR LR, [SP, #8]
patch[index++] = 0xD65F03C0; // RET
*patch++ = 0xF94007FE; // LDR LR, [SP, #8]
*patch++ = 0xD65F03C0; // RET
}
std::memcpy(patch.data() + index, reinterpret_cast<void *>(&guest::LoadCtx), guest::LoadCtxSize * sizeof(u32));
index += guest::LoadCtxSize;
std::memcpy(patch, reinterpret_cast<void *>(&guest::LoadCtx), guest::LoadCtxSize * sizeof(u32));
patch += guest::LoadCtxSize;
u64 frequency;
asm("MRS %0, CNTFRQ_EL0" : "=r"(frequency));
bool rescaleClock{frequency != TegraX1Freq};
i64 patchOffset{patchBase / i64(sizeof(u32))};
u32 *start{reinterpret_cast<u32 *>(code.data())};
u32 *end{start + (code.size() / sizeof(u32))};
for (u32 *instruction{start}; instruction < end; instruction++) {
for (auto offset : offsets) {
u32 *instruction{reinterpret_cast<u32 *>(text.data()) + offset};
auto svc{*reinterpret_cast<instr::Svc *>(instruction)};
auto mrs{*reinterpret_cast<instr::Mrs *>(instruction)};
auto msr{*reinterpret_cast<instr::Msr *>(instruction)};
if (svc.Verify()) {
/* Per-SVC Trampoline */
patch.resize(patch.size() + 7);
/* Rewrite SVC with B to trampoline */
*instruction = instr::B(patchOffset + index).raw;
*instruction = instr::B((end - patch) + offset, true).raw;
/* Save Context */
patch[index++] = 0xF81F0FFE; // STR LR, [SP, #-16]!
patch[index] = instr::BL(-index).raw;
index++;
*patch++ = 0xF81F0FFE; // STR LR, [SP, #-16]!
*patch = instr::BL(start - patch).raw;
patch++;
/* Jump to main SVC trampoline */
patch[index++] = instr::Movz(regs::W0, static_cast<u16>(svc.value)).raw;
patch[index] = instr::BL(guest::SaveCtxSize - index).raw;
index++;
*patch++ = instr::Movz(regs::W0, static_cast<u16>(svc.value)).raw;
*patch = instr::BL((start - patch) + guest::SaveCtxSize).raw;
patch++;
/* Restore Context and Return */
patch[index] = instr::BL(guest::SaveCtxSize + MainSvcTrampolineSize - index).raw;
index++;
patch[index++] = 0xF84107FE; // LDR LR, [SP], #16
patch[index] = instr::B(-(patchOffset + index - 1)).raw;
index++;
*patch = instr::BL((start - patch) + guest::SaveCtxSize + MainSvcTrampolineSize).raw;
patch++;
*patch++ = 0xF84107FE; // LDR LR, [SP], #16
*patch = instr::B((end - patch) + offset + 1).raw;
patch++;
} else if (mrs.Verify()) {
if (mrs.srcReg == TpidrroEl0 || mrs.srcReg == TpidrEl0) {
/* Emulated TLS Register Load */
patch.resize(patch.size() + ((mrs.destReg != regs::X0) ? 6 : 3));
/* Rewrite MRS with B to trampoline */
*instruction = instr::B(patchOffset + index).raw;
*instruction = instr::B((end - patch) + offset, true).raw;
/* Allocate Scratch Register */
if (mrs.destReg != regs::X0)
patch[index++] = 0xF81F0FE0; // STR X0, [SP, #-16]!
*patch++ = 0xF81F0FE0; // STR X0, [SP, #-16]!
/* Retrieve emulated TLS register from ThreadContext */
patch[index++] = 0xD53BD040; // MRS X0, TPIDR_EL0
*patch++ = 0xD53BD040; // MRS X0, TPIDR_EL0
if (mrs.srcReg == TpidrroEl0)
patch[index++] = 0xF9415800; // LDR X0, [X0, #0x2B0] (ThreadContext::tpidrroEl0)
*patch++ = 0xF9415800; // LDR X0, [X0, #0x2B0] (ThreadContext::tpidrroEl0)
else
patch[index++] = 0xF9415C00; // LDR X0, [X0, #0x2B8] (ThreadContext::tpidrEl0)
*patch++ = 0xF9415C00; // LDR X0, [X0, #0x2B8] (ThreadContext::tpidrEl0)
/* Restore Scratch Register and Return */
if (mrs.destReg != regs::X0) {
patch[index++] = instr::Mov(regs::X(mrs.destReg), regs::X0).raw;
patch[index++] = 0xF84107E0; // LDR X0, [SP], #16
*patch++ = instr::Mov(regs::X(mrs.destReg), regs::X0).raw;
*patch++ = 0xF84107E0; // LDR X0, [SP], #16
}
patch[index] = instr::B(-(patchOffset + index - 1)).raw;
index++;
*patch = instr::B((end - patch) + offset + 1).raw;
patch++;
} else {
if (frequency != TegraX1Freq) {
if (rescaleClock) {
if (mrs.srcReg == CntpctEl0) {
/* Physical Counter Load Emulation (With Rescaling) */
patch.resize(patch.size() + guest::RescaleClockSize + 3);
/* Rewrite MRS with B to trampoline */
*instruction = instr::B(patchOffset + index).raw;
*instruction = instr::B((end - patch) + offset, true).raw;
/* Rescale host clock */
std::memcpy(patch.data() + index, reinterpret_cast<void *>(&guest::RescaleClock), guest::RescaleClockSize);
index += guest::RescaleClockSize;
std::memcpy(patch, reinterpret_cast<void *>(&guest::RescaleClock), guest::RescaleClockSize);
patch += guest::RescaleClockSize;
/* Load result from stack into destination register */
instr::Ldr ldr(0xF94003E0); // LDR XOUT, [SP]
ldr.destReg = mrs.destReg;
patch[index++] = ldr.raw;
*patch++ = ldr.raw;
/* Free 32B stack allocation by RescaleClock and Return */
patch[index++] = {0x910083FF}; // ADD SP, SP, #32
patch[index] = instr::B(-(patchOffset + index - 1)).raw;
index++;
*patch++ = {0x910083FF}; // ADD SP, SP, #32
*patch = instr::B((end - patch) + offset + 1).raw;
patch++;
} else if (mrs.srcReg == CntfrqEl0) {
/* Physical Counter Frequency Load Emulation */
patch.resize(patch.size() + 3);
/* Rewrite MRS with B to trampoline */
*instruction = instr::B(patchOffset + index).raw;
*instruction = instr::B((end - patch) + offset, true).raw;
/* Write back Tegra X1 Counter Frequency and Return */
for (const auto &mov : instr::MoveRegister(regs::X(mrs.destReg), TegraX1Freq))
patch[index++] = mov;
patch[index] = instr::B(-(patchOffset + index - 1)).raw;
index++;
*patch++ = mov;
*patch = instr::B((end - patch) + offset + 1).raw;
patch++;
}
} else if (mrs.srcReg == CntpctEl0) {
/* Physical Counter Load Emulation (Without Rescaling) */
@ -250,31 +282,25 @@ namespace skyline::nce {
*instruction = instr::Mrs(CntvctEl0, regs::X(mrs.destReg)).raw;
}
}
} else if (msr.Verify()) {
if (msr.destReg == TpidrEl0) {
} else if (msr.Verify() && msr.destReg == TpidrEl0) {
/* Emulated TLS Register Store */
patch.resize(patch.size() + 6);
/* Rewrite MSR with B to trampoline */
*instruction = instr::B(patchOffset + index).raw;
*instruction = instr::B((end - patch) + offset, true).raw;
/* Allocate Scratch Registers */
bool x0x1{mrs.srcReg != regs::X0 && mrs.srcReg != regs::X1};
patch[index++] = x0x1 ? 0xA9BF07E0 : 0xA9BF0FE2; // STP X(0/2), X(1/3), [SP, #-16]!
*patch++ = x0x1 ? 0xA9BF07E0 : 0xA9BF0FE2; // STP X(0/2), X(1/3), [SP, #-16]!
/* Store new TLS value into ThreadContext */
patch[index++] = x0x1 ? 0xD53BD040 : 0xD53BD042; // MRS X(0/2), TPIDR_EL0
patch[index++] = instr::Mov(x0x1 ? regs::X1 : regs::X3, regs::X(msr.srcReg)).raw;
patch[index++] = x0x1 ? 0xF9015C01 : 0xF9015C03; // STR X(1/3), [X0, #0x4B8] (ThreadContext::tpidrEl0)
*patch++ = x0x1 ? 0xD53BD040 : 0xD53BD042; // MRS X(0/2), TPIDR_EL0
*patch++ = instr::Mov(x0x1 ? regs::X1 : regs::X3, regs::X(msr.srcReg)).raw;
*patch++ = x0x1 ? 0xF9015C01 : 0xF9015C03; // STR X(1/3), [X0, #0x4B8] (ThreadContext::tpidrEl0)
/* Restore Scratch Registers and Return */
patch[index++] = x0x1 ? 0xA8C107E0 : 0xA8C10FE2; // LDP X(0/2), X(1/3), [SP], #16
patch[index] = instr::B(-(patchOffset + index - 1)).raw;
index++;
*patch++ = x0x1 ? 0xA8C107E0 : 0xA8C10FE2; // LDP X(0/2), X(1/3), [SP], #16
*patch = instr::B((end - patch) + offset + 1).raw;
patch++;
}
}
patchOffset--;
}
return patch;
}
}

View File

@ -23,11 +23,17 @@ namespace skyline::nce {
void Execute();
/**
 * @brief The result of scanning a .text section with GetPatchData, its members are what PatchCode needs to write the .patch section
 */
struct PatchData {
size_t size; //!< Size of the .patch section in bytes (aligned up to PAGE_SIZE)
std::vector<size_t> offsets; //!< Offsets in .text of instructions that need to be patched (in 32-bit instruction units from the start of .text)
};
/**
 * @brief Scans the supplied .text contents for the instructions that need patching (SVC and certain MRS/MSR accesses) and computes the size of the .patch section required for them
 * @param text The contents of the .text section, it is only read and not modified
 * @return The size of the .patch section alongside the offsets of all instructions to patch
 */
static PatchData GetPatchData(const std::vector<u8> &text);
/**
* @brief Generates a patch section for the supplied code
* @param baseAddress The address at which the code is mapped
* @param patchBase The offset of the patch section from the base address
* @brief Writes the .patch section and mutates the code accordingly
* @param text The contents of the .text section, the instructions at the supplied offsets are overwritten in-place
* @param patch A pointer to the .patch section which should be exactly patchSize in size and located before the .text section
* @param patchSize The size of the .patch section in bytes, branch offsets are computed with .text assumed to begin at patch + patchSize
* @param offsets The offsets in .text (in 32-bit instruction units) of the instructions to patch, as returned in PatchData::offsets
*/
std::vector<u32> PatchCode(std::vector<u8> &code, u64 baseAddress, i64 patchBase);
static void PatchCode(std::vector<u8> &text, u32* patch, size_t patchSize, const std::vector<size_t>& offsets);
};
}

View File

@ -114,8 +114,8 @@ namespace skyline::nce {
/**
* @param offset The relative offset to branch to (In 32-bit units)
* @param negate If the supplied offset should be negated before it is stored, this allows a backwards branch to be expressed with a positive distance
*/
constexpr B(i32 offset) {
this->offset = offset;
constexpr B(i64 offset, bool negate = false) {
this->offset = negate ? -offset : offset;
sig = 0x5; // Signature bits of this instruction
}
@ -307,6 +307,8 @@ namespace skyline::nce {
instruction = instr::Movz(destination, offsetValue, offset).raw;
zeroed = true;
}
} else {
instruction = 0;
}
offset++;
}