mirror of
https://github.com/skyline-emu/skyline.git
synced 2025-01-28 08:47:55 +03:00
Rework GPFIFO pushing to optimise performance and accuracy
* Pushbuffer data is now stored in a member buffer to avoid reallocating it for each pushbuffer, which previously hampered performance.
* Pushbuffers are no longer prefetched, as prefetching puts unnecessary load on the guest thread; that work is better suited to the GPFIFO thread.
* Clean up miscellaneous code to avoid pointless casts of a 4-byte object, and handle GPFIFO control opcodes.
This commit is contained in:
parent
78cdb1eeb4
commit
c1aec00ed1
@ -45,36 +45,51 @@ namespace skyline::gpu::gpfifo {
|
||||
}
|
||||
}
|
||||
|
||||
void GPFIFO::Process(const std::vector<u32> &segment) {
|
||||
for (auto entry{segment.begin()}; entry != segment.end(); entry++) {
|
||||
void GPFIFO::Process(GpEntry gpEntry) {
|
||||
if (!gpEntry.size) {
|
||||
// This is a GPFIFO control entry, all control entries have a zero length and contain no pushbuffers
|
||||
switch (gpEntry.opcode) {
|
||||
case GpEntry::Opcode::Nop:
|
||||
return;
|
||||
default:
|
||||
state.logger->Warn("Unsupported GpEntry control opcode used: {}", static_cast<u8>(gpEntry.opcode));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
pushBufferData.resize(gpEntry.size);
|
||||
state.gpu->memoryManager.Read<u32>(pushBufferData, gpEntry.Address());
|
||||
|
||||
for (auto entry{pushBufferData.begin()}; entry != pushBufferData.end(); entry++) {
|
||||
// An entry containing all zeroes is a NOP, skip over it
|
||||
if (*entry == 0)
|
||||
continue;
|
||||
|
||||
auto methodHeader{reinterpret_cast<const PushBufferMethodHeader *>(&*entry)};
|
||||
PushBufferMethodHeader methodHeader{.raw = *entry};
|
||||
|
||||
switch (methodHeader->secOp) {
|
||||
switch (methodHeader.secOp) {
|
||||
case PushBufferMethodHeader::SecOp::IncMethod:
|
||||
for (u16 i{}; i < methodHeader->methodCount; i++)
|
||||
Send(MethodParams{static_cast<u16>(methodHeader->methodAddress + i), *++entry, methodHeader->methodSubChannel, i == methodHeader->methodCount - 1});
|
||||
for (u16 i{}; i < methodHeader.methodCount; i++)
|
||||
Send(MethodParams{static_cast<u16>(methodHeader.methodAddress + i), *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1});
|
||||
|
||||
break;
|
||||
case PushBufferMethodHeader::SecOp::NonIncMethod:
|
||||
for (u16 i{}; i < methodHeader->methodCount; i++)
|
||||
Send(MethodParams{methodHeader->methodAddress, *++entry, methodHeader->methodSubChannel, i == methodHeader->methodCount - 1});
|
||||
for (u16 i{}; i < methodHeader.methodCount; i++)
|
||||
Send(MethodParams{methodHeader.methodAddress, *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1});
|
||||
|
||||
break;
|
||||
case PushBufferMethodHeader::SecOp::OneInc:
|
||||
for (u16 i{}; i < methodHeader->methodCount; i++)
|
||||
Send(MethodParams{static_cast<u16>(methodHeader->methodAddress + static_cast<bool>(i)), *++entry, methodHeader->methodSubChannel, i == methodHeader->methodCount - 1});
|
||||
for (u16 i{}; i < methodHeader.methodCount; i++)
|
||||
Send(MethodParams{static_cast<u16>(methodHeader.methodAddress + static_cast<bool>(i)), *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1});
|
||||
|
||||
break;
|
||||
case PushBufferMethodHeader::SecOp::ImmdDataMethod:
|
||||
Send(MethodParams{methodHeader->methodAddress, methodHeader->immdData, methodHeader->methodSubChannel, true});
|
||||
Send(MethodParams{methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel, true});
|
||||
break;
|
||||
case PushBufferMethodHeader::SecOp::EndPbSegment:
|
||||
return;
|
||||
default:
|
||||
state.logger->Warn("Unsupported pushbuffer method SecOp: {}", static_cast<u8>(methodHeader.secOp));
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -91,12 +106,9 @@ namespace skyline::gpu::gpfifo {
|
||||
pthread_setname_np(pthread_self(), "GPFIFO");
|
||||
try {
|
||||
signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, signal::ExceptionalSignalHandler);
|
||||
pushBuffers->Process([this](PushBuffer &pushBuffer) {
|
||||
if (pushBuffer.segment.empty())
|
||||
pushBuffer.Fetch(state.gpu->memoryManager);
|
||||
|
||||
state.logger->Debug("Processing pushbuffer: 0x{:X}", pushBuffer.gpEntry.Address());
|
||||
Process(pushBuffer.segment);
|
||||
pushBuffers->Process([this](GpEntry gpEntry) {
|
||||
state.logger->Debug("Processing pushbuffer: 0x{:X}", gpEntry.Address());
|
||||
Process(gpEntry);
|
||||
});
|
||||
} catch (const signal::SignalException &e) {
|
||||
if (e.signal != SIGINT) {
|
||||
@ -112,12 +124,7 @@ namespace skyline::gpu::gpfifo {
|
||||
}
|
||||
|
||||
void GPFIFO::Push(span<GpEntry> entries) {
|
||||
bool beforeBarrier{true};
|
||||
pushBuffers->AppendTranform(entries, [&beforeBarrier, this](const GpEntry &entry) {
|
||||
if (entry.sync == GpEntry::Sync::Wait)
|
||||
beforeBarrier = false;
|
||||
return PushBuffer(entry, state.gpu->memoryManager, beforeBarrier);
|
||||
});
|
||||
pushBuffers->Append(entries);
|
||||
}
|
||||
|
||||
GPFIFO::~GPFIFO() {
|
||||
|
@ -129,35 +129,17 @@ namespace skyline::gpu {
|
||||
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt#L62
|
||||
*/
|
||||
class GPFIFO {
|
||||
private:
|
||||
/**
|
||||
* @brief A pushbuffer is a descriptor of tasks that need to be executed for a specific client
|
||||
*/
|
||||
struct PushBuffer {
|
||||
GpEntry gpEntry;
|
||||
std::vector<u32> segment;
|
||||
|
||||
PushBuffer(const GpEntry &gpEntry, const vmm::MemoryManager &memoryManager, bool fetch) : gpEntry(gpEntry) {
|
||||
if (fetch)
|
||||
Fetch(memoryManager);
|
||||
}
|
||||
|
||||
inline void Fetch(const vmm::MemoryManager &memoryManager) {
|
||||
segment.resize(gpEntry.size);
|
||||
memoryManager.Read<u32>(segment, gpEntry.Address());
|
||||
}
|
||||
};
|
||||
|
||||
const DeviceState &state;
|
||||
engine::GPFIFO gpfifoEngine; //!< The engine for processing GPFIFO method calls
|
||||
std::array<std::shared_ptr<engine::Engine>, 8> subchannels;
|
||||
std::optional<CircularQueue<PushBuffer>> pushBuffers;
|
||||
std::thread thread; //!< The thread that manages processing of push-buffers
|
||||
std::optional<CircularQueue<GpEntry>> pushBuffers;
|
||||
std::thread thread; //!< The thread that manages processing of pushbuffers
|
||||
std::vector<u32> pushBufferData; //!< Persistent vector storing pushbuffer data to avoid constant reallocations
|
||||
|
||||
/**
|
||||
* @brief Processes a pushbuffer segment, calling methods as needed
|
||||
* @brief Processes the pushbuffer contained within the given GpEntry, calling methods as needed
|
||||
*/
|
||||
void Process(const std::vector<u32> &segment);
|
||||
void Process(GpEntry gpEntry);
|
||||
|
||||
/**
|
||||
* @brief Sends a method call to the GPU hardware
|
||||
|
Loading…
x
Reference in New Issue
Block a user