Heavily optimise GPFIFO command dispatch to reduce redundant checks

Previously, for methods with count > 1, the subchannel and engine were looked up for every argument of the method rather than once at the start. Every call also had to be checked to see whether it touched a macro or GPFIFO method. Fix this by performing those checks outside of the main dispatch loop, using templated helper lambdas to avoid repeating lots of code. Maxwell3D is the only subchannel with a fast path for now, but more can be added later if needed.
This commit is contained in:
Billy Laws 2022-02-22 20:37:31 +00:00 committed by PixelyIon
parent b4927d0138
commit 7e16c1f989
2 changed files with 117 additions and 51 deletions

View File

@ -56,6 +56,26 @@ namespace skyline::soc::gm20b {
u32 _pad2_ : 29; u32 _pad2_ : 29;
SecOp secOp : 3; SecOp secOp : 3;
}; };
/**
* @brief Checks if a method is 'pure' i.e. does not touch macro or GPFIFO methods
*/
bool Pure() const {
u16 size{[&]() -> u16 {
switch (secOp) {
case SecOp::NonIncMethod:
case SecOp::ImmdDataMethod:
return 0;
case SecOp::OneInc:
return 1;
default:
return methodCount;
}
}()};
u16 end{static_cast<u16>(methodAddress + size)};
return end < engine::EngineMethodsEnd && methodAddress >= engine::GPFIFO::RegisterCount;
}
}; };
static_assert(sizeof(PushBufferMethodHeader) == sizeof(u32)); static_assert(sizeof(PushBufferMethodHeader) == sizeof(u32));
@ -66,20 +86,11 @@ namespace skyline::soc::gm20b {
gpEntries(numEntries), gpEntries(numEntries),
thread(std::thread(&ChannelGpfifo::Run, this)) {} thread(std::thread(&ChannelGpfifo::Run, this)) {}
void ChannelGpfifo::Send(u32 method, u32 argument, SubchannelId subChannel, bool lastCall) { void ChannelGpfifo::SendFull(u32 method, u32 argument, SubchannelId subChannel, bool lastCall) {
Logger::Debug("Called GPU method - method: 0x{:X} argument: 0x{:X} subchannel: 0x{:X} last: {}", method, argument, subChannel, lastCall);
if (method < engine::GPFIFO::RegisterCount) { if (method < engine::GPFIFO::RegisterCount) {
gpfifoEngine.CallMethod(method, argument); gpfifoEngine.CallMethod(method, argument);
} else if (method < engine::EngineMethodsEnd) { [[likely]] } else if (method < engine::EngineMethodsEnd) { [[likely]]
switch (subChannel) { SendPure(method, argument, subChannel);
case SubchannelId::ThreeD:
channelCtx.maxwell3D->CallMethod(method, argument);
break;
default:
Logger::Warn("Called method 0x{:X} in unimplemented engine 0x{:X}, args: 0x{:X}", method, subChannel, argument);
break;
}
} else { } else {
switch (subChannel) { switch (subChannel) {
case SubchannelId::ThreeD: case SubchannelId::ThreeD:
@ -96,6 +107,17 @@ namespace skyline::soc::gm20b {
} }
} }
// Forwards a method call straight to the engine bound to the subchannel, performing no GPFIFO or macro handling
void ChannelGpfifo::SendPure(u32 method, u32 argument, SubchannelId subChannel) {
    if (subChannel == SubchannelId::ThreeD) {
        channelCtx.maxwell3D->CallMethod(method, argument);
        return;
    }

    // Every other subchannel has no engine to dispatch to here, so the call is only reported
    Logger::Warn("Called method 0x{:X} in unimplemented engine 0x{:X}, args: 0x{:X}", method, subChannel, argument);
}
void ChannelGpfifo::Process(GpEntry gpEntry) { void ChannelGpfifo::Process(GpEntry gpEntry) {
if (!gpEntry.size) { if (!gpEntry.size) {
// This is a GPFIFO control entry, all control entries have a zero length and contain no pushbuffers // This is a GPFIFO control entry, all control entries have a zero length and contain no pushbuffers
@ -119,18 +141,18 @@ namespace skyline::soc::gm20b {
switch (resumeState.state) { switch (resumeState.state) {
case MethodResumeState::State::Inc: case MethodResumeState::State::Inc:
while (entry != pushBufferData.end() && resumeState.remaining) while (entry != pushBufferData.end() && resumeState.remaining)
Send(resumeState.address++, *(entry++), resumeState.subChannel, --resumeState.remaining == 0); SendFull(resumeState.address++, *(entry++), resumeState.subChannel, --resumeState.remaining == 0);
break; break;
case MethodResumeState::State::OneInc: case MethodResumeState::State::OneInc:
Send(resumeState.address++, *(entry++), resumeState.subChannel, --resumeState.remaining == 0); SendFull(resumeState.address++, *(entry++), resumeState.subChannel, --resumeState.remaining == 0);
// After the first increment OneInc methods work the same as a NonInc method, this is needed so they can resume correctly if they are broken up by multiple GpEntries // After the first increment OneInc methods work the same as a NonInc method, this is needed so they can resume correctly if they are broken up by multiple GpEntries
resumeState.state = MethodResumeState::State::NonInc; resumeState.state = MethodResumeState::State::NonInc;
[[fallthrough]]; [[fallthrough]];
case MethodResumeState::State::NonInc: case MethodResumeState::State::NonInc:
while (entry != pushBufferData.end() && resumeState.remaining) while (entry != pushBufferData.end() && resumeState.remaining)
Send(resumeState.address, *(entry++), resumeState.subChannel, --resumeState.remaining == 0); SendFull(resumeState.address, *(entry++), resumeState.subChannel, --resumeState.remaining == 0);
break; break;
} }
@ -149,7 +171,7 @@ namespace skyline::soc::gm20b {
PushBufferMethodHeader methodHeader{.raw = *entry}; PushBufferMethodHeader methodHeader{.raw = *entry};
// Needed in order to check for methods split across multiple GpEntries // Needed in order to check for methods split across multiple GpEntries
auto remainingEntries{std::distance(entry, pushBufferData.end()) - 1}; ssize_t remainingEntries{std::distance(entry, pushBufferData.end()) - 1};
// Handles storing state and initial execution for methods that are split across multiple GpEntries // Handles storing state and initial execution for methods that are split across multiple GpEntries
auto startSplitMethod{[&](auto methodState) { auto startSplitMethod{[&](auto methodState) {
@ -166,47 +188,86 @@ namespace skyline::soc::gm20b {
resumeSplitMethod(); resumeSplitMethod();
}}; }};
/**
* @brief Handles execution of a specific method type as specified by the State template parameter
* @tparam ThreeDOnly Whether to skip subchannel method handling and send all method calls to the 3D engine
*/
auto dispatchCalls{[&]<bool ThreeDOnly, MethodResumeState::State State> () {
/**
* @brief Gets the offset to apply to the method address for a given dispatch loop index
*/
auto methodOffset{[] (u32 i) -> u32 {
switch (State) {
case MethodResumeState::State::Inc:
return i;
case MethodResumeState::State::OneInc:
return i ? 1 : 0;
case MethodResumeState::State::NonInc:
return 0;
}
}};
if (remainingEntries >= methodHeader.methodCount) {
if (methodHeader.Pure()) [[likely]] {
for (u32 i{}; i < methodHeader.methodCount; i++) {
if constexpr (ThreeDOnly) {
channelCtx.maxwell3D->CallMethod(methodHeader.methodAddress + methodOffset(i), *++entry);
} else {
SendPure(methodHeader.methodAddress + methodOffset(i), *++entry, methodHeader.methodSubChannel);
}
}
} else {
// Slow path for methods that touch GPFIFO or macros
for (u32 i{}; i < methodHeader.methodCount; i++)
SendFull(methodHeader.methodAddress + methodOffset(i), *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1);
}
} else {
startSplitMethod(State);
return true;
}
return false;
}};
/**
* @brief Handles execution of a single method
* @tparam ThreeDOnly Whether to skip subchannel method handling and send all method calls to the 3D engine
* @return If this was the final method in the current GpEntry
*/
auto processMethod{[&] <bool ThreeDOnly> () -> bool {
switch (methodHeader.secOp) { switch (methodHeader.secOp) {
case PushBufferMethodHeader::SecOp::IncMethod: case PushBufferMethodHeader::SecOp::IncMethod:
if (remainingEntries >= methodHeader.methodCount) { return dispatchCalls.operator()<ThreeDOnly, MethodResumeState::State::Inc>();
for (u32 i{}; i < methodHeader.methodCount; i++)
Send(methodHeader.methodAddress + i, *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1);
break;
} else {
startSplitMethod(MethodResumeState::State::Inc);
return;
}
case PushBufferMethodHeader::SecOp::NonIncMethod: case PushBufferMethodHeader::SecOp::NonIncMethod:
if (remainingEntries >= methodHeader.methodCount) { return dispatchCalls.operator()<ThreeDOnly, MethodResumeState::State::NonInc>();
for (u32 i{}; i < methodHeader.methodCount; i++)
Send(methodHeader.methodAddress, *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1);
break;
} else {
startSplitMethod(MethodResumeState::State::NonInc);
return;
}
case PushBufferMethodHeader::SecOp::OneInc: case PushBufferMethodHeader::SecOp::OneInc:
if (remainingEntries >= methodHeader.methodCount) { return dispatchCalls.operator()<ThreeDOnly, MethodResumeState::State::OneInc>();
for (u32 i{}; i < methodHeader.methodCount; i++)
Send(methodHeader.methodAddress + (i ? 1 : 0), *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1);
break;
} else {
startSplitMethod(MethodResumeState::State::OneInc);
return;
}
case PushBufferMethodHeader::SecOp::ImmdDataMethod: case PushBufferMethodHeader::SecOp::ImmdDataMethod:
Send(methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel, true); if (methodHeader.Pure()) {
break; if constexpr (ThreeDOnly)
channelCtx.maxwell3D->CallMethod(methodHeader.methodAddress, methodHeader.immdData);
else
SendPure(methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel);
} else {
SendFull(methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel, true);
}
return false;
case PushBufferMethodHeader::SecOp::EndPbSegment: case PushBufferMethodHeader::SecOp::EndPbSegment:
return; return true;
default: default:
throw exception("Unsupported pushbuffer method SecOp: {}", static_cast<u8>(methodHeader.secOp)); throw exception("Unsupported pushbuffer method SecOp: {}", static_cast<u8>(methodHeader.secOp));
} }
}};
bool hitEnd{[&]() {
if (methodHeader.methodSubChannel == SubchannelId::ThreeD) [[likely]]
return processMethod.operator()<true>();
else
return processMethod.operator()<false>();
}()};
if (hitEnd)
return;
} }
} }

View File

@ -130,9 +130,14 @@ namespace skyline::soc::gm20b {
std::thread thread; //!< The thread that manages processing of pushbuffers std::thread thread; //!< The thread that manages processing of pushbuffers
/** /**
* @brief Sends a method call to the GPU hardware * @brief Sends a method call to the appropriate subchannel and handles macro and GPFIFO methods
*/ */
void Send(u32 method, u32 argument, SubchannelId subchannel, bool lastCall); void SendFull(u32 method, u32 argument, SubchannelId subchannel, bool lastCall);
/**
 * @brief Sends a method call to the appropriate subchannel, macro and GPFIFO methods are not handled
 * @note The method is forwarded without any range checks, use SendFull for methods that might be GPFIFO or macro methods
 */
void SendPure(u32 method, u32 argument, SubchannelId subchannel);
/** /**
* @brief Processes the pushbuffer contained within the given GpEntry, calling methods as needed * @brief Processes the pushbuffer contained within the given GpEntry, calling methods as needed