mirror of
https://github.com/skyline-emu/skyline.git
synced 2025-01-27 05:57:54 +03:00
Implement macro HLE for instanced draw macros
gm20b performs instanced draws by repeating draw methods for each instance; the code to detect this, together with the cost of interpreting macros, took up around 6% of GPFIFO time in Metro Kingdom. By detecting these specific macros and performing an instanced draw directly, much of that cost can be avoided.
This commit is contained in:
parent
cf0752f937
commit
3404a3abdb
app
CMakeLists.txt
src/main/cpp/skyline/soc/gm20b
@ -203,6 +203,7 @@ add_library(skyline SHARED
|
||||
${source_DIR}/skyline/soc/gm20b/channel.cpp
|
||||
${source_DIR}/skyline/soc/gm20b/gpfifo.cpp
|
||||
${source_DIR}/skyline/soc/gm20b/gmmu.cpp
|
||||
${source_DIR}/skyline/soc/gm20b/macro/macro_state.cpp
|
||||
${source_DIR}/skyline/soc/gm20b/macro/macro_interpreter.cpp
|
||||
${source_DIR}/skyline/soc/gm20b/engines/engine.cpp
|
||||
${source_DIR}/skyline/soc/gm20b/engines/gpfifo.cpp
|
||||
|
@ -20,7 +20,7 @@ namespace skyline::soc::gm20b::engine {
|
||||
if (!(macroMethodOffset & 1)) {
|
||||
// Flush the current macro as we are switching to another one
|
||||
if (macroInvocation.Valid()) {
|
||||
macroState.macroInterpreter.Execute(macroState.macroPositions[macroInvocation.index], macroInvocation.arguments, this);
|
||||
macroState.Execute(macroInvocation.index, macroInvocation.arguments, this);
|
||||
macroInvocation.Reset();
|
||||
}
|
||||
|
||||
@ -32,7 +32,7 @@ namespace skyline::soc::gm20b::engine {
|
||||
|
||||
// Flush macro after all of the data in the method call has been sent
|
||||
if (lastCall && macroInvocation.Valid()) {
|
||||
macroState.macroInterpreter.Execute(macroState.macroPositions[macroInvocation.index], macroInvocation.arguments, this);
|
||||
macroState.Execute(macroInvocation.index, macroInvocation.arguments, this);
|
||||
macroInvocation.Reset();
|
||||
}
|
||||
};
|
||||
|
@ -61,15 +61,15 @@ namespace skyline::soc::gm20b::engine {
|
||||
MacroState &macroState;
|
||||
|
||||
struct {
|
||||
size_t index{std::numeric_limits<size_t>::max()};
|
||||
u32 index{std::numeric_limits<u32>::max()};
|
||||
std::vector<u32> arguments;
|
||||
|
||||
bool Valid() {
|
||||
return index != std::numeric_limits<size_t>::max();
|
||||
return index != std::numeric_limits<u32>::max();
|
||||
}
|
||||
|
||||
void Reset() {
|
||||
index = std::numeric_limits<size_t>::max();
|
||||
index = std::numeric_limits<u32>::max();
|
||||
arguments.clear();
|
||||
}
|
||||
} macroInvocation{}; //!< Data for a macro that is pending execution
|
||||
@ -88,6 +88,14 @@ namespace skyline::soc::gm20b::engine {
|
||||
*/
|
||||
virtual u32 ReadMethodFromMacro(u32 method) = 0;
|
||||
|
||||
virtual void DrawInstanced(bool setRegs, u32 drawTopology, u32 vertexArrayCount, u32 instanceCount, u32 vertexArrayStart, u32 globalBaseInstanceIndex) {
|
||||
throw exception("DrawInstanced is not implemented for this engine");
|
||||
}
|
||||
|
||||
virtual void DrawIndexedInstanced(bool setRegs, u32 drawTopology, u32 indexBufferCount, u32 instanceCount, u32 globalBaseVertexIndex, u32 indexBufferFirst, u32 globalBaseInstanceIndex) {
|
||||
throw exception("DrawIndexedInstanced is not implemented for this engine");
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Handles a call to a method in the MME space
|
||||
* @param macroMethodOffset The target offset from EngineMethodsEnd
|
||||
|
@ -323,4 +323,29 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
|
||||
/**
 * @brief Reads back the raw register word stored at index `method`
 * @note No bounds check is performed on `method` here
 */
u32 Maxwell3D::ReadMethodFromMacro(u32 method) {
    return registers.raw[method];
}
|
||||
|
||||
void Maxwell3D::DrawInstanced(bool setRegs, u32 drawTopology, u32 vertexArrayCount, u32 instanceCount, u32 vertexArrayStart, u32 globalBaseInstanceIndex) {
|
||||
auto topology{static_cast<type::DrawTopology>(drawTopology)};
|
||||
if (setRegs) {
|
||||
registers.begin->op = topology;
|
||||
registers.drawVertexArray->count = vertexArrayCount;
|
||||
registers.vertexArrayStart = vertexArrayStart;
|
||||
registers.globalBaseInstanceIndex = globalBaseInstanceIndex;
|
||||
}
|
||||
|
||||
interconnect.Draw(topology, false, vertexArrayCount, vertexArrayStart, instanceCount, 0, globalBaseInstanceIndex);
|
||||
}
|
||||
|
||||
void Maxwell3D::DrawIndexedInstanced(bool setRegs, u32 drawTopology, u32 indexBufferCount, u32 instanceCount, u32 globalBaseVertexIndex, u32 indexBufferFirst, u32 globalBaseInstanceIndex) {
|
||||
auto topology{static_cast<type::DrawTopology>(drawTopology)};
|
||||
if (setRegs) {
|
||||
registers.begin->op = topology;
|
||||
registers.drawIndexBuffer->count = indexBufferCount;
|
||||
registers.indexBuffer->first = indexBufferFirst;
|
||||
registers.globalBaseVertexIndex = globalBaseVertexIndex;
|
||||
registers.globalBaseInstanceIndex = globalBaseInstanceIndex;
|
||||
}
|
||||
|
||||
interconnect.Draw(topology, true, indexBufferCount, indexBufferFirst, instanceCount, globalBaseVertexIndex, globalBaseInstanceIndex);
|
||||
}
|
||||
}
|
||||
|
@ -392,5 +392,9 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
|
||||
void CallMethodFromMacro(u32 method, u32 argument) override;
|
||||
|
||||
u32 ReadMethodFromMacro(u32 method) override;
|
||||
|
||||
void DrawInstanced(bool setRegs, u32 drawTopology, u32 vertexArrayCount, u32 instanceCount, u32 vertexArrayStart, u32 globalBaseInstanceIndex) override;
|
||||
|
||||
void DrawIndexedInstanced(bool setRegs, u32 drawTopology, u32 indexBufferCount, u32 instanceCount, u32 globalBaseVertexIndex, u32 indexBufferFirst, u32 globalBaseInstanceIndex) override;
|
||||
};
|
||||
}
|
||||
|
83
app/src/main/cpp/skyline/soc/gm20b/macro/macro_state.cpp
Normal file
83
app/src/main/cpp/skyline/soc/gm20b/macro/macro_state.cpp
Normal file
@ -0,0 +1,83 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2022 yuzu Emulator Project (https://yuzu-emu.org/)
|
||||
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#include <soc/gm20b/engines/engine.h>
|
||||
#include "macro_state.h"
|
||||
|
||||
namespace skyline::soc::gm20b {
|
||||
namespace macro_hle {
|
||||
void DrawInstanced(size_t offset, span<u32> args, engine::MacroEngineBase *targetEngine) {
|
||||
u32 instanceCount{targetEngine->ReadMethodFromMacro(0xD1B) & args[2]};
|
||||
|
||||
targetEngine->DrawInstanced(true, args[0], args[1], instanceCount, args[3], args[4]);
|
||||
}
|
||||
|
||||
void DrawIndexedInstanced(size_t offset, span<u32> args, engine::MacroEngineBase *targetEngine) {
|
||||
u32 instanceCount{targetEngine->ReadMethodFromMacro(0xD1B) & args[2]};
|
||||
|
||||
targetEngine->DrawIndexedInstanced(true, args[0], args[1], instanceCount, args[3], args[4], args[5]);
|
||||
}
|
||||
|
||||
void DrawInstancedIndexedWithConstantBuffer(size_t offset, span<u32> args, engine::MacroEngineBase *targetEngine) {
|
||||
// Writes globalBaseVertexIndex and globalBaseInstanceIndex to the bound constant buffer before performing a standard instanced indexed draw
|
||||
u32 instanceCount{targetEngine->ReadMethodFromMacro(0xD1B) & args[2]};
|
||||
targetEngine->CallMethodFromMacro(0x8e3, 0x640);
|
||||
targetEngine->CallMethodFromMacro(0x8e4, args[4]);
|
||||
targetEngine->CallMethodFromMacro(0x8e5, args[5]);
|
||||
targetEngine->DrawIndexedInstanced(false, args[0], args[1], instanceCount, args[4], args[3], args[5]);
|
||||
targetEngine->CallMethodFromMacro(0x8e3, 0x640);
|
||||
targetEngine->CallMethodFromMacro(0x8e4, 0x0);
|
||||
targetEngine->CallMethodFromMacro(0x8e5, 0x0);
|
||||
}
|
||||
|
||||
struct HleFunctionInfo {
|
||||
Function function;
|
||||
u64 size;
|
||||
u32 hash;
|
||||
};
|
||||
|
||||
constexpr std::array<HleFunctionInfo, 0x3> functions{{
|
||||
{DrawInstanced, 0x12, 0x6F0DD310},
|
||||
{DrawIndexedInstanced, 0x17, 0x2764C4F},
|
||||
{DrawInstancedIndexedWithConstantBuffer, 0x1F, 0xF2F16988},
|
||||
}};
|
||||
|
||||
/**
 * @brief Searches the HLE table for an implementation of the macro located at the start of `code`
 * @param code Macro code memory beginning at the macro's first word, it is allowed to extend past the end of the macro
 * @return The matching HLE function, or a null function pointer if the macro isn't recognised
 */
static Function LookupFunction(span<u32> code) {
    for (const auto &function : functions) {
        // A candidate longer than the remaining code can never match
        if (function.size > code.size())
            continue;

        // Restrict the hash to the words covered by the candidate macro, `code` continues past the macro's end
        // so hashing all of it would make the result depend on unrelated data
        auto macro{code.subspan(0, function.size)};

        if (XXH32(macro.data(), macro.size_bytes(), 0) == function.hash)
            return function.function;
    }

    return {};
}
|
||||
}
|
||||
|
||||
/**
 * @brief Flags the cached HLE lookups as stale, Execute discards them when it sees the flag
 */
void MacroState::Invalidate() {
    invalidatePending = true;
}
|
||||
|
||||
/**
 * @brief Runs the macro in slot `position`, using an HLE implementation when one matches and the interpreter otherwise
 * @param position Index into macroPositions/macroHleFunctions that selects the macro
 * @param args The arguments supplied to the macro
 * @param targetEngine The engine the macro operates on
 */
void MacroState::Execute(u32 position, span<u32> args, engine::MacroEngineBase *targetEngine) {
    size_t offset{macroPositions[position]};

    if (invalidatePending) {
        macroHleFunctions.fill({});
        // Clear the request once it has been serviced, otherwise every later call would wipe the cache and redo the lookup
        invalidatePending = false;
    }

    auto &hleEntry{macroHleFunctions[position]};

    // The lookup is only done on the first execution of a slot after an invalidation, a failed lookup is cached as a null function
    if (!hleEntry.valid) {
        hleEntry.function = macro_hle::LookupFunction(span(macroCode).subspan(offset));
        hleEntry.valid = true;
    }

    if (hleEntry.function)
        hleEntry.function(offset, args, targetEngine);
    else
        macroInterpreter.Execute(offset, args, targetEngine);
}
|
||||
}
|
@ -7,14 +7,29 @@
|
||||
#include "macro_interpreter.h"
|
||||
|
||||
namespace skyline::soc::gm20b {
|
||||
namespace macro_hle {
|
||||
using Function = void (*)(size_t offset, span<u32> args, engine::MacroEngineBase *targetEngine);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Holds per-channel macro state
|
||||
*/
|
||||
struct MacroState {
|
||||
struct MacroHleEntry {
|
||||
macro_hle::Function function;
|
||||
bool valid;
|
||||
};
|
||||
|
||||
engine::MacroInterpreter macroInterpreter; //!< The macro interpreter for handling 3D/2D macros
|
||||
std::array<u32, 0x2000> macroCode{}; //!< Stores GPU macros, writes to it will wraparound on overflow
|
||||
std::array<size_t, 0x80> macroPositions{}; //!< The positions of each individual macro in macro code memory, there can be a maximum of 0x80 macros at any one time
|
||||
std::array<MacroHleEntry, 0x80> macroHleFunctions{}; //!< The HLE functions for each macro position, used to optionally override the interpreter
|
||||
bool invalidatePending{};
|
||||
|
||||
MacroState() : macroInterpreter(macroCode) {}
|
||||
|
||||
void Invalidate();
|
||||
|
||||
void Execute(u32 position, span<u32> args, engine::MacroEngineBase *targetEngine);
|
||||
};
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user