mirror of
https://github.com/skyline-emu/skyline.git
synced 2024-12-29 18:55:28 +03:00
Implement basic inline2memory engine support
Not currently used by anything, but it will be used by the compute and 3D engines as well as by its own engine in the future. Block linear copies are currently unsupported.
This commit is contained in:
parent
5c387f5c5a
commit
8c73b62b2c
@ -183,6 +183,7 @@ add_library(skyline SHARED
|
|||||||
${source_DIR}/skyline/soc/gm20b/engines/engine.cpp
|
${source_DIR}/skyline/soc/gm20b/engines/engine.cpp
|
||||||
${source_DIR}/skyline/soc/gm20b/engines/gpfifo.cpp
|
${source_DIR}/skyline/soc/gm20b/engines/gpfifo.cpp
|
||||||
${source_DIR}/skyline/soc/gm20b/engines/maxwell_3d.cpp
|
${source_DIR}/skyline/soc/gm20b/engines/maxwell_3d.cpp
|
||||||
|
${source_DIR}/skyline/soc/gm20b/engines/inline2memory.cpp
|
||||||
${source_DIR}/skyline/soc/gm20b/engines/maxwell/initialization.cpp
|
${source_DIR}/skyline/soc/gm20b/engines/maxwell/initialization.cpp
|
||||||
${source_DIR}/skyline/input/npad.cpp
|
${source_DIR}/skyline/input/npad.cpp
|
||||||
${source_DIR}/skyline/input/npad_device.cpp
|
${source_DIR}/skyline/input/npad_device.cpp
|
||||||
|
@ -10,6 +10,20 @@ namespace skyline::soc::gm20b {
|
|||||||
#define U32_OFFSET(regs, field) (offsetof(regs, field) / sizeof(u32))
|
#define U32_OFFSET(regs, field) (offsetof(regs, field) / sizeof(u32))
|
||||||
|
|
||||||
namespace engine {
|
namespace engine {
|
||||||
|
/**
|
||||||
|
* @brief A 40-bit GMMU virtual address with register-packing
|
||||||
|
* @note The registers pack the address with big-endian ordering (but with 32 bit words)
|
||||||
|
*/
|
||||||
|
struct Address {
|
||||||
|
u32 high;
|
||||||
|
u32 low;
|
||||||
|
|
||||||
|
operator u64() {
|
||||||
|
return (static_cast<u64>(high) << 32) | low;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
static_assert(sizeof(Address) == sizeof(u64));
|
||||||
|
|
||||||
constexpr u32 EngineMethodsEnd = 0xE00; //!< All methods above this are passed to the MME on supported engines
|
constexpr u32 EngineMethodsEnd = 0xE00; //!< All methods above this are passed to the MME on supported engines
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
92
app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.cpp
Normal file
92
app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.cpp
Normal file
@ -0,0 +1,92 @@
|
|||||||
|
// SPDX-License-Identifier: MPL-2.0
|
||||||
|
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||||
|
|
||||||
|
#include <soc/gm20b/gmmu.h>
|
||||||
|
#include "inline2memory.h"
|
||||||
|
|
||||||
|
namespace skyline::soc::gm20b::engine {
|
||||||
|
Inline2MemoryBackend::Inline2MemoryBackend(std::shared_ptr<AddressSpaceContext> addressSpaceContext) : addressSpaceContext(std::move(addressSpaceContext)) {}
|
||||||
|
|
||||||
|
/**
 * Begins a new upload: the write cursor is rewound and the staging buffer is resized to hold
 * `lineCount` lines, with each line's length rounded up to a whole number of 32-bit words
 * @param state The I2M register state holding the `lineCount` and `lineLengthIn` of the upload
 */
void Inline2MemoryBackend::LaunchDma(Inline2MemoryBackend::RegisterState &state) {
    writeOffset = 0;

    // Both operands come from 32-bit registers, widen before multiplying so that the product cannot wrap around in 32 bits
    size_t targetSizeBytes{static_cast<size_t>(state.lineCount) * util::AlignUp(state.lineLengthIn, 4)};
    size_t targetSizeWords{targetSizeBytes / 4};
    buffer.resize(targetSizeWords);
}
|
||||||
|
|
||||||
|
/**
 * Flushes the fully populated staging buffer to the destination described by `state`
 * @param state The I2M register state holding the launch parameters and the output address
 * @throws exception If the launch requested a semaphore release on completion, as that is unimplemented
 * @note Only single-line pitch uploads are written out, anything else is dropped with a warning
 */
void Inline2MemoryBackend::CompleteDma(Inline2MemoryBackend::RegisterState &state) {
    if (state.launchDma.completion == RegisterState::DmaCompletionType::ReleaseSemaphore)
        throw exception("Semaphore release on I2M completion is not supported!");

    bool isSingleLinePitch{state.launchDma.layout == RegisterState::DmaDstMemoryLayout::Pitch && state.lineCount == 1};
    if (!isSingleLinePitch) {
        Logger::Warn("Non-linear I2M uploads are not supported!");
        return;
    }

    // TODO: we can do this with the buffer manager to avoid some overhead in the future
    Logger::Debug("range: 0x{:X} -> 0x{:X}", u64{state.offsetOut}, u64{state.offsetOut} + buffer.size() * 0x4);
    addressSpaceContext->gmmu.Write(state.offsetOut, buffer);
}
|
||||||
|
|
||||||
|
/**
 * Appends a single word to the staging buffer and completes the upload once the buffer is full
 * @param state The I2M register state, it is forwarded to CompleteDma when the last word arrives
 * @param value The word of inline data to append
 * @throws exception If the staging buffer has no space left for another word
 */
void Inline2MemoryBackend::LoadInlineData(RegisterState &state, u32 value) {
    if (writeOffset >= buffer.size())
        throw exception("Inline data load overflow!");

    buffer[writeOffset] = value;
    ++writeOffset;

    bool uploadFinished{writeOffset == buffer.size()};
    if (uploadFinished)
        CompleteDma(state);
}
|
||||||
|
|
||||||
|
/**
 * Appends a batch of words to the staging buffer and completes the upload once the buffer is full
 * @param state The I2M register state, it is forwarded to CompleteDma when the last word arrives
 * @param data The words of inline data to append
 * @throws exception If the batch does not fit in the space remaining in the staging buffer
 */
void Inline2MemoryBackend::LoadInlineData(Inline2MemoryBackend::RegisterState &state, span<u32> data) {
    auto endOffset{writeOffset + data.size()};
    if (endOffset > buffer.size())
        throw exception("Inline data load overflow!");

    auto destination{span(buffer).subspan(writeOffset)};
    destination.copy_from(data);
    writeOffset = static_cast<u32>(endOffset);

    if (writeOffset == buffer.size())
        CompleteDma(state);
}
|
||||||
|
|
||||||
|
// Hands the address space context over to the backend, which performs the actual uploads
Inline2Memory::Inline2Memory(std::shared_ptr<AddressSpaceContext> addressSpaceContext)
    : backend(std::move(addressSpaceContext)) {}
|
||||||
|
|
||||||
|
/**
 * Entry point for a single method call on the I2M engine: the call is logged and then dispatched
 * @param method The method being called, it is used as an index into the register file
 * @param argument The argument the method was called with
 */
__attribute__((always_inline)) void Inline2Memory::CallMethod(u32 method, u32 argument) {
    // Trace every call prior to handling it
    Logger::Verbose("Called method in I2M: 0x{:X} args: 0x{:X}", method, argument);
    HandleMethod(method, argument);
}
|
||||||
|
|
||||||
|
// The size of the `Registers::field` wrapper minus the size of the type it dereferences to, expressed in 32-bit words
// NOTE(review): this is presumably the method index at which `field` begins, confirm against util::OffsetMember
// Both replacement lists are fully parenthesised so that they expand as a single operand in any expression
#define INLINE2MEMORY_OFFSET(field) ((sizeof(typeof(Registers::field)) - sizeof(std::remove_reference_t<decltype(*Registers::field)>)) / sizeof(u32))
// The value of INLINE2MEMORY_OFFSET for `field` plus the 32-bit word offset of `member` inside the type `field` dereferences to
#define INLINE2MEMORY_STRUCT_OFFSET(field, member) (INLINE2MEMORY_OFFSET(field) + U32_OFFSET(std::remove_reference_t<decltype(*Registers::field)>, member))
|
||||||
|
|
||||||
|
/**
 * Writes `argument` into the register selected by `method` and triggers any behaviour tied to that register
 * @param method The index of the register being written to
 * @param argument The value being written to the register
 * @note Methods that lie outside the register file are ignored with a warning rather than being written out of bounds
 */
void Inline2Memory::HandleMethod(u32 method, u32 argument) {
    // `raw` is a fixed-size array and indexing it is unchecked, so anything past its end must be rejected here
    if (method >= registers.raw.size()) {
        Logger::Warn("Out of bounds method in I2M: 0x{:X} args: 0x{:X}", method, argument);
        return;
    }

    registers.raw[method] = argument;

    switch (method) {
        case INLINE2MEMORY_STRUCT_OFFSET(i2m, launchDma):
            // A write to launchDma starts a new upload
            backend.LaunchDma(*registers.i2m);
            return;
        case INLINE2MEMORY_STRUCT_OFFSET(i2m, loadInlineData):
            // Every write to loadInlineData supplies one word of the upload
            backend.LoadInlineData(*registers.i2m, argument);
            return;
        default:
            return;
    }
}
|
||||||
|
|
||||||
|
/**
 * Handles a batch of arguments which all target the same method
 * @param method The method that every argument in the batch is written to
 * @param arguments The arguments to supply to the method, in order
 * @note A batch targeting loadInlineData is passed to the backend in a single call, any other method is handled one argument at a time
 */
void Inline2Memory::CallMethodBatchNonInc(u32 method, span<u32> arguments) {
    if (method == (INLINE2MEMORY_STRUCT_OFFSET(i2m, loadInlineData))) {
        backend.LoadInlineData(*registers.i2m, arguments);
        return;
    }

    for (u32 argument : arguments)
        HandleMethod(method, argument);
}
|
||||||
|
|
||||||
|
#undef INLINE2MEMORY_STRUCT_OFFSET
|
||||||
|
#undef INLINE2MEMORY_OFFSET
|
||||||
|
}
|
173
app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.h
Normal file
173
app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.h
Normal file
@ -0,0 +1,173 @@
|
|||||||
|
// SPDX-License-Identifier: MPL-2.0
|
||||||
|
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <common.h>
|
||||||
|
#include "engine.h"
|
||||||
|
|
||||||
|
namespace skyline::soc::gm20b {
|
||||||
|
struct AddressSpaceContext;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace skyline::soc::gm20b::engine {
|
||||||
|
/**
|
||||||
|
* @brief Implements the actual behaviour of the I2M engine, allowing it to be shared between other engines which also contain the I2M block (3D, compute)
|
||||||
|
*/
|
||||||
|
class Inline2MemoryBackend {
|
||||||
|
private:
|
||||||
|
std::vector<u32> buffer; //!< Temporary buffer to hold data being currently uploaded
|
||||||
|
u32 writeOffset{}; //!< Current write offset in words into `buffer`
|
||||||
|
std::shared_ptr<AddressSpaceContext> addressSpaceContext;
|
||||||
|
|
||||||
|
public:
|
||||||
|
/**
|
||||||
|
* @brief The I2M register state that can be included as part of an engines register state
|
||||||
|
* @url https://github.com/devkitPro/deko3d/blob/master/source/maxwell/engine_inline.def
|
||||||
|
*/
|
||||||
|
struct RegisterState {
|
||||||
|
enum class BlockWidth : u8 {
|
||||||
|
OneGob = 0
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class BlockHeight : u8 {
|
||||||
|
OneGob = 0,
|
||||||
|
TwoGobs = 1,
|
||||||
|
FourGobs = 2,
|
||||||
|
EightGobs = 3,
|
||||||
|
SixteenGobs = 4,
|
||||||
|
ThirtyTwoGobs = 5
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class BlockDepth : u8 {
|
||||||
|
OneGob = 0,
|
||||||
|
TwoGobs = 1,
|
||||||
|
FourGobs = 2,
|
||||||
|
EightGobs = 3,
|
||||||
|
SixteenGobs = 4,
|
||||||
|
ThirtyTwoGobs = 5
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class DmaDstMemoryLayout : u8 {
|
||||||
|
BlockLinear = 0,
|
||||||
|
Pitch = 1
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class DmaReductionFormat : u8 {
|
||||||
|
Unsigned32 = 0,
|
||||||
|
Signed32 = 1
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class DmaCompletionType : u8 {
|
||||||
|
FlushDisable = 0,
|
||||||
|
FlushOnly = 1,
|
||||||
|
ReleaseSemaphore = 2
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class DmaInterruptType : u8 {
|
||||||
|
None = 0,
|
||||||
|
Interrupt = 1
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class DmaSemaphoreStructSize : u8 {
|
||||||
|
FourWords = 0,
|
||||||
|
OneWord = 1
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class DmaReductionOp : u8 {
|
||||||
|
Add = 0,
|
||||||
|
Min = 1,
|
||||||
|
Max = 2,
|
||||||
|
Inc = 3,
|
||||||
|
Dec = 4,
|
||||||
|
And = 5,
|
||||||
|
Or = 6,
|
||||||
|
Xor = 7
|
||||||
|
};
|
||||||
|
|
||||||
|
u32 lineLengthIn;
|
||||||
|
u32 lineCount;
|
||||||
|
Address offsetOut;
|
||||||
|
u32 pitchOut;
|
||||||
|
struct {
|
||||||
|
BlockWidth width : 4;
|
||||||
|
BlockHeight height : 4;
|
||||||
|
BlockDepth depth : 4;
|
||||||
|
u32 _pad1_ : 20;
|
||||||
|
} dstBlockSize;
|
||||||
|
u32 dstWidth;
|
||||||
|
u32 dstHeight;
|
||||||
|
u32 dstDepth;
|
||||||
|
u32 dstLayer;
|
||||||
|
u32 originBytesX;
|
||||||
|
u32 originSamplesY;
|
||||||
|
struct {
|
||||||
|
DmaDstMemoryLayout layout : 1;
|
||||||
|
bool reductionEnable : 1;
|
||||||
|
DmaReductionFormat format : 2;
|
||||||
|
DmaCompletionType completion : 2;
|
||||||
|
bool sysmemBarDisable : 1;
|
||||||
|
u8 _pad0_ : 1;
|
||||||
|
DmaInterruptType interrupt : 2;
|
||||||
|
u8 _pad1_ : 2;
|
||||||
|
DmaSemaphoreStructSize semaphore : 1;
|
||||||
|
DmaReductionOp reductionOp : 3;
|
||||||
|
} launchDma;
|
||||||
|
u32 loadInlineData;
|
||||||
|
};
|
||||||
|
static_assert(sizeof(RegisterState) == (0xE * 0x4));
|
||||||
|
|
||||||
|
private:
|
||||||
|
/**
|
||||||
|
* @brief Ran after all the inline data has been pushed and handles writing that data into memory
|
||||||
|
*/
|
||||||
|
void CompleteDma(RegisterState &state);
|
||||||
|
|
||||||
|
public:
|
||||||
|
Inline2MemoryBackend(std::shared_ptr<AddressSpaceContext> addressSpaceContext);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Should be called when launchDma in `state` is written to
|
||||||
|
*/
|
||||||
|
void LaunchDma(RegisterState &state);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Should be called when loadInlineData in `state` is written to (non batch version)
|
||||||
|
*/
|
||||||
|
void LoadInlineData(RegisterState &state, u32 value);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Should be called when loadInlineData in `state` is written to (batch version)
|
||||||
|
*/
|
||||||
|
void LoadInlineData(RegisterState &state, span<u32> data);
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Implements the actual I2M engine block that is located on subchannel 2 and handles uploading data from a pushbuffer into GPU memory
|
||||||
|
*/
|
||||||
|
class Inline2Memory {
|
||||||
|
private:
|
||||||
|
Inline2MemoryBackend backend;
|
||||||
|
|
||||||
|
void HandleMethod(u32 method, u32 argument);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @url https://github.com/devkitPro/deko3d/blob/master/source/maxwell/engine_inline.def
|
||||||
|
*/
|
||||||
|
union Registers {
|
||||||
|
std::array<u32, EngineMethodsEnd> raw;
|
||||||
|
|
||||||
|
template<size_t Offset, typename Type>
|
||||||
|
using Register = util::OffsetMember<Offset, Type, u32>;
|
||||||
|
|
||||||
|
Register<0x60, Inline2MemoryBackend::RegisterState> i2m;
|
||||||
|
} registers{};
|
||||||
|
|
||||||
|
public:
|
||||||
|
Inline2Memory(std::shared_ptr<AddressSpaceContext> addressSpaceContext);
|
||||||
|
|
||||||
|
void CallMethod(u32 method, u32 argument);
|
||||||
|
|
||||||
|
void CallMethodBatchNonInc(u32 method, span<u32> arguments);
|
||||||
|
};
|
||||||
|
}
|
@ -5,24 +5,11 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <common.h>
|
#include <common.h>
|
||||||
|
#include <soc/gm20b/engines/engine.h>
|
||||||
|
|
||||||
namespace skyline::soc::gm20b::engine::maxwell3d::type {
|
namespace skyline::soc::gm20b::engine::maxwell3d::type {
|
||||||
#pragma pack(push, 1)
|
#pragma pack(push, 1)
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief A 40-bit GMMU virtual address with register-packing
|
|
||||||
* @note The registers pack the address with big-endian ordering (but with 32 bit words)
|
|
||||||
*/
|
|
||||||
struct Address {
|
|
||||||
u32 high;
|
|
||||||
u32 low;
|
|
||||||
|
|
||||||
operator u64() {
|
|
||||||
return (static_cast<u64>(high) << 32) | low;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
static_assert(sizeof(Address) == sizeof(u64));
|
|
||||||
|
|
||||||
enum class MmeShadowRamControl : u32 {
|
enum class MmeShadowRamControl : u32 {
|
||||||
MethodTrack = 0, //!< Tracks all writes to registers in shadow RAM
|
MethodTrack = 0, //!< Tracks all writes to registers in shadow RAM
|
||||||
MethodTrackWithFilter = 1, //!< Tracks all writes to registers in shadow RAM with a filter
|
MethodTrackWithFilter = 1, //!< Tracks all writes to registers in shadow RAM with a filter
|
||||||
|
@ -108,7 +108,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
|
|||||||
|
|
||||||
Register<0x3EB, u32> rtSeparateFragData;
|
Register<0x3EB, u32> rtSeparateFragData;
|
||||||
|
|
||||||
Register<0x3F8, type::Address> depthTargetAddress;
|
Register<0x3F8, Address> depthTargetAddress;
|
||||||
Register<0x3FA, type::DepthRtFormat> depthTargetFormat;
|
Register<0x3FA, type::DepthRtFormat> depthTargetFormat;
|
||||||
Register<0x3FB, type::RenderTargetTileMode> depthTargetTileMode;
|
Register<0x3FB, type::RenderTargetTileMode> depthTargetTileMode;
|
||||||
Register<0x3FC, u32> depthTargetLayerStride;
|
Register<0x3FC, u32> depthTargetLayerStride;
|
||||||
@ -193,7 +193,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
|
|||||||
Register<0x54F, type::MultisampleControl> multisampleControl;
|
Register<0x54F, type::MultisampleControl> multisampleControl;
|
||||||
|
|
||||||
struct SamplerPool {
|
struct SamplerPool {
|
||||||
type::Address address; // 0x557
|
Address address; // 0x557
|
||||||
u32 maximumIndex; // 0x559
|
u32 maximumIndex; // 0x559
|
||||||
};
|
};
|
||||||
Register<0x557, SamplerPool> samplerPool;
|
Register<0x557, SamplerPool> samplerPool;
|
||||||
@ -202,7 +202,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
|
|||||||
Register<0x55C, u32> lineSmoothEnable;
|
Register<0x55C, u32> lineSmoothEnable;
|
||||||
|
|
||||||
struct TexturePool {
|
struct TexturePool {
|
||||||
type::Address address; // 0x55D
|
Address address; // 0x55D
|
||||||
u32 maximumIndex; // 0x55F
|
u32 maximumIndex; // 0x55F
|
||||||
};
|
};
|
||||||
Register<0x55D, TexturePool> texturePool;
|
Register<0x55D, TexturePool> texturePool;
|
||||||
@ -220,7 +220,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
|
|||||||
Register<0x56F, float> depthBiasUnits;
|
Register<0x56F, float> depthBiasUnits;
|
||||||
|
|
||||||
Register<0x581, type::PointCoordReplace> pointCoordReplace;
|
Register<0x581, type::PointCoordReplace> pointCoordReplace;
|
||||||
Register<0x582, type::Address> setProgramRegion;
|
Register<0x582, Address> setProgramRegion;
|
||||||
|
|
||||||
Register<0x585, u32> vertexEndGl; //!< Method-only register with no real value, used after calling vertexBeginGl to invoke the draw
|
Register<0x585, u32> vertexEndGl; //!< Method-only register with no real value, used after calling vertexBeginGl to invoke the draw
|
||||||
Register<0x586, type::VertexBeginGl> vertexBeginGl; //!< Similar to glVertexBegin semantically, supplies a primitive topology for draws alongside instancing data
|
Register<0x586, type::VertexBeginGl> vertexBeginGl; //!< Similar to glVertexBegin semantically, supplies a primitive topology for draws alongside instancing data
|
||||||
@ -256,7 +256,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
|
|||||||
Register<0x680, std::array<type::ColorWriteMask, type::RenderTargetCount>> colorWriteMask;
|
Register<0x680, std::array<type::ColorWriteMask, type::RenderTargetCount>> colorWriteMask;
|
||||||
|
|
||||||
struct Semaphore {
|
struct Semaphore {
|
||||||
type::Address address; // 0x6C0
|
Address address; // 0x6C0
|
||||||
u32 payload; // 0x6C2
|
u32 payload; // 0x6C2
|
||||||
type::SemaphoreInfo info; // 0x6C3
|
type::SemaphoreInfo info; // 0x6C3
|
||||||
};
|
};
|
||||||
@ -270,7 +270,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
|
|||||||
u32 enable : 1;
|
u32 enable : 1;
|
||||||
};
|
};
|
||||||
} config;
|
} config;
|
||||||
type::Address iova;
|
Address iova;
|
||||||
u32 divisor;
|
u32 divisor;
|
||||||
};
|
};
|
||||||
static_assert(sizeof(VertexBuffer) == sizeof(u32) * 4);
|
static_assert(sizeof(VertexBuffer) == sizeof(u32) * 4);
|
||||||
@ -288,7 +288,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
|
|||||||
};
|
};
|
||||||
Register<0x780, std::array<IndependentBlend, type::RenderTargetCount>> independentBlend;
|
Register<0x780, std::array<IndependentBlend, type::RenderTargetCount>> independentBlend;
|
||||||
|
|
||||||
Register<0x7C0, std::array<type::Address, type::VertexBufferCount>> vertexBufferLimits; //!< A per-VBO IOVA denoting the end of the vertex buffer
|
Register<0x7C0, std::array<Address, type::VertexBufferCount>> vertexBufferLimits; //!< A per-VBO IOVA denoting the end of the vertex buffer
|
||||||
|
|
||||||
Register<0x800, std::array<type::SetProgramInfo, type::ShaderStageCount>> setProgram;
|
Register<0x800, std::array<type::SetProgramInfo, type::ShaderStageCount>> setProgram;
|
||||||
|
|
||||||
@ -296,7 +296,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
|
|||||||
|
|
||||||
struct ConstantBufferSelector {
|
struct ConstantBufferSelector {
|
||||||
u32 size;
|
u32 size;
|
||||||
type::Address address;
|
Address address;
|
||||||
};
|
};
|
||||||
Register<0x8E0, ConstantBufferSelector> constantBufferSelector;
|
Register<0x8E0, ConstantBufferSelector> constantBufferSelector;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user