mirror of
https://github.com/skyline-emu/skyline.git
synced 2024-12-26 23:45:30 +03:00
Move Guest GPU into SoC Directory
We decided to restructure Skyline to draw a layer of separation between the guest and host GPU. We're reserving the `gpu` namespace and directory purely for the host GPU and creating a new `soc` directory and namespace for emulation of parts of the X1 SoC, which is currently limited to the guest GPU but will be expanded to contain components like the audio DSP down the line.
This commit is contained in:
parent
0ea6d9bee5
commit
cbe9bc5f25
16
.idea/codeStyles/Project.xml
generated
16
.idea/codeStyles/Project.xml
generated
@ -7,22 +7,6 @@
|
||||
<option name="FORMATTER_OFF_TAG" value="@fmt:off" />
|
||||
<option name="SOFT_MARGINS" value="80,140" />
|
||||
<JetCodeStyleSettings>
|
||||
<option name="PACKAGES_TO_USE_STAR_IMPORTS">
|
||||
<value>
|
||||
<package name="java.util" alias="false" withSubpackages="false" />
|
||||
<package name="kotlinx.android.synthetic" alias="false" withSubpackages="true" />
|
||||
<package name="io.ktor" alias="false" withSubpackages="true" />
|
||||
</value>
|
||||
</option>
|
||||
<option name="PACKAGES_IMPORT_LAYOUT">
|
||||
<value>
|
||||
<package name="" alias="false" withSubpackages="true" />
|
||||
<package name="java" alias="false" withSubpackages="true" />
|
||||
<package name="javax" alias="false" withSubpackages="true" />
|
||||
<package name="kotlin" alias="false" withSubpackages="true" />
|
||||
<package name="" alias="true" withSubpackages="true" />
|
||||
</value>
|
||||
</option>
|
||||
<option name="SPACE_BEFORE_TYPE_COLON" value="true" />
|
||||
<option name="CODE_STYLE_DEFAULTS" value="KOTLIN_OFFICIAL" />
|
||||
</JetCodeStyleSettings>
|
||||
|
2
.idea/inspectionProfiles/Project_Default.xml
generated
2
.idea/inspectionProfiles/Project_Default.xml
generated
@ -171,7 +171,7 @@
|
||||
</inspection_tool>
|
||||
<inspection_tool class="CheckedExceptionClass" enabled="true" level="WARNING" enabled_by_default="true" />
|
||||
<inspection_tool class="ClangTidy" enabled="true" level="WARNING" enabled_by_default="true">
|
||||
<option name="clangTidyChecks" value="-*,bugprone-argument-comment,bugprone-assert-side-effect,bugprone-bad-signal-to-kill-thread,bugprone-branch-clone,bugprone-copy-constructor-init,bugprone-dangling-handle,bugprone-dynamic-static-initializers,bugprone-fold-init-type,bugprone-forward-declaration-namespace,bugprone-forwarding-reference-overload,bugprone-inaccurate-erase,bugprone-incorrect-roundings,bugprone-integer-division,bugprone-lambda-function-name,bugprone-macro-parentheses,bugprone-macro-repeated-side-effects,bugprone-misplaced-operator-in-strlen-in-alloc,bugprone-misplaced-pointer-arithmetic-in-alloc,bugprone-misplaced-widening-cast,bugprone-move-forwarding-reference,bugprone-multiple-statement-macro,bugprone-no-escape,bugprone-not-null-terminated-result,bugprone-parent-virtual-call,bugprone-posix-return,bugprone-reserved-identifier,bugprone-sizeof-container,bugprone-sizeof-expression,bugprone-spuriously-wake-up-functions,bugprone-string-constructor,bugprone-string-integer-assignment,bugprone-string-literal-with-embedded-nul,bugprone-suspicious-enum-usage,bugprone-suspicious-include,bugprone-suspicious-memset-usage,bugprone-suspicious-missing-comma,bugprone-suspicious-semicolon,bugprone-suspicious-string-compare,bugprone-swapped-arguments,bugprone-terminating-continue,bugprone-throw-keyword-missing,bugprone-too-small-loop-variable,bugprone-undefined-memory-manipulation,bugprone-undelegated-constructor,bugprone-unhandled-self-assignment,bugprone-unused-raii,bugprone-unused-return-value,bugprone-use-after-move,bugprone-virtual-near-miss,cert-dcl21-cpp,cert-dcl58-cpp,cert-err34-c,cert-err52-cpp,cert-err60-cpp,cert-flp30-c,cert-msc50-cpp,cert-msc51-cpp,cert-str34-c,cppcoreguidelines-interfaces-global-init,cppcoreguidelines-narrowing-conversions,cppcoreguidelines-pro-type-static-cast-downcast,cppcoreguidelines-slicing,google-default-arguments,google-explicit-constructor,google-runtime-operator,hicpp-exception-baseclass,hicpp-multiway-paths-covered,misc-misplaced-
const,misc-new-delete-overloads,misc-no-recursion,misc-non-copyable-objects,misc-throw-by-value-catch-by-reference,misc-unconventional-assign-operator,misc-uniqueptr-reset-release,modernize-avoid-bind,modernize-concat-nested-namespaces,modernize-deprecated-headers,modernize-deprecated-ios-base-aliases,modernize-loop-convert,modernize-make-shared,modernize-make-unique,modernize-pass-by-value,modernize-raw-string-literal,modernize-redundant-void-arg,modernize-replace-auto-ptr,modernize-replace-disallow-copy-and-assign-macro,modernize-replace-random-shuffle,modernize-return-braced-init-list,modernize-shrink-to-fit,modernize-unary-static-assert,modernize-use-auto,modernize-use-bool-literals,modernize-use-emplace,modernize-use-equals-default,modernize-use-equals-delete,modernize-use-nodiscard,modernize-use-noexcept,modernize-use-nullptr,modernize-use-override,modernize-use-transparent-functors,modernize-use-uncaught-exceptions,mpi-buffer-deref,mpi-type-mismatch,openmp-use-default-none,performance-faster-string-find,performance-for-range-copy,performance-implicit-conversion-in-loop,performance-inefficient-algorithm,performance-inefficient-string-concatenation,performance-inefficient-vector-operation,performance-move-const-arg,performance-move-constructor-init,performance-no-automatic-move,performance-noexcept-move-constructor,performance-trivially-destructible,performance-type-promotion-in-math-fn,performance-unnecessary-copy-initialization,performance-unnecessary-value-param,portability-simd-intrinsics,readability-avoid-const-params-in-decls,readability-const-return-type,readability-container-size-empty,readability-convert-member-functions-to-static,readability-delete-null-pointer,readability-deleted-default,readability-inconsistent-declaration-parameter-name,readability-make-member-function-const,readability-misleading-indentation,readability-misplaced-array-index,readability-non-const-parameter,readability-redundant-control-flow,readability-redundant-declaration,readab
ility-redundant-function-ptr-dereference,readability-redundant-smartptr-get,readability-redundant-string-cstr,readability-redundant-string-init,readability-simplify-subscript-expr,readability-static-accessed-through-instance,readability-static-definition-in-anonymous-namespace,readability-string-compare,readability-uniqueptr-delete-release,readability-use-anyofallof" />
|
||||
<option name="clangTidyChecks" value="-*,bugprone-argument-comment,bugprone-assert-side-effect,bugprone-bad-signal-to-kill-thread,bugprone-branch-clone,bugprone-copy-constructor-init,bugprone-dangling-handle,bugprone-dynamic-static-initializers,bugprone-fold-init-type,bugprone-forward-declaration-namespace,bugprone-forwarding-reference-overload,bugprone-inaccurate-erase,bugprone-incorrect-roundings,bugprone-integer-division,bugprone-lambda-function-name,bugprone-macro-parentheses,bugprone-macro-repeated-side-effects,bugprone-misplaced-operator-in-strlen-in-alloc,bugprone-misplaced-pointer-arithmetic-in-alloc,bugprone-misplaced-widening-cast,bugprone-move-forwarding-reference,bugprone-multiple-statement-macro,bugprone-no-escape,bugprone-not-null-terminated-result,bugprone-parent-virtual-call,bugprone-posix-return,bugprone-reserved-identifier,bugprone-sizeof-container,bugprone-sizeof-expression,bugprone-spuriously-wake-up-functions,bugprone-string-constructor,bugprone-string-integer-assignment,bugprone-string-literal-with-embedded-nul,bugprone-suspicious-enum-usage,bugprone-suspicious-include,bugprone-suspicious-memset-usage,bugprone-suspicious-missing-comma,bugprone-suspicious-semicolon,bugprone-suspicious-string-compare,bugprone-swapped-arguments,bugprone-terminating-continue,bugprone-throw-keyword-missing,bugprone-too-small-loop-variable,bugprone-undefined-memory-manipulation,bugprone-undelegated-constructor,bugprone-unhandled-self-assignment,bugprone-unused-raii,bugprone-unused-return-value,bugprone-use-after-move,bugprone-virtual-near-miss,cert-dcl21-cpp,cert-dcl58-cpp,cert-err34-c,cert-err52-cpp,cert-err58-cpp,cert-err60-cpp,cert-flp30-c,cert-msc50-cpp,cert-msc51-cpp,cert-str34-c,cppcoreguidelines-interfaces-global-init,cppcoreguidelines-narrowing-conversions,cppcoreguidelines-pro-type-member-init,cppcoreguidelines-pro-type-static-cast-downcast,google-default-arguments,google-explicit-constructor,google-runtime-operator,hicpp-exception-baseclass,hicpp-multiway-p
aths-covered,misc-misplaced-const,misc-new-delete-overloads,misc-no-recursion,misc-non-copyable-objects,misc-throw-by-value-catch-by-reference,misc-unconventional-assign-operator,misc-uniqueptr-reset-release,modernize-avoid-bind,modernize-concat-nested-namespaces,modernize-deprecated-ios-base-aliases,modernize-loop-convert,modernize-make-shared,modernize-make-unique,modernize-pass-by-value,modernize-raw-string-literal,modernize-redundant-void-arg,modernize-replace-auto-ptr,modernize-replace-disallow-copy-and-assign-macro,modernize-replace-random-shuffle,modernize-return-braced-init-list,modernize-shrink-to-fit,modernize-unary-static-assert,modernize-use-auto,modernize-use-bool-literals,modernize-use-emplace,modernize-use-equals-default,modernize-use-equals-delete,modernize-use-nodiscard,modernize-use-noexcept,modernize-use-nullptr,modernize-use-override,modernize-use-transparent-functors,modernize-use-uncaught-exceptions,mpi-buffer-deref,mpi-type-mismatch,openmp-use-default-none,performance-faster-string-find,performance-for-range-copy,performance-implicit-conversion-in-loop,performance-inefficient-algorithm,performance-inefficient-string-concatenation,performance-inefficient-vector-operation,performance-move-const-arg,performance-move-constructor-init,performance-no-automatic-move,performance-noexcept-move-constructor,performance-trivially-destructible,performance-type-promotion-in-math-fn,performance-unnecessary-copy-initialization,performance-unnecessary-value-param,portability-simd-intrinsics,readability-avoid-const-params-in-decls,readability-const-return-type,readability-container-size-empty,readability-convert-member-functions-to-static,readability-delete-null-pointer,readability-deleted-default,readability-inconsistent-declaration-parameter-name,readability-make-member-function-const,readability-misleading-indentation,readability-misplaced-array-index,readability-non-const-parameter,readability-redundant-control-flow,readability-redundant-declaration,readabi
lity-redundant-function-ptr-dereference,readability-redundant-smartptr-get,readability-redundant-string-cstr,readability-redundant-string-init,readability-simplify-subscript-expr,readability-static-accessed-through-instance,readability-static-definition-in-anonymous-namespace,readability-string-compare,readability-uniqueptr-delete-release,readability-use-anyofallof" />
|
||||
</inspection_tool>
|
||||
<inspection_tool class="ClassComplexity" enabled="true" level="WARNING" enabled_by_default="true">
|
||||
<option name="m_limit" value="80" />
|
||||
|
@ -67,12 +67,12 @@ add_library(skyline SHARED
|
||||
${source_DIR}/skyline/audio/resampler.cpp
|
||||
${source_DIR}/skyline/audio/adpcm_decoder.cpp
|
||||
${source_DIR}/skyline/gpu/presentation_engine.cpp
|
||||
${source_DIR}/skyline/gpu/macro_interpreter.cpp
|
||||
${source_DIR}/skyline/gpu/memory_manager.cpp
|
||||
${source_DIR}/skyline/gpu/gpfifo.cpp
|
||||
${source_DIR}/skyline/gpu/syncpoint.cpp
|
||||
${source_DIR}/skyline/gpu/texture.cpp
|
||||
${source_DIR}/skyline/gpu/engines/maxwell_3d.cpp
|
||||
${source_DIR}/skyline/soc/gmmu.cpp
|
||||
${source_DIR}/skyline/soc/host1x/syncpoint.cpp
|
||||
${source_DIR}/skyline/soc/gm20b/gpfifo.cpp
|
||||
${source_DIR}/skyline/soc/gm20b/engines/maxwell_3d.cpp
|
||||
${source_DIR}/skyline/soc/gm20b/engines/maxwell/macro_interpreter.cpp
|
||||
${source_DIR}/skyline/input/npad.cpp
|
||||
${source_DIR}/skyline/input/npad_device.cpp
|
||||
${source_DIR}/skyline/input/touch.cpp
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <android/log.h>
|
||||
#include "common.h"
|
||||
#include "nce.h"
|
||||
#include "soc.h"
|
||||
#include "gpu.h"
|
||||
#include "audio.h"
|
||||
#include "input.h"
|
||||
@ -55,6 +56,7 @@ namespace skyline {
|
||||
DeviceState::DeviceState(kernel::OS *os, std::shared_ptr<JvmManager> jvmManager, std::shared_ptr<Settings> settings, std::shared_ptr<Logger> logger)
|
||||
: os(os), jvm(std::move(jvmManager)), settings(std::move(settings)), logger(std::move(logger)) {
|
||||
// We assign these later as they use the state in their constructor and we don't want null pointers
|
||||
soc = std::make_shared<soc::SOC>(*this);
|
||||
gpu = std::make_shared<gpu::GPU>(*this);
|
||||
audio = std::make_shared<audio::Audio>(*this);
|
||||
nce = std::make_shared<nce::NCE>(*this);
|
||||
|
@ -27,7 +27,7 @@
|
||||
#include <frozen/string.h>
|
||||
#include <jni.h>
|
||||
|
||||
#define FORCE_INLINE __attribute__((always_inline)) inline // NOLINT(cppcoreguidelines-macro-usage)
|
||||
#define FORCE_INLINE __attribute__((always_inline)) // NOLINT(cppcoreguidelines-macro-usage)
|
||||
|
||||
namespace fmt {
|
||||
/**
|
||||
@ -605,6 +605,9 @@ namespace skyline {
|
||||
struct ThreadContext;
|
||||
}
|
||||
class JvmManager;
|
||||
namespace soc {
|
||||
class SOC;
|
||||
}
|
||||
namespace gpu {
|
||||
class GPU;
|
||||
}
|
||||
@ -637,6 +640,7 @@ namespace skyline {
|
||||
std::shared_ptr<Settings> settings;
|
||||
std::shared_ptr<Logger> logger;
|
||||
std::shared_ptr<loader::Loader> loader;
|
||||
std::shared_ptr<soc::SOC> soc;
|
||||
std::shared_ptr<gpu::GPU> gpu;
|
||||
std::shared_ptr<audio::Audio> audio;
|
||||
std::shared_ptr<nce::NCE> nce;
|
||||
|
@ -3,30 +3,16 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "gpu/gpfifo.h"
|
||||
#include "gpu/syncpoint.h"
|
||||
#include "gpu/engines/maxwell_3d.h"
|
||||
#include "gpu/presentation_engine.h"
|
||||
|
||||
namespace skyline::gpu {
|
||||
/**
|
||||
* @brief A common interface to the GPU where all objects relevant to it are present
|
||||
* @brief An interface to host GPU structures, anything concerning host GPU/Presentation APIs is encapsulated by this
|
||||
*/
|
||||
class GPU {
|
||||
private:
|
||||
const DeviceState &state;
|
||||
|
||||
public:
|
||||
PresentationEngine presentation;
|
||||
vmm::MemoryManager memoryManager;
|
||||
std::shared_ptr<engine::Engine> fermi2D;
|
||||
std::shared_ptr<engine::Maxwell3D> maxwell3D;
|
||||
std::shared_ptr<engine::Engine> maxwellCompute;
|
||||
std::shared_ptr<engine::Engine> maxwellDma;
|
||||
std::shared_ptr<engine::Engine> keplerMemory;
|
||||
std::array<Syncpoint, constant::MaxHwSyncpointCount> syncpoints{};
|
||||
gpfifo::GPFIFO gpfifo;
|
||||
|
||||
GPU(const DeviceState &state) : state(state), presentation(state), memoryManager(state), gpfifo(state), fermi2D(std::make_shared<engine::Engine>(state)), keplerMemory(std::make_shared<engine::Engine>(state)), maxwell3D(std::make_shared<engine::Maxwell3D>(state)), maxwellCompute(std::make_shared<engine::Engine>(state)), maxwellDma(std::make_shared<engine::Engine>(state)) {}
|
||||
GPU(const DeviceState &state) : presentation(state) {}
|
||||
};
|
||||
}
|
||||
|
@ -1,179 +0,0 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "engine.h"
|
||||
|
||||
namespace skyline {
|
||||
namespace constant {
|
||||
constexpr u32 GpfifoRegisterCount{0x40}; //!< The number of GPFIFO registers
|
||||
}
|
||||
|
||||
namespace gpu::engine {
|
||||
/**
|
||||
* @brief The GPFIFO engine handles managing macros and semaphores
|
||||
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt
|
||||
*/
|
||||
class GPFIFO : public Engine {
|
||||
private:
|
||||
/**
|
||||
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L65
|
||||
*/
|
||||
#pragma pack(push, 1)
|
||||
union Registers {
|
||||
std::array<u32, constant::GpfifoRegisterCount> raw;
|
||||
|
||||
enum class SemaphoreOperation : u8 {
|
||||
Acquire = 1,
|
||||
Release = 2,
|
||||
AcqGeq = 4,
|
||||
AcqAnd = 8,
|
||||
Reduction = 16,
|
||||
};
|
||||
|
||||
enum class SemaphoreAcquireSwitch : u8 {
|
||||
Disabled = 0,
|
||||
Enabled = 1,
|
||||
};
|
||||
|
||||
enum class SemaphoreReleaseWfi : u8 {
|
||||
En = 0,
|
||||
Dis = 1,
|
||||
};
|
||||
|
||||
enum class SemaphoreReleaseSize : u8 {
|
||||
SixteenBytes = 0,
|
||||
FourBytes = 1,
|
||||
};
|
||||
|
||||
enum class SemaphoreReduction : u8 {
|
||||
Min = 0,
|
||||
Max = 1,
|
||||
Xor = 2,
|
||||
And = 3,
|
||||
Or = 4,
|
||||
Add = 5,
|
||||
Inc = 6,
|
||||
Dec = 7,
|
||||
};
|
||||
|
||||
enum class SemaphoreFormat : u8 {
|
||||
Signed = 0,
|
||||
Unsigned = 1,
|
||||
};
|
||||
|
||||
enum class MemOpTlbInvalidatePdb : u8 {
|
||||
One = 0,
|
||||
All = 1,
|
||||
};
|
||||
|
||||
enum class SyncpointOperation : u8 {
|
||||
Wait = 0,
|
||||
Incr = 1,
|
||||
};
|
||||
|
||||
enum class SyncpointWaitSwitch : u8 {
|
||||
Dis = 0,
|
||||
En = 1,
|
||||
};
|
||||
|
||||
enum class WfiScope : u8 {
|
||||
CurrentScgType = 0,
|
||||
All = 1,
|
||||
};
|
||||
|
||||
enum class YieldOp : u8 {
|
||||
Nop = 0,
|
||||
PbdmaTimeslice = 1,
|
||||
RunlistTimeslice = 2,
|
||||
Tsg = 3,
|
||||
};
|
||||
|
||||
struct {
|
||||
struct {
|
||||
u16 nvClass : 16;
|
||||
u8 engine : 5;
|
||||
u16 _pad_ : 11;
|
||||
} setObject;
|
||||
|
||||
u32 illegal;
|
||||
u32 nop;
|
||||
u32 _pad0_;
|
||||
|
||||
struct {
|
||||
struct {
|
||||
u32 offsetUpper : 8;
|
||||
u32 _pad0_ : 24;
|
||||
};
|
||||
|
||||
struct {
|
||||
u8 _pad1_ : 2;
|
||||
u32 offsetLower : 30;
|
||||
};
|
||||
|
||||
u32 payload;
|
||||
|
||||
struct {
|
||||
SemaphoreOperation operation : 5;
|
||||
u8 _pad2_ : 7;
|
||||
SemaphoreAcquireSwitch acquireSwitch : 1;
|
||||
u8 _pad3_ : 7;
|
||||
SemaphoreReleaseWfi releaseWfi : 1;
|
||||
u8 _pad4_ : 3;
|
||||
SemaphoreReleaseSize releaseSize : 1;
|
||||
u8 _pad5_ : 2;
|
||||
SemaphoreReduction reduction : 4;
|
||||
SemaphoreFormat format : 1;
|
||||
};
|
||||
} semaphore;
|
||||
|
||||
u32 nonStallInterrupt;
|
||||
u32 fbFlush;
|
||||
u32 _pad1_[2];
|
||||
u32 memOpC;
|
||||
u32 memOpD;
|
||||
u32 _pad2_[6];
|
||||
u32 setReference;
|
||||
u32 _pad3_[7];
|
||||
|
||||
struct {
|
||||
u32 payload;
|
||||
|
||||
struct {
|
||||
SyncpointOperation operation : 1;
|
||||
u8 _pad0_ : 3;
|
||||
SyncpointWaitSwitch waitSwitch : 1;
|
||||
u8 _pad1_ : 3;
|
||||
u16 index : 12;
|
||||
u16 _pad2_ : 12;
|
||||
};
|
||||
} syncpoint;
|
||||
|
||||
struct {
|
||||
WfiScope scope : 1;
|
||||
u32 _pad_ : 31;
|
||||
} wfi;
|
||||
|
||||
u32 crcCheck;
|
||||
|
||||
struct {
|
||||
YieldOp op : 2;
|
||||
u32 _pad_ : 30;
|
||||
} yield;
|
||||
};
|
||||
} registers{};
|
||||
static_assert(sizeof(Registers) == (constant::GpfifoRegisterCount * sizeof(u32)));
|
||||
#pragma pack(pop)
|
||||
|
||||
public:
|
||||
GPFIFO(const DeviceState &state) : Engine(state) {}
|
||||
|
||||
void CallMethod(MethodParams params) override {
|
||||
state.logger->Debug("Called method in GPFIFO: 0x{:X} args: 0x{:X}", params.method, params.argument);
|
||||
|
||||
registers.raw[params.method] = params.argument;
|
||||
};
|
||||
};
|
||||
}
|
||||
}
|
@ -1,575 +0,0 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <gpu/macro_interpreter.h>
|
||||
#include "engine.h"
|
||||
|
||||
#define MAXWELL3D_OFFSET(field) U32_OFFSET(skyline::gpu::engine::Maxwell3D::Registers, field)
|
||||
|
||||
namespace skyline {
|
||||
namespace constant {
|
||||
constexpr u32 Maxwell3DRegisterCounter{0xE00}; //!< The number of Maxwell 3D registers
|
||||
}
|
||||
|
||||
namespace gpu::engine {
|
||||
/**
|
||||
* @brief The Maxwell 3D engine handles processing 3D graphics
|
||||
*/
|
||||
class Maxwell3D : public Engine {
|
||||
private:
|
||||
std::array<size_t, 0x80> macroPositions{}; //!< The positions of each individual macro in macro memory, there can be a maximum of 0x80 macros at any one time
|
||||
|
||||
struct {
|
||||
u32 index;
|
||||
std::vector<u32> arguments;
|
||||
} macroInvocation{}; //!< Data for a macro that is pending execution
|
||||
|
||||
MacroInterpreter macroInterpreter;
|
||||
|
||||
void HandleSemaphoreCounterOperation();
|
||||
|
||||
void WriteSemaphoreResult(u64 result);
|
||||
|
||||
public:
|
||||
/**
|
||||
* @url https://github.com/devkitPro/deko3d/blob/master/source/maxwell/engine_3d.def#L478
|
||||
*/
|
||||
#pragma pack(push, 1)
|
||||
union Registers {
|
||||
std::array<u32, constant::Maxwell3DRegisterCounter> raw;
|
||||
|
||||
struct Address {
|
||||
u32 high;
|
||||
u32 low;
|
||||
|
||||
u64 Pack() {
|
||||
return (static_cast<u64>(high) << 32) | low;
|
||||
}
|
||||
};
|
||||
static_assert(sizeof(Address) == sizeof(u64));
|
||||
|
||||
enum class MmeShadowRamControl : u32 {
|
||||
MethodTrack = 0,
|
||||
MethodTrackWithFilter = 1,
|
||||
MethodPassthrough = 2,
|
||||
MethodReplay = 3,
|
||||
};
|
||||
|
||||
struct ViewportTransform {
|
||||
enum class Swizzle : u8 {
|
||||
PositiveX = 0,
|
||||
NegativeX = 1,
|
||||
PositiveY = 2,
|
||||
NegativeY = 3,
|
||||
PositiveZ = 4,
|
||||
NegativeZ = 5,
|
||||
PositiveW = 6,
|
||||
NegativeW = 7,
|
||||
};
|
||||
|
||||
float scaleX;
|
||||
float scaleY;
|
||||
float scaleZ;
|
||||
float translateX;
|
||||
float translateY;
|
||||
float translateZ;
|
||||
|
||||
struct {
|
||||
Swizzle x : 3;
|
||||
u8 _pad0_ : 1;
|
||||
Swizzle y : 3;
|
||||
u8 _pad1_ : 1;
|
||||
Swizzle z : 3;
|
||||
u8 _pad2_ : 1;
|
||||
Swizzle w : 3;
|
||||
u32 _pad3_ : 17;
|
||||
} swizzles;
|
||||
|
||||
struct {
|
||||
u8 x : 5;
|
||||
u8 _pad0_ : 3;
|
||||
u8 y : 5;
|
||||
u32 _pad1_ : 19;
|
||||
} subpixelPrecisionBias;
|
||||
};
|
||||
static_assert(sizeof(ViewportTransform) == (0x8 * sizeof(u32)));
|
||||
|
||||
struct Viewport {
|
||||
struct {
|
||||
u16 x;
|
||||
u16 width;
|
||||
};
|
||||
|
||||
struct {
|
||||
u16 y;
|
||||
u16 height;
|
||||
};
|
||||
|
||||
float depthRangeNear;
|
||||
float depthRangeFar;
|
||||
};
|
||||
static_assert(sizeof(Viewport) == (0x4 * sizeof(u32)));
|
||||
|
||||
enum class PolygonMode : u32 {
|
||||
Point = 0x1B00,
|
||||
Line = 0x1B01,
|
||||
Fill = 0x1B02,
|
||||
};
|
||||
|
||||
union VertexAttribute {
|
||||
u32 raw;
|
||||
|
||||
enum class Size : u8 {
|
||||
Size_1x32 = 0x12,
|
||||
Size_2x32 = 0x04,
|
||||
Size_3x32 = 0x02,
|
||||
Size_4x32 = 0x01,
|
||||
Size_1x16 = 0x1B,
|
||||
Size_2x16 = 0x0F,
|
||||
Size_3x16 = 0x05,
|
||||
Size_4x16 = 0x03,
|
||||
Size_1x8 = 0x1D,
|
||||
Size_2x8 = 0x18,
|
||||
Size_3x8 = 0x13,
|
||||
Size_4x8 = 0x0A,
|
||||
Size_10_10_10_2 = 0x30,
|
||||
Size_11_11_10 = 0x31,
|
||||
};
|
||||
|
||||
enum class Type : u8 {
|
||||
None = 0,
|
||||
SNorm = 1,
|
||||
UNorm = 2,
|
||||
SInt = 3,
|
||||
UInt = 4,
|
||||
UScaled = 5,
|
||||
SScaled = 6,
|
||||
Float = 7,
|
||||
};
|
||||
|
||||
struct {
|
||||
u8 bufferId : 5;
|
||||
u8 _pad0_ : 1;
|
||||
bool fixed : 1;
|
||||
u16 offset : 14;
|
||||
Size size : 6;
|
||||
Type type : 3;
|
||||
u8 _pad1_ : 1;
|
||||
bool bgra : 1;
|
||||
};
|
||||
};
|
||||
static_assert(sizeof(VertexAttribute) == sizeof(u32));
|
||||
|
||||
enum class CompareOp : u32 {
|
||||
Never = 1,
|
||||
Less = 2,
|
||||
Equal = 3,
|
||||
LessOrEqual = 4,
|
||||
Greater = 5,
|
||||
NotEqual = 6,
|
||||
GreaterOrEqual = 7,
|
||||
Always = 8,
|
||||
|
||||
NeverGL = 0x200,
|
||||
LessGL = 0x201,
|
||||
EqualGL = 0x202,
|
||||
LessOrEqualGL = 0x203,
|
||||
GreaterGL = 0x204,
|
||||
NotEqualGL = 0x205,
|
||||
GreaterOrEqualGL = 0x206,
|
||||
AlwaysGL = 0x207,
|
||||
};
|
||||
|
||||
struct Blend {
|
||||
enum class Op : u32 {
|
||||
Add = 1,
|
||||
Subtract = 2,
|
||||
ReverseSubtract = 3,
|
||||
Minimum = 4,
|
||||
Maximum = 5,
|
||||
|
||||
AddGL = 0x8006,
|
||||
SubtractGL = 0x8007,
|
||||
ReverseSubtractGL = 0x8008,
|
||||
MinimumGL = 0x800A,
|
||||
MaximumGL = 0x800B,
|
||||
};
|
||||
|
||||
enum class Factor : u32 {
|
||||
Zero = 0x1,
|
||||
One = 0x2,
|
||||
SourceColor = 0x3,
|
||||
OneMinusSourceColor = 0x4,
|
||||
SourceAlpha = 0x5,
|
||||
OneMinusSourceAlpha = 0x6,
|
||||
DestAlpha = 0x7,
|
||||
OneMinusDestAlpha = 0x8,
|
||||
DestColor = 0x9,
|
||||
OneMinusDestColor = 0xA,
|
||||
SourceAlphaSaturate = 0xB,
|
||||
Source1Color = 0x10,
|
||||
OneMinusSource1Color = 0x11,
|
||||
Source1Alpha = 0x12,
|
||||
OneMinusSource1Alpha = 0x13,
|
||||
ConstantColor = 0x61,
|
||||
OneMinusConstantColor = 0x62,
|
||||
ConstantAlpha = 0x63,
|
||||
OneMinusConstantAlpha = 0x64,
|
||||
|
||||
ZeroGL = 0x4000,
|
||||
OneGL = 0x4001,
|
||||
SourceColorGL = 0x4300,
|
||||
OneMinusSourceColorGL = 0x4301,
|
||||
SourceAlphaGL = 0x4302,
|
||||
OneMinusSourceAlphaGL = 0x4303,
|
||||
DestAlphaGL = 0x4304,
|
||||
OneMinusDestAlphaGL = 0x4305,
|
||||
DestColorGL = 0x4306,
|
||||
OneMinusDestColorGL = 0x4307,
|
||||
SourceAlphaSaturateGL = 0x4308,
|
||||
ConstantColorGL = 0xC001,
|
||||
OneMinusConstantColorGL = 0xC002,
|
||||
ConstantAlphaGL = 0xC003,
|
||||
OneMinusConstantAlphaGL = 0xC004,
|
||||
Source1ColorGL = 0xC900,
|
||||
OneMinusSource1ColorGL = 0xC901,
|
||||
Source1AlphaGL = 0xC902,
|
||||
OneMinusSource1AlphaGL = 0xC903,
|
||||
};
|
||||
|
||||
struct {
|
||||
u32 seperateAlpha;
|
||||
Op colorOp;
|
||||
Factor colorSrcFactor;
|
||||
Factor colorDestFactor;
|
||||
Op alphaOp;
|
||||
Factor alphaSrcFactor;
|
||||
Factor alphaDestFactor;
|
||||
u32 _pad_;
|
||||
};
|
||||
};
|
||||
static_assert(sizeof(Blend) == (sizeof(u32) * 8));
|
||||
|
||||
enum class StencilOp : u32 {
|
||||
Keep = 1,
|
||||
Zero = 2,
|
||||
Replace = 3,
|
||||
IncrementAndClamp = 4,
|
||||
DecrementAndClamp = 5,
|
||||
Invert = 6,
|
||||
IncrementAndWrap = 7,
|
||||
DecrementAndWrap = 8,
|
||||
};
|
||||
|
||||
enum class FrontFace : u32 {
|
||||
Clockwise = 0x900,
|
||||
CounterClockwise = 0x901,
|
||||
};
|
||||
|
||||
enum class CullFace : u32 {
|
||||
Front = 0x404,
|
||||
Back = 0x405,
|
||||
FrontAndBack = 0x408,
|
||||
};
|
||||
|
||||
union ColorWriteMask {
|
||||
u32 raw;
|
||||
|
||||
struct {
|
||||
u8 r : 4;
|
||||
u8 g : 4;
|
||||
u8 b : 4;
|
||||
u8 a : 4;
|
||||
};
|
||||
};
|
||||
static_assert(sizeof(ColorWriteMask) == sizeof(u32));
|
||||
|
||||
struct SemaphoreInfo {
|
||||
enum class Op : u8 {
|
||||
Release = 0,
|
||||
Acquire = 1,
|
||||
Counter = 2,
|
||||
Trap = 3,
|
||||
};
|
||||
|
||||
enum class ReductionOp : u8 {
|
||||
Add = 0,
|
||||
Min = 1,
|
||||
Max = 2,
|
||||
Inc = 3,
|
||||
Dec = 4,
|
||||
And = 5,
|
||||
Or = 6,
|
||||
Xor = 7,
|
||||
};
|
||||
|
||||
enum class Unit : u8 {
|
||||
VFetch = 1,
|
||||
VP = 2,
|
||||
Rast = 4,
|
||||
StrmOut = 5,
|
||||
GP = 6,
|
||||
ZCull = 7,
|
||||
Prop = 10,
|
||||
Crop = 15,
|
||||
};
|
||||
|
||||
enum class SyncCondition : u8 {
|
||||
NotEqual = 0,
|
||||
GreaterThan = 1,
|
||||
};
|
||||
|
||||
enum class Format : u8 {
|
||||
U32 = 0,
|
||||
I32 = 1,
|
||||
};
|
||||
|
||||
enum class CounterType : u8 {
|
||||
Zero = 0x0,
|
||||
InputVertices = 0x1,
|
||||
InputPrimitives = 0x3,
|
||||
VertexShaderInvocations = 0x5,
|
||||
GeometryShaderInvocations = 0x7,
|
||||
GeometryShaderPrimitives = 0x9,
|
||||
ZcullStats0 = 0xA,
|
||||
TransformFeedbackPrimitivesWritten = 0xB,
|
||||
ZcullStats1 = 0xC,
|
||||
ZcullStats2 = 0xE,
|
||||
ClipperInputPrimitives = 0xF,
|
||||
ZcullStats3 = 0x10,
|
||||
ClipperOutputPrimitives = 0x11,
|
||||
PrimitivesGenerated = 0x12,
|
||||
FragmentShaderInvocations = 0x13,
|
||||
SamplesPassed = 0x15,
|
||||
TransformFeedbackOffset = 0x1A,
|
||||
TessControlShaderInvocations = 0x1B,
|
||||
TessEvaluationShaderInvocations = 0x1D,
|
||||
TessEvaluationShaderPrimitives = 0x1F,
|
||||
};
|
||||
|
||||
enum class StructureSize : u8 {
|
||||
FourWords = 0,
|
||||
OneWord = 1,
|
||||
};
|
||||
|
||||
Op op : 2;
|
||||
bool flushDisable : 1;
|
||||
bool reductionEnable : 1;
|
||||
bool fenceEnable : 1;
|
||||
u8 _pad0_ : 4;
|
||||
ReductionOp reductionOp : 3;
|
||||
Unit unit : 4;
|
||||
SyncCondition syncCondition : 1;
|
||||
Format format : 2;
|
||||
u8 _pad1_ : 1;
|
||||
bool awakenEnable : 1;
|
||||
u8 _pad2_ : 2;
|
||||
CounterType counterType : 5;
|
||||
StructureSize structureSize : 1;
|
||||
};
|
||||
static_assert(sizeof(SemaphoreInfo) == sizeof(u32));
|
||||
|
||||
enum class CoordOrigin : u8 {
|
||||
LowerLeft = 0,
|
||||
UpperLeft = 1,
|
||||
};
|
||||
|
||||
struct {
|
||||
u32 _pad0_[0x40]; // 0x0
|
||||
u32 noOperation; // 0x40
|
||||
u32 _pad1_[0x3]; // 0x41
|
||||
u32 waitForIdle; // 0x44
|
||||
|
||||
struct {
|
||||
u32 instructionRamPointer; // 0x45
|
||||
u32 instructionRamLoad; // 0x46
|
||||
u32 startAddressRamPointer; // 0x47
|
||||
u32 startAddressRamLoad; // 0x48
|
||||
MmeShadowRamControl shadowRamControl; // 0x49
|
||||
} mme;
|
||||
|
||||
u32 _pad2_[0x68]; // 0x4A
|
||||
|
||||
struct {
|
||||
u16 id : 12;
|
||||
u8 _pad0_ : 4;
|
||||
bool flushCache : 1;
|
||||
u8 _pad1_ : 3;
|
||||
bool increment : 1;
|
||||
u16 _pad2_ : 11;
|
||||
} syncpointAction; // 0xB2
|
||||
|
||||
u32 _pad3_[0x2C]; // 0xB3
|
||||
u32 rasterizerEnable; // 0xDF
|
||||
u32 _pad4_[0x1A0]; // 0xE0
|
||||
std::array<ViewportTransform, 0x10> viewportTransform; // 0x280
|
||||
std::array<Viewport, 0x10> viewport; // 0x300
|
||||
u32 _pad5_[0x2B]; // 0x340
|
||||
|
||||
struct {
|
||||
PolygonMode front; // 0x36B
|
||||
PolygonMode back; // 0x36C
|
||||
} polygonMode;
|
||||
|
||||
u32 _pad6_[0x68]; // 0x36D
|
||||
|
||||
struct {
|
||||
u32 compareRef; // 0x3D5
|
||||
u32 writeMask; // 0x3D6
|
||||
u32 compareMask; // 0x3D7
|
||||
} stencilBackExtra;
|
||||
|
||||
u32 _pad7_[0x13]; // 0x3D8
|
||||
u32 rtSeparateFragData; // 0x3EB
|
||||
u32 _pad8_[0x6C]; // 0x3EC
|
||||
std::array<VertexAttribute, 0x20> vertexAttributeState; // 0x458
|
||||
u32 _pad9_[0x4B]; // 0x478
|
||||
CompareOp depthTestFunc; // 0x4C3
|
||||
float alphaTestRef; // 0x4C4
|
||||
CompareOp alphaTestFunc; // 0x4C5
|
||||
u32 drawTFBStride; // 0x4C6
|
||||
|
||||
struct {
|
||||
float r; // 0x4C7
|
||||
float g; // 0x4C8
|
||||
float b; // 0x4C9
|
||||
float a; // 0x4CA
|
||||
} blendConstant;
|
||||
|
||||
u32 _pad10_[0x4]; // 0x4CB
|
||||
|
||||
struct {
|
||||
u32 seperateAlpha; // 0x4CF
|
||||
Blend::Op colorOp; // 0x4D0
|
||||
Blend::Factor colorSrcFactor; // 0x4D1
|
||||
Blend::Factor colorDestFactor; // 0x4D2
|
||||
Blend::Op alphaOp; // 0x4D3
|
||||
Blend::Factor alphaSrcFactor; // 0x4D4
|
||||
u32 _pad_; // 0x4D5
|
||||
Blend::Factor alphaDestFactor; // 0x4D6
|
||||
|
||||
u32 enableCommon; // 0x4D7
|
||||
std::array<u32, 8> enable; // 0x4D8 For each render target
|
||||
} blend;
|
||||
|
||||
u32 stencilEnable; // 0x4E0
|
||||
|
||||
struct {
|
||||
StencilOp failOp; // 0x4E1
|
||||
StencilOp zFailOp; // 0x4E2
|
||||
StencilOp zPassOp; // 0x4E3
|
||||
|
||||
struct {
|
||||
CompareOp op; // 0x4E4
|
||||
i32 ref; // 0x4E5
|
||||
u32 mask; // 0x4E6
|
||||
} compare;
|
||||
|
||||
u32 writeMask; // 0x4E7
|
||||
} stencilFront;
|
||||
|
||||
u32 _pad11_[0x4]; // 0x4E8
|
||||
float lineWidthSmooth; // 0x4EC
|
||||
float lineWidthAliased; // 0x4ED
|
||||
u32 _pad12_[0x1F]; // 0x4EE
|
||||
u32 drawBaseVertex; // 0x50D
|
||||
u32 drawBaseInstance; // 0x50E
|
||||
u32 _pad13_[0x35]; // 0x50F
|
||||
u32 clipDistanceEnable; // 0x544
|
||||
u32 sampleCounterEnable; // 0x545
|
||||
float pointSpriteSize; // 0x546
|
||||
u32 zCullStatCountersEnable; // 0x547
|
||||
u32 pointSpriteEnable; // 0x548
|
||||
u32 _pad14_; // 0x549
|
||||
u32 shaderExceptions; // 0x54A
|
||||
u32 _pad15_[0x2]; // 0x54B
|
||||
u32 multisampleEnable; // 0x54D
|
||||
u32 depthTargetEnable; // 0x54E
|
||||
|
||||
struct {
|
||||
bool alphaToCoverage : 1;
|
||||
u8 _pad0_ : 3;
|
||||
bool alphaToOne : 1;
|
||||
u32 _pad1_ : 27;
|
||||
} multisampleControl; // 0x54F
|
||||
|
||||
u32 _pad16_[0x7]; // 0x550
|
||||
|
||||
struct {
|
||||
Address address; // 0x557
|
||||
u32 maximumIndex; // 0x559
|
||||
} texSamplerPool;
|
||||
|
||||
u32 _pad17_; // 0x55A
|
||||
u32 polygonOffsetFactor; // 0x55B
|
||||
u32 lineSmoothEnable; // 0x55C
|
||||
|
||||
struct {
|
||||
Address address; // 0x55D
|
||||
u32 maximumIndex; // 0x55F
|
||||
} texHeaderPool;
|
||||
|
||||
u32 _pad18_[0x5]; // 0x560
|
||||
|
||||
u32 stencilTwoSideEnable; // 0x565
|
||||
|
||||
struct {
|
||||
StencilOp failOp; // 0x566
|
||||
StencilOp zFailOp; // 0x567
|
||||
StencilOp zPassOp; // 0x568
|
||||
CompareOp compareOp; // 0x569
|
||||
} stencilBack;
|
||||
|
||||
u32 _pad19_[0x17]; // 0x56A
|
||||
|
||||
struct {
|
||||
u8 _unk_ : 2;
|
||||
CoordOrigin origin : 1;
|
||||
u16 enable : 10;
|
||||
u32 _pad_ : 19;
|
||||
} pointCoordReplace; // 0x581
|
||||
|
||||
u32 _pad20_[0xC4]; // 0x582
|
||||
u32 cullFaceEnable; // 0x646
|
||||
FrontFace frontFace; // 0x647
|
||||
CullFace cullFace; // 0x648
|
||||
u32 pixelCentreImage; // 0x649
|
||||
u32 _pad21_; // 0x64A
|
||||
u32 viewportTransformEnable; // 0x64B
|
||||
u32 _pad22_[0x34]; // 0x64C
|
||||
std::array<ColorWriteMask, 8> colorMask; // 0x680 For each render target
|
||||
u32 _pad23_[0x38]; // 0x688
|
||||
|
||||
struct {
|
||||
Address address; // 0x6C0
|
||||
u32 payload; // 0x6C2
|
||||
SemaphoreInfo info; // 0x6C3
|
||||
} semaphore;
|
||||
|
||||
u32 _pad24_[0xBC]; // 0x6C4
|
||||
std::array<Blend, 8> independentBlend; // 0x780 For each render target
|
||||
u32 _pad25_[0x100]; // 0x7C0
|
||||
u32 firmwareCall[0x20]; // 0x8C0
|
||||
};
|
||||
};
|
||||
static_assert(sizeof(Registers) == (constant::Maxwell3DRegisterCounter * sizeof(u32)));
|
||||
#pragma pack(pop)
|
||||
|
||||
Registers registers{};
|
||||
Registers shadowRegisters{}; //!< The shadow registers, their function is controlled by the 'shadowRamControl' register
|
||||
|
||||
std::array<u32, 0x10000> macroCode{}; //!< This stores GPU macros, the 256kb size is from Ryujinx
|
||||
|
||||
Maxwell3D(const DeviceState &state);
|
||||
|
||||
/**
|
||||
* @brief Resets the Maxwell 3D registers to their default values
|
||||
*/
|
||||
void ResetRegs();
|
||||
|
||||
void CallMethod(MethodParams params) override;
|
||||
};
|
||||
}
|
||||
}
|
@ -1,170 +0,0 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <common/circular_queue.h>
|
||||
#include "engines/gpfifo.h"
|
||||
#include "memory_manager.h"
|
||||
|
||||
namespace skyline::gpu {
|
||||
namespace gpfifo {
|
||||
/**
|
||||
* @brief A GPFIFO entry as submitted through 'SubmitGpfifo'
|
||||
* @url https://nvidia.github.io/open-gpu-doc/manuals/volta/gv100/dev_pbdma.ref.txt
|
||||
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L155
|
||||
*/
|
||||
struct GpEntry {
|
||||
enum class Fetch : u8 {
|
||||
Unconditional = 0,
|
||||
Conditional = 1,
|
||||
};
|
||||
|
||||
union {
|
||||
u32 entry0;
|
||||
|
||||
struct {
|
||||
Fetch fetch : 1;
|
||||
u8 _pad_ : 1;
|
||||
u32 get : 30;
|
||||
};
|
||||
};
|
||||
|
||||
enum class Opcode : u8 {
|
||||
Nop = 0,
|
||||
Illegal = 1,
|
||||
Crc = 2,
|
||||
PbCrc = 3,
|
||||
};
|
||||
|
||||
enum class Priv : u8 {
|
||||
User = 0,
|
||||
Kernel = 1,
|
||||
};
|
||||
|
||||
enum class Level : u8 {
|
||||
Main = 0,
|
||||
Subroutine = 1,
|
||||
};
|
||||
|
||||
enum class Sync : u8 {
|
||||
Proceed = 0,
|
||||
Wait = 1,
|
||||
};
|
||||
|
||||
union {
|
||||
u32 entry1;
|
||||
|
||||
struct {
|
||||
union {
|
||||
u8 getHi;
|
||||
Opcode opcode;
|
||||
};
|
||||
|
||||
Priv priv : 1;
|
||||
Level level : 1;
|
||||
u32 size : 21;
|
||||
Sync sync : 1;
|
||||
};
|
||||
};
|
||||
|
||||
constexpr u64 Address() const {
|
||||
return (static_cast<u64>(getHi) << 32) | (static_cast<u64>(get) << 2);
|
||||
}
|
||||
};
|
||||
static_assert(sizeof(GpEntry) == sizeof(u64));
|
||||
|
||||
/**
|
||||
* @brief A single pushbuffer method header that describes a compressed method sequence
|
||||
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_ram.ref.txt#L850
|
||||
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L179
|
||||
*/
|
||||
union PushBufferMethodHeader {
|
||||
u32 raw;
|
||||
|
||||
enum class TertOp : u8 {
|
||||
Grp0IncMethod = 0,
|
||||
Grp0SetSubDevMask = 1,
|
||||
Grp0StoreSubDevMask = 2,
|
||||
Grp0UseSubDevMask = 3,
|
||||
Grp2NonIncMethod = 0,
|
||||
};
|
||||
|
||||
enum class SecOp : u8 {
|
||||
Grp0UseTert = 0,
|
||||
IncMethod = 1,
|
||||
Grp2UseTert = 2,
|
||||
NonIncMethod = 3,
|
||||
ImmdDataMethod = 4,
|
||||
OneInc = 5,
|
||||
Reserved6 = 6,
|
||||
EndPbSegment = 7,
|
||||
};
|
||||
|
||||
u16 methodAddress : 12;
|
||||
struct {
|
||||
u8 _pad0_ : 4;
|
||||
u16 subDeviceMask : 12;
|
||||
};
|
||||
|
||||
struct {
|
||||
u16 _pad1_ : 13;
|
||||
u8 methodSubChannel : 3;
|
||||
union {
|
||||
TertOp tertOp : 3;
|
||||
u16 methodCount : 13;
|
||||
u16 immdData : 13;
|
||||
};
|
||||
};
|
||||
|
||||
struct {
|
||||
u32 _pad2_ : 29;
|
||||
SecOp secOp : 3;
|
||||
};
|
||||
};
|
||||
static_assert(sizeof(PushBufferMethodHeader) == sizeof(u32));
|
||||
|
||||
/**
|
||||
* @brief The GPFIFO class handles creating pushbuffers from GP entries and then processing them
|
||||
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt#L62
|
||||
*/
|
||||
class GPFIFO {
|
||||
const DeviceState &state;
|
||||
engine::GPFIFO gpfifoEngine; //!< The engine for processing GPFIFO method calls
|
||||
std::array<std::shared_ptr<engine::Engine>, 8> subchannels;
|
||||
std::optional<CircularQueue<GpEntry>> pushBuffers;
|
||||
std::thread thread; //!< The thread that manages processing of pushbuffers
|
||||
std::vector<u32> pushBufferData; //!< Persistent vector storing pushbuffer data to avoid constant reallocations
|
||||
|
||||
/**
|
||||
* @brief Processes the pushbuffer contained within the given GpEntry, calling methods as needed
|
||||
*/
|
||||
void Process(GpEntry gpEntry);
|
||||
|
||||
/**
|
||||
* @brief Sends a method call to the GPU hardware
|
||||
*/
|
||||
void Send(MethodParams params);
|
||||
|
||||
public:
|
||||
GPFIFO(const DeviceState &state) : state(state), gpfifoEngine(state) {}
|
||||
|
||||
~GPFIFO();
|
||||
|
||||
/**
|
||||
* @param numBuffers The amount of push-buffers to allocate in the circular buffer
|
||||
*/
|
||||
void Initialize(size_t numBuffers);
|
||||
|
||||
/**
|
||||
* @brief Executes all pending entries in the FIFO
|
||||
*/
|
||||
void Run();
|
||||
|
||||
/**
|
||||
* @brief Pushes a list of entries to the FIFO, these commands will be executed on calls to 'Step'
|
||||
*/
|
||||
void Push(span<GpEntry> entries);
|
||||
};
|
||||
}
|
||||
}
|
@ -1,145 +0,0 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <common.h>
|
||||
|
||||
namespace skyline {
|
||||
namespace constant {
|
||||
constexpr u64 GpuPageSize{1 << 16}; //!< The page size of the GPU address space
|
||||
}
|
||||
|
||||
namespace gpu::vmm {
|
||||
/**
 * @brief The possible states of a chunk of the virtual address space
 */
enum class ChunkState {
    //! The chunk is unmapped
    Unmapped,
    //! The chunk is reserved
    Reserved,
    //! The chunk is mapped and a CPU side address is present
    Mapped,
};
|
||||
|
||||
struct ChunkDescriptor {
|
||||
u64 virtAddr; //!< The address of the chunk in the virtual address space
|
||||
u64 size; //!< The size of the chunk in bytes
|
||||
u8 *cpuPtr; //!< A pointer to the chunk in the application's address space (if mapped)
|
||||
ChunkState state;
|
||||
|
||||
ChunkDescriptor(u64 virtAddr, u64 size, u8 *cpuPtr, ChunkState state) : virtAddr(virtAddr), size(size), cpuPtr(cpuPtr), state(state) {}
|
||||
|
||||
/**
|
||||
* @return If the given chunk can be contained wholly within this chunk
|
||||
*/
|
||||
inline bool CanContain(const ChunkDescriptor &chunk) {
|
||||
return (chunk.virtAddr >= virtAddr) && ((size + virtAddr) >= (chunk.size + chunk.virtAddr));
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief The MemoryManager class handles mapping between a virtual address space and an application's address space
|
||||
*/
|
||||
class MemoryManager {
|
||||
private:
|
||||
const DeviceState &state;
|
||||
std::vector<ChunkDescriptor> chunks;
|
||||
std::shared_mutex vmmMutex;
|
||||
|
||||
/**
|
||||
* @brief Finds a chunk in the virtual address space that is larger than meets the given requirements
|
||||
* @note vmmMutex MUST be locked when calling this
|
||||
* @param desiredState The state of the chunk to find
|
||||
* @param size The minimum size of the chunk to find
|
||||
* @param alignment The minimum alignment of the chunk to find
|
||||
* @return The first applicable chunk
|
||||
*/
|
||||
std::optional<ChunkDescriptor> FindChunk(ChunkState desiredState, u64 size, u64 alignment = 0);
|
||||
|
||||
/**
|
||||
* @brief Inserts a chunk into the chunk list, resizing and splitting as necessary
|
||||
* @note vmmMutex MUST be locked when calling this
|
||||
* @param newChunk The chunk to insert
|
||||
* @return The base virtual address of the inserted chunk
|
||||
*/
|
||||
u64 InsertChunk(const ChunkDescriptor &newChunk);
|
||||
|
||||
public:
|
||||
MemoryManager(const DeviceState &state);
|
||||
|
||||
/**
|
||||
* @brief Reserves a region of the virtual address space so it will not be chosen automatically when mapping
|
||||
* @param size The size of the region to reserve
|
||||
* @param alignment The alignment of the region to reserve
|
||||
* @return The base virtual address of the reserved region
|
||||
*/
|
||||
u64 ReserveSpace(u64 size, u64 alignment);
|
||||
|
||||
/**
|
||||
* @brief Reserves a fixed region of the virtual address space so it will not be chosen automatically when mapping
|
||||
* @param virtAddr The virtual base address of the region to allocate
|
||||
* @param size The size of the region to allocate
|
||||
* @return The base virtual address of the reserved region
|
||||
*/
|
||||
u64 ReserveFixed(u64 virtAddr, u64 size);
|
||||
|
||||
/**
|
||||
* @brief Maps a CPU memory region into an automatically chosen region of the virtual address space
|
||||
* @param cpuPtr A pointer to the region to be mapped into the virtual address space
|
||||
* @param size The size of the region to map
|
||||
* @return The base virtual address of the mapped region
|
||||
*/
|
||||
u64 MapAllocate(u8 *cpuPtr, u64 size);
|
||||
|
||||
/**
|
||||
* @brief Maps a CPU memory region to a fixed region in the virtual address space
|
||||
* @param virtAddr The target virtual address of the region
|
||||
* @param cpuPtr A pointer to the region to be mapped into the virtual address space
|
||||
* @param size The size of the region to map
|
||||
* @return The base virtual address of the mapped region
|
||||
*/
|
||||
u64 MapFixed(u64 virtAddr, u8 *cpuPtr, u64 size);
|
||||
|
||||
/**
|
||||
* @brief Unmaps all chunks in the given region from the virtual address space
|
||||
* @return Whether the operation succeeded
|
||||
*/
|
||||
bool Unmap(u64 virtAddr, u64 size);
|
||||
|
||||
void Read(u8 *destination, u64 virtAddr, u64 size);
|
||||
|
||||
/**
|
||||
* @brief Reads in a span from a region of the virtual address space
|
||||
*/
|
||||
template<typename T>
|
||||
void Read(span <T> destination, u64 virtAddr) {
|
||||
Read(reinterpret_cast<u8 *>(destination.data()), virtAddr, destination.size_bytes());
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Reads in an object from a region of the virtual address space
|
||||
* @tparam T The type of object to return
|
||||
*/
|
||||
template<typename T>
|
||||
T Read(u64 virtAddr) {
|
||||
T obj;
|
||||
Read(reinterpret_cast<u8 *>(&obj), virtAddr, sizeof(T));
|
||||
return obj;
|
||||
}
|
||||
|
||||
void Write(u8 *source, u64 virtAddr, u64 size);
|
||||
|
||||
/**
|
||||
* @brief Writes out a span to a region of the virtual address space
|
||||
*/
|
||||
template<typename T>
|
||||
void Write(span <T> source, u64 virtAddr) {
|
||||
Write(reinterpret_cast<u8 *>(source.data()), virtAddr, source.size_bytes());
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Reads in an object from a region of the virtual address space
|
||||
*/
|
||||
template<typename T>
|
||||
void Write(T source, u64 virtAddr) {
|
||||
Write(reinterpret_cast<u8 *>(&source), virtAddr, sizeof(T));
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
@ -9,7 +9,7 @@ extern skyline::u16 Fps;
|
||||
extern skyline::u32 FrameTime;
|
||||
|
||||
namespace skyline::gpu {
|
||||
PresentationEngine::PresentationEngine(const DeviceState &state) : state(state), vsyncEvent(std::make_shared<kernel::type::KEvent>(state, true)), bufferEvent(std::make_shared<kernel::type::KEvent>(state, true)), presentationTrack(static_cast<uint64_t>(trace::TrackIds::Presentation), perfetto::ProcessTrack::Current()) {
|
||||
PresentationEngine::PresentationEngine(const DeviceState &state) : state(state), vsyncEvent(std::make_shared<kernel::type::KEvent>(state, true)), bufferEvent(std::make_shared<kernel::type::KEvent>(state, true)), presentationTrack(static_cast<u64>(trace::TrackIds::Presentation), perfetto::ProcessTrack::Current()) {
|
||||
auto desc{presentationTrack.Serialize()};
|
||||
desc.set_name("Presentation");
|
||||
perfetto::TrackEvent::SetTrackDescriptor(presentationTrack, desc);
|
||||
|
@ -1,56 +0,0 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <common.h>
|
||||
|
||||
namespace skyline {
|
||||
namespace constant {
|
||||
constexpr size_t MaxHwSyncpointCount{192}; //!< The maximum number of host1x syncpoints on T210
|
||||
}
|
||||
|
||||
namespace gpu {
|
||||
/**
|
||||
* @brief The Syncpoint class represents a single syncpoint in the GPU which is used for GPU -> CPU synchronisation
|
||||
*/
|
||||
class Syncpoint {
|
||||
private:
|
||||
struct Waiter {
|
||||
u32 threshold; //!< The syncpoint value to wait on to be reached
|
||||
std::function<void()> callback; //!< The callback to do after the wait has ended
|
||||
};
|
||||
|
||||
std::mutex waiterLock; //!< Synchronizes insertions and deletions of waiters
|
||||
std::map<u64, Waiter> waiterMap;
|
||||
u64 nextWaiterId{1};
|
||||
|
||||
public:
|
||||
std::atomic<u32> value{};
|
||||
|
||||
/**
|
||||
* @brief Registers a new waiter with a callback that will be called when the syncpoint reaches the target threshold
|
||||
* @note The callback will be called immediately if the syncpoint has already reached the given threshold
|
||||
* @return A persistent identifier that can be used to refer to the waiter, or 0 if the threshold has already been reached
|
||||
*/
|
||||
u64 RegisterWaiter(u32 threshold, const std::function<void()> &callback);
|
||||
|
||||
/**
|
||||
* @brief Removes a waiter given by 'id' from the pending waiter map
|
||||
*/
|
||||
void DeregisterWaiter(u64 id);
|
||||
|
||||
/**
|
||||
* @brief Increments the syncpoint by 1
|
||||
* @return The new value of the syncpoint
|
||||
*/
|
||||
u32 Increment();
|
||||
|
||||
/**
|
||||
* @brief Waits for the syncpoint to reach given threshold
|
||||
* @return false if the timeout was reached, otherwise true
|
||||
*/
|
||||
bool Wait(u32 threshold, std::chrono::steady_clock::duration timeout);
|
||||
};
|
||||
}
|
||||
}
|
@ -1,7 +1,7 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#include <gpu.h>
|
||||
#include <soc.h>
|
||||
#include <services/nvdrv/driver.h>
|
||||
#include "nvmap.h"
|
||||
#include "nvhost_as_gpu.h"
|
||||
@ -36,9 +36,9 @@ namespace skyline::service::nvdrv::device {
|
||||
u64 size{static_cast<u64>(region.pages) * static_cast<u64>(region.pageSize)};
|
||||
|
||||
if (region.flags.fixed)
|
||||
region.offset = state.gpu->memoryManager.ReserveFixed(region.offset, size);
|
||||
region.offset = state.soc->gmmu.ReserveFixed(region.offset, size);
|
||||
else
|
||||
region.offset = state.gpu->memoryManager.ReserveSpace(size, region.align);
|
||||
region.offset = state.soc->gmmu.ReserveSpace(size, region.align);
|
||||
|
||||
if (region.offset == 0) {
|
||||
state.logger->Warn("Failed to allocate GPU address space region!");
|
||||
@ -56,7 +56,7 @@ namespace skyline::service::nvdrv::device {
|
||||
|
||||
// Non-fixed regions are unmapped so that they can be used by future non-fixed mappings
|
||||
if (!region.fixed)
|
||||
if (!state.gpu->memoryManager.Unmap(offset, region.size))
|
||||
if (!state.soc->gmmu.Unmap(offset, region.size))
|
||||
state.logger->Warn("Failed to unmap region at 0x{:X}", offset);
|
||||
|
||||
regionMap.erase(offset);
|
||||
@ -94,7 +94,7 @@ namespace skyline::service::nvdrv::device {
|
||||
u64 gpuAddress{data.offset + data.bufferOffset};
|
||||
u8 *cpuPtr{region->second.ptr + data.bufferOffset};
|
||||
|
||||
if (!state.gpu->memoryManager.MapFixed(gpuAddress, cpuPtr, data.mappingSize)) {
|
||||
if (!state.soc->gmmu.MapFixed(gpuAddress, cpuPtr, data.mappingSize)) {
|
||||
state.logger->Warn("Failed to remap GPU address space region: 0x{:X}", gpuAddress);
|
||||
return NvStatus::BadParameter;
|
||||
}
|
||||
@ -110,9 +110,9 @@ namespace skyline::service::nvdrv::device {
|
||||
u64 size{data.mappingSize ? data.mappingSize : mapping->size};
|
||||
|
||||
if (data.flags.fixed)
|
||||
data.offset = state.gpu->memoryManager.MapFixed(data.offset, cpuPtr, size);
|
||||
data.offset = state.soc->gmmu.MapFixed(data.offset, cpuPtr, size);
|
||||
else
|
||||
data.offset = state.gpu->memoryManager.MapAllocate(cpuPtr, size);
|
||||
data.offset = state.soc->gmmu.MapAllocate(cpuPtr, size);
|
||||
|
||||
if (data.offset == 0) {
|
||||
state.logger->Warn("Failed to map GPU address space region!");
|
||||
@ -184,7 +184,7 @@ namespace skyline::service::nvdrv::device {
|
||||
u8 *cpuPtr{mapping->ptr + (static_cast<u64>(entry.mapOffset) << MinAlignmentShift)};
|
||||
u64 size{static_cast<u64>(entry.pages) << MinAlignmentShift};
|
||||
|
||||
state.gpu->memoryManager.MapFixed(virtAddr, cpuPtr, size);
|
||||
state.soc->gmmu.MapFixed(virtAddr, cpuPtr, size);
|
||||
} catch (const std::out_of_range &) {
|
||||
state.logger->Warn("Invalid NvMap handle: 0x{:X}", entry.nvmapHandle);
|
||||
return NvStatus::BadParameter;
|
||||
|
@ -1,8 +1,8 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#include <soc.h>
|
||||
#include <kernel/types/KProcess.h>
|
||||
#include <gpu.h>
|
||||
#include <services/nvdrv/driver.h>
|
||||
#include "nvhost_channel.h"
|
||||
|
||||
@ -25,7 +25,7 @@ namespace skyline::service::nvdrv::device {
|
||||
|
||||
NvStatus NvHostChannel::SubmitGpfifo(IoctlType type, span<u8> buffer, span<u8> inlineBuffer) {
|
||||
struct Data {
|
||||
gpu::gpfifo::GpEntry *entries; // In
|
||||
soc::gm20b::GpEntry *entries; // In
|
||||
u32 numEntries; // In
|
||||
union {
|
||||
struct __attribute__((__packed__)) {
|
||||
@ -53,9 +53,9 @@ namespace skyline::service::nvdrv::device {
|
||||
throw exception("Waiting on a fence through SubmitGpfifo is unimplemented");
|
||||
}
|
||||
|
||||
state.gpu->gpfifo.Push([&]() {
|
||||
state.soc->gm20b.gpfifo.Push([&]() {
|
||||
if (type == IoctlType::Ioctl2)
|
||||
return inlineBuffer.cast<gpu::gpfifo::GpEntry>();
|
||||
return inlineBuffer.cast<soc::gm20b::GpEntry>();
|
||||
else
|
||||
return span(data.entries, data.numEntries);
|
||||
}());
|
||||
@ -110,7 +110,7 @@ namespace skyline::service::nvdrv::device {
|
||||
u32 _res_[3]; // In
|
||||
} &data = buffer.as<Data>();
|
||||
|
||||
state.gpu->gpfifo.Initialize(data.numEntries);
|
||||
state.soc->gm20b.gpfifo.Initialize(data.numEntries);
|
||||
|
||||
auto driver{nvdrv::driver.lock()};
|
||||
channelFence.UpdateValue(driver->hostSyncpoint);
|
||||
|
@ -2,7 +2,7 @@
|
||||
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
// Copyright © 2019-2020 Ryujinx Team and Contributors
|
||||
|
||||
#include <gpu.h>
|
||||
#include <soc.h>
|
||||
#include <kernel/types/KProcess.h>
|
||||
#include <services/nvdrv/driver.h>
|
||||
#include "nvhost_ctrl.h"
|
||||
@ -46,20 +46,20 @@ namespace skyline::service::nvdrv::device {
|
||||
state = State::Signalled;
|
||||
}
|
||||
|
||||
void SyncpointEvent::Cancel(const std::shared_ptr<gpu::GPU> &gpuState) {
|
||||
void SyncpointEvent::Cancel(soc::host1x::Host1X &host1x) {
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
gpuState->syncpoints.at(fence.id).DeregisterWaiter(waiterId);
|
||||
host1x.syncpoints.at(fence.id).DeregisterWaiter(waiterId);
|
||||
Signal();
|
||||
event->ResetSignal();
|
||||
}
|
||||
|
||||
void SyncpointEvent::Wait(const std::shared_ptr<gpu::GPU> &gpuState, const Fence &pFence) {
|
||||
void SyncpointEvent::Wait(soc::host1x::Host1X &host1x, const Fence &pFence) {
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
fence = pFence;
|
||||
state = State::Waiting;
|
||||
waiterId = gpuState->syncpoints.at(fence.id).RegisterWaiter(fence.value, [this] { Signal(); });
|
||||
waiterId = host1x.syncpoints.at(fence.id).RegisterWaiter(fence.value, [this] { Signal(); });
|
||||
}
|
||||
|
||||
NvHostCtrl::NvHostCtrl(const DeviceState &state) : NvDevice(state) {}
|
||||
@ -105,7 +105,7 @@ namespace skyline::service::nvdrv::device {
|
||||
SyncpointEventValue value; // InOut
|
||||
} &data = buffer.as<Data>();
|
||||
|
||||
if (data.fence.id >= constant::MaxHwSyncpointCount)
|
||||
if (data.fence.id >= soc::host1x::SyncpointCount)
|
||||
return NvStatus::BadValue;
|
||||
|
||||
if (data.timeout == 0)
|
||||
@ -149,7 +149,7 @@ namespace skyline::service::nvdrv::device {
|
||||
|
||||
if (event->state == SyncpointEvent::State::Cancelled || event->state == SyncpointEvent::State::Available || event->state == SyncpointEvent::State::Signalled) {
|
||||
state.logger->Debug("Waiting on syncpoint event: {} with fence: ({}, {})", eventSlot, data.fence.id, data.fence.value);
|
||||
event->Wait(state.gpu, data.fence);
|
||||
event->Wait(state.soc->host1x, data.fence);
|
||||
|
||||
data.value.val = 0;
|
||||
|
||||
@ -189,7 +189,7 @@ namespace skyline::service::nvdrv::device {
|
||||
if (event->state == SyncpointEvent::State::Waiting) {
|
||||
event->state = SyncpointEvent::State::Cancelling;
|
||||
state.logger->Debug("Cancelling waiting syncpoint event: {}", eventSlot);
|
||||
event->Cancel(state.gpu);
|
||||
event->Cancel(state.soc->host1x);
|
||||
}
|
||||
|
||||
event->state = SyncpointEvent::State::Cancelled;
|
||||
|
@ -41,12 +41,12 @@ namespace skyline {
|
||||
/**
|
||||
* @brief Removes any wait requests on a syncpoint event and resets its state
|
||||
*/
|
||||
void Cancel(const std::shared_ptr<gpu::GPU> &gpuState);
|
||||
void Cancel(soc::host1x::Host1X &host1x);
|
||||
|
||||
/**
|
||||
* @brief Asynchronously waits on a syncpoint event using the given fence
|
||||
*/
|
||||
void Wait(const std::shared_ptr<gpu::GPU> &gpuState, const Fence &fence);
|
||||
void Wait(soc::host1x::Host1X &host1x, const Fence &fence);
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -2,7 +2,7 @@
|
||||
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
|
||||
#include <gpu.h>
|
||||
#include <soc.h>
|
||||
#include "nvhost_syncpoint.h"
|
||||
|
||||
namespace skyline::service::nvdrv {
|
||||
@ -28,7 +28,7 @@ namespace skyline::service::nvdrv {
|
||||
}
|
||||
|
||||
u32 NvHostSyncpoint::FindFreeSyncpoint() {
|
||||
for (u32 i{1}; i < constant::MaxHwSyncpointCount; i++)
|
||||
for (u32 i{1}; i < syncpoints.size(); i++)
|
||||
if (!syncpoints[i].reserved)
|
||||
return i;
|
||||
|
||||
@ -71,7 +71,7 @@ namespace skyline::service::nvdrv {
|
||||
if (!syncpoints.at(id).reserved)
|
||||
throw exception("Cannot update an unreserved syncpoint!");
|
||||
|
||||
syncpoints.at(id).counterMin = state.gpu->syncpoints.at(id).value.load();
|
||||
syncpoints.at(id).counterMin = state.soc->host1x.syncpoints.at(id).value.load();
|
||||
return syncpoints.at(id).counterMin;
|
||||
}
|
||||
}
|
||||
|
@ -3,7 +3,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <gpu/syncpoint.h>
|
||||
#include <soc/host1x.h>
|
||||
|
||||
namespace skyline::service::nvdrv {
|
||||
/**
|
||||
@ -22,7 +22,7 @@ namespace skyline::service::nvdrv {
|
||||
};
|
||||
|
||||
const DeviceState &state;
|
||||
std::array<SyncpointInfo, skyline::constant::MaxHwSyncpointCount> syncpoints{};
|
||||
std::array<SyncpointInfo, soc::host1x::SyncpointCount> syncpoints{};
|
||||
std::mutex reservationLock;
|
||||
|
||||
/**
|
||||
|
23
app/src/main/cpp/skyline/soc.h
Normal file
23
app/src/main/cpp/skyline/soc.h
Normal file
@ -0,0 +1,23 @@
|
||||
// SPDX-License-Identifier: LGPL-3.0-or-later
|
||||
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "soc/gmmu.h"
|
||||
#include "soc/host1x.h"
|
||||
#include "soc/gm20b.h"
|
||||
|
||||
namespace skyline::soc {
|
||||
/**
|
||||
* @brief An interface into all emulated components of the Tegra X1 SoC
|
||||
* @note Refer to the Tegra X1 Processor Block Diagram (1.2) for more information
|
||||
*/
|
||||
class SOC {
|
||||
public:
|
||||
gmmu::GraphicsMemoryManager gmmu;
|
||||
host1x::Host1X host1x;
|
||||
gm20b::GM20B gm20b;
|
||||
|
||||
SOC(const DeviceState &state) : gmmu(state), gm20b(state) {}
|
||||
};
|
||||
}
|
25
app/src/main/cpp/skyline/soc/gm20b.h
Normal file
25
app/src/main/cpp/skyline/soc/gm20b.h
Normal file
@ -0,0 +1,25 @@
|
||||
// SPDX-License-Identifier: LGPL-3.0-or-later
|
||||
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "gm20b/engines/maxwell_3d.h"
|
||||
#include "gm20b/gpfifo.h"
|
||||
|
||||
namespace skyline::soc::gm20b {
|
||||
/**
|
||||
* @brief The GPU block in the X1, it contains all GPU engines required for accelerating graphics operations
|
||||
* @note We omit parts of components related to external access such as the GM20B Host, all accesses to the external components are done directly
|
||||
*/
|
||||
class GM20B {
|
||||
public:
|
||||
engine::Engine fermi2D;
|
||||
engine::maxwell3d::Maxwell3D maxwell3D;
|
||||
engine::Engine maxwellCompute;
|
||||
engine::Engine maxwellDma;
|
||||
engine::Engine keplerMemory;
|
||||
GPFIFO gpfifo;
|
||||
|
||||
GM20B(const DeviceState &state) : fermi2D(state), keplerMemory(state), maxwell3D(state), maxwellCompute(state), maxwellDma(state), gpfifo(state) {}
|
||||
};
|
||||
}
|
@ -7,7 +7,7 @@
|
||||
|
||||
#define U32_OFFSET(regs, field) (offsetof(regs, field) / sizeof(u32))
|
||||
|
||||
namespace skyline::gpu {
|
||||
namespace skyline::soc::gm20b {
|
||||
enum class EngineID {
|
||||
Fermi2D = 0x902D,
|
||||
KeplerMemory = 0xA140,
|
176
app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.h
Normal file
176
app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.h
Normal file
@ -0,0 +1,176 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "engine.h"
|
||||
|
||||
namespace skyline::soc::gm20b::engine {
|
||||
/**
|
||||
* @brief The GPFIFO engine handles managing macros and semaphores
|
||||
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt
|
||||
*/
|
||||
class GPFIFO : public Engine {
|
||||
public:
|
||||
static constexpr u32 RegisterCount{0x40}; //!< The number of GPFIFO registers
|
||||
|
||||
private:
|
||||
/**
|
||||
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L65
|
||||
*/
|
||||
#pragma pack(push, 1)
|
||||
union Registers {
|
||||
std::array<u32, RegisterCount> raw;
|
||||
|
||||
enum class SemaphoreOperation : u8 {
|
||||
Acquire = 1,
|
||||
Release = 2,
|
||||
AcqGeq = 4,
|
||||
AcqAnd = 8,
|
||||
Reduction = 16,
|
||||
};
|
||||
|
||||
enum class SemaphoreAcquireSwitch : u8 {
|
||||
Disabled = 0,
|
||||
Enabled = 1,
|
||||
};
|
||||
|
||||
enum class SemaphoreReleaseWfi : u8 {
|
||||
En = 0,
|
||||
Dis = 1,
|
||||
};
|
||||
|
||||
enum class SemaphoreReleaseSize : u8 {
|
||||
SixteenBytes = 0,
|
||||
FourBytes = 1,
|
||||
};
|
||||
|
||||
enum class SemaphoreReduction : u8 {
|
||||
Min = 0,
|
||||
Max = 1,
|
||||
Xor = 2,
|
||||
And = 3,
|
||||
Or = 4,
|
||||
Add = 5,
|
||||
Inc = 6,
|
||||
Dec = 7,
|
||||
};
|
||||
|
||||
enum class SemaphoreFormat : u8 {
|
||||
Signed = 0,
|
||||
Unsigned = 1,
|
||||
};
|
||||
|
||||
enum class MemOpTlbInvalidatePdb : u8 {
|
||||
One = 0,
|
||||
All = 1,
|
||||
};
|
||||
|
||||
enum class SyncpointOperation : u8 {
|
||||
Wait = 0,
|
||||
Incr = 1,
|
||||
};
|
||||
|
||||
enum class SyncpointWaitSwitch : u8 {
|
||||
Dis = 0,
|
||||
En = 1,
|
||||
};
|
||||
|
||||
enum class WfiScope : u8 {
|
||||
CurrentScgType = 0,
|
||||
All = 1,
|
||||
};
|
||||
|
||||
enum class YieldOp : u8 {
|
||||
Nop = 0,
|
||||
PbdmaTimeslice = 1,
|
||||
RunlistTimeslice = 2,
|
||||
Tsg = 3,
|
||||
};
|
||||
|
||||
struct {
|
||||
struct {
|
||||
u16 nvClass : 16;
|
||||
u8 engine : 5;
|
||||
u16 _pad_ : 11;
|
||||
} setObject;
|
||||
|
||||
u32 illegal;
|
||||
u32 nop;
|
||||
u32 _pad0_;
|
||||
|
||||
struct {
|
||||
struct {
|
||||
u32 offsetUpper : 8;
|
||||
u32 _pad0_ : 24;
|
||||
};
|
||||
|
||||
struct {
|
||||
u8 _pad1_ : 2;
|
||||
u32 offsetLower : 30;
|
||||
};
|
||||
|
||||
u32 payload;
|
||||
|
||||
struct {
|
||||
SemaphoreOperation operation : 5;
|
||||
u8 _pad2_ : 7;
|
||||
SemaphoreAcquireSwitch acquireSwitch : 1;
|
||||
u8 _pad3_ : 7;
|
||||
SemaphoreReleaseWfi releaseWfi : 1;
|
||||
u8 _pad4_ : 3;
|
||||
SemaphoreReleaseSize releaseSize : 1;
|
||||
u8 _pad5_ : 2;
|
||||
SemaphoreReduction reduction : 4;
|
||||
SemaphoreFormat format : 1;
|
||||
};
|
||||
} semaphore;
|
||||
|
||||
u32 nonStallInterrupt;
|
||||
u32 fbFlush;
|
||||
u32 _pad1_[2];
|
||||
u32 memOpC;
|
||||
u32 memOpD;
|
||||
u32 _pad2_[6];
|
||||
u32 setReference;
|
||||
u32 _pad3_[7];
|
||||
|
||||
struct {
|
||||
u32 payload;
|
||||
|
||||
struct {
|
||||
SyncpointOperation operation : 1;
|
||||
u8 _pad0_ : 3;
|
||||
SyncpointWaitSwitch waitSwitch : 1;
|
||||
u8 _pad1_ : 3;
|
||||
u16 index : 12;
|
||||
u16 _pad2_ : 12;
|
||||
};
|
||||
} syncpoint;
|
||||
|
||||
struct {
|
||||
WfiScope scope : 1;
|
||||
u32 _pad_ : 31;
|
||||
} wfi;
|
||||
|
||||
u32 crcCheck;
|
||||
|
||||
struct {
|
||||
YieldOp op : 2;
|
||||
u32 _pad_ : 30;
|
||||
} yield;
|
||||
};
|
||||
} registers{};
|
||||
static_assert(sizeof(Registers) == (RegisterCount * sizeof(u32)));
|
||||
#pragma pack(pop)
|
||||
|
||||
public:
|
||||
GPFIFO(const DeviceState &state) : Engine(state) {}
|
||||
|
||||
void CallMethod(MethodParams params) override {
|
||||
state.logger->Debug("Called method in GPFIFO: 0x{:X} args: 0x{:X}", params.method, params.argument);
|
||||
|
||||
registers.raw[params.method] = params.argument;
|
||||
};
|
||||
};
|
||||
}
|
@ -1,11 +1,10 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#include "engines/maxwell_3d.h"
|
||||
#include "memory_manager.h"
|
||||
#include "macro_interpreter.h"
|
||||
#include <soc/gmmu.h>
|
||||
#include <soc/gm20b/engines/maxwell_3d.h>
|
||||
|
||||
namespace skyline::gpu {
|
||||
namespace skyline::soc::gm20b::engine::maxwell3d {
|
||||
void MacroInterpreter::Execute(size_t offset, const std::vector<u32> &args) {
|
||||
// Reset the interpreter state
|
||||
registers = {};
|
||||
@ -28,9 +27,11 @@ namespace skyline::gpu {
|
||||
HandleAssignment(opcode->assignmentOperation, opcode->dest, result);
|
||||
break;
|
||||
}
|
||||
|
||||
case Opcode::Operation::AddImmediate:
|
||||
HandleAssignment(opcode->assignmentOperation, opcode->dest, registers[opcode->srcA] + opcode->immediate);
|
||||
break;
|
||||
|
||||
case Opcode::Operation::BitfieldReplace: {
|
||||
u32 src{registers[opcode->srcB]};
|
||||
u32 dest{registers[opcode->srcA]};
|
||||
@ -47,6 +48,7 @@ namespace skyline::gpu {
|
||||
HandleAssignment(opcode->assignmentOperation, opcode->dest, dest);
|
||||
break;
|
||||
}
|
||||
|
||||
case Opcode::Operation::BitfieldExtractShiftLeftImmediate: {
|
||||
u32 src{registers[opcode->srcB]};
|
||||
u32 dest{registers[opcode->srcA]};
|
||||
@ -56,6 +58,7 @@ namespace skyline::gpu {
|
||||
HandleAssignment(opcode->assignmentOperation, opcode->dest, result);
|
||||
break;
|
||||
}
|
||||
|
||||
case Opcode::Operation::BitfieldExtractShiftLeftRegister: {
|
||||
u32 src{registers[opcode->srcB]};
|
||||
u32 dest{registers[opcode->srcA]};
|
||||
@ -65,17 +68,19 @@ namespace skyline::gpu {
|
||||
HandleAssignment(opcode->assignmentOperation, opcode->dest, result);
|
||||
break;
|
||||
}
|
||||
|
||||
case Opcode::Operation::ReadImmediate: {
|
||||
u32 result{maxwell3D.registers.raw[registers[opcode->srcA] + opcode->immediate]};
|
||||
HandleAssignment(opcode->assignmentOperation, opcode->dest, result);
|
||||
break;
|
||||
}
|
||||
|
||||
case Opcode::Operation::Branch: {
|
||||
if (delayedOpcode != nullptr)
|
||||
throw exception("Cannot branch while inside a delay slot");
|
||||
|
||||
u32 value{registers[opcode->srcA]};
|
||||
bool branch{(opcode->branchCondition == Opcode::BranchCondition::Zero) ? (value == 0) : (value != 0)};
|
||||
bool branch{(opcode->branchCondition == Opcode::BranchCondition::Zero) == (value == 0)};
|
||||
|
||||
if (branch) {
|
||||
if (opcode->noDelay) {
|
||||
@ -91,6 +96,7 @@ namespace skyline::gpu {
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
throw exception("Unknown MME opcode encountered: 0x{:X}", static_cast<u8>(opcode->operation));
|
||||
}
|
||||
@ -186,15 +192,14 @@ namespace skyline::gpu {
|
||||
}
|
||||
}
|
||||
|
||||
FORCE_INLINE void MacroInterpreter::Send(u32 argument) {
|
||||
maxwell3D.CallMethod(MethodParams{methodAddress.address, argument, 0, true});
|
||||
|
||||
FORCE_INLINE void MacroInterpreter::Send(u32 pArgument) {
|
||||
maxwell3D.CallMethod(MethodParams{methodAddress.address, pArgument, 0, true});
|
||||
methodAddress.address += methodAddress.increment;
|
||||
}
|
||||
|
||||
FORCE_INLINE void MacroInterpreter::WriteRegister(u8 reg, u32 value) {
|
||||
// Register 0 should always be zero so block writes to it
|
||||
if (reg == 0)
|
||||
if (reg == 0) [[unlikely]]
|
||||
return;
|
||||
|
||||
registers[reg] = value;
|
@ -5,10 +5,8 @@
|
||||
|
||||
#include <common.h>
|
||||
|
||||
namespace skyline::gpu {
|
||||
namespace engine {
|
||||
class Maxwell3D;
|
||||
}
|
||||
namespace skyline::soc::gm20b::engine::maxwell3d {
|
||||
class Maxwell3D; // A forward declaration of Maxwell3D as we don't want to import it here
|
||||
|
||||
/**
|
||||
* @brief The MacroInterpreter class handles interpreting macros. Macros are small programs that run on the GPU and are used for things like instanced rendering.
|
||||
@ -105,14 +103,13 @@ namespace skyline::gpu {
|
||||
};
|
||||
};
|
||||
|
||||
engine::Maxwell3D &maxwell3D;
|
||||
Maxwell3D &maxwell3D; //!< A reference to the parent engine object
|
||||
|
||||
std::array<u32, 8> registers{};
|
||||
|
||||
Opcode *opcode{};
|
||||
const u32 *argument{};
|
||||
Opcode *opcode{}; //!< A pointer to the instruction that is currently being executed
|
||||
std::array<u32, 8> registers{}; //!< The state of all the general-purpose registers in the macro interpreter
|
||||
const u32 *argument{}; //!< A pointer to the argument buffer for the program, it is read from sequentially
|
||||
MethodAddress methodAddress{};
|
||||
bool carryFlag{};
|
||||
bool carryFlag{}; //!< A flag representing if an arithmetic operation has set the most significant bit
|
||||
|
||||
/**
|
||||
* @brief Steps forward one macro instruction, including delay slots
|
||||
@ -135,10 +132,13 @@ namespace skyline::gpu {
|
||||
*/
|
||||
void Send(u32 argument);
|
||||
|
||||
/**
|
||||
* @brief Writes to the specified register with sanity checking
|
||||
*/
|
||||
void WriteRegister(u8 reg, u32 value);
|
||||
|
||||
public:
|
||||
MacroInterpreter(engine::Maxwell3D &maxwell3D) : maxwell3D(maxwell3D) {}
|
||||
MacroInterpreter(Maxwell3D &maxwell3D) : maxwell3D(maxwell3D) {}
|
||||
|
||||
/**
|
||||
* @brief Executes a GPU macro from macro memory with the given arguments
|
@ -1,10 +1,9 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#include <gpu.h>
|
||||
#include "maxwell_3d.h"
|
||||
#include <soc.h>
|
||||
|
||||
namespace skyline::gpu::engine {
|
||||
namespace skyline::soc::gm20b::engine::maxwell3d {
|
||||
Maxwell3D::Maxwell3D(const DeviceState &state) : Engine(state), macroInterpreter(*this) {
|
||||
ResetRegs();
|
||||
}
|
||||
@ -77,9 +76,9 @@ namespace skyline::gpu::engine {
|
||||
state.logger->Debug("Called method in Maxwell 3D: 0x{:X} args: 0x{:X}", params.method, params.argument);
|
||||
|
||||
// Methods that are greater than the register size are for macro control
|
||||
if (params.method > constant::Maxwell3DRegisterCounter) {
|
||||
if (params.method > RegisterCount) {
|
||||
if (!(params.method & 1))
|
||||
macroInvocation.index = ((params.method - constant::Maxwell3DRegisterCounter) >> 1) % macroPositions.size();
|
||||
macroInvocation.index = ((params.method - RegisterCount) >> 1) % macroPositions.size();
|
||||
|
||||
macroInvocation.arguments.push_back(params.argument);
|
||||
|
||||
@ -100,6 +99,8 @@ namespace skyline::gpu::engine {
|
||||
else if (shadowRegisters.mme.shadowRamControl == Registers::MmeShadowRamControl::MethodReplay)
|
||||
params.argument = shadowRegisters.raw[params.method];
|
||||
|
||||
#define MAXWELL3D_OFFSET(field) U32_OFFSET(Registers, field)
|
||||
|
||||
switch (params.method) {
|
||||
case MAXWELL3D_OFFSET(mme.instructionRamLoad):
|
||||
if (registers.mme.instructionRamPointer >= macroCode.size())
|
||||
@ -118,7 +119,7 @@ namespace skyline::gpu::engine {
|
||||
break;
|
||||
case MAXWELL3D_OFFSET(syncpointAction):
|
||||
state.logger->Debug("Increment syncpoint: {}", static_cast<u16>(registers.syncpointAction.id));
|
||||
state.gpu->syncpoints.at(registers.syncpointAction.id).Increment();
|
||||
state.soc->host1x.syncpoints.at(registers.syncpointAction.id).Increment();
|
||||
break;
|
||||
case MAXWELL3D_OFFSET(semaphore.info):
|
||||
switch (registers.semaphore.info.op) {
|
||||
@ -137,6 +138,8 @@ namespace skyline::gpu::engine {
|
||||
registers.raw[0xD00] = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
#undef MAXWELL3D_OFFSET
|
||||
}
|
||||
|
||||
void Maxwell3D::HandleSemaphoreCounterOperation() {
|
||||
@ -158,7 +161,7 @@ namespace skyline::gpu::engine {
|
||||
|
||||
switch (registers.semaphore.info.structureSize) {
|
||||
case Registers::SemaphoreInfo::StructureSize::OneWord:
|
||||
state.gpu->memoryManager.Write<u32>(static_cast<u32>(result), registers.semaphore.address.Pack());
|
||||
state.soc->gmmu.Write<u32>(static_cast<u32>(result), registers.semaphore.address.Pack());
|
||||
break;
|
||||
case Registers::SemaphoreInfo::StructureSize::FourWords: {
|
||||
// Convert the current nanosecond time to GPU ticks
|
||||
@ -168,7 +171,7 @@ namespace skyline::gpu::engine {
|
||||
u64 nsTime{util::GetTimeNs()};
|
||||
u64 timestamp{(nsTime / NsToTickDenominator) * NsToTickNumerator + ((nsTime % NsToTickDenominator) * NsToTickNumerator) / NsToTickDenominator};
|
||||
|
||||
state.gpu->memoryManager.Write<FourWordResult>(FourWordResult{result, timestamp}, registers.semaphore.address.Pack());
|
||||
state.soc->gmmu.Write<FourWordResult>(FourWordResult{result, timestamp}, registers.semaphore.address.Pack());
|
||||
break;
|
||||
}
|
||||
}
|
569
app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h
Normal file
569
app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h
Normal file
@ -0,0 +1,569 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "engine.h"
|
||||
#include "maxwell/macro_interpreter.h"
|
||||
|
||||
namespace skyline::soc::gm20b::engine::maxwell3d {
|
||||
/**
|
||||
* @brief The Maxwell 3D engine handles processing 3D graphics
|
||||
*/
|
||||
class Maxwell3D : public Engine {
|
||||
private:
|
||||
std::array<size_t, 0x80> macroPositions{}; //!< The positions of each individual macro in macro memory, there can be a maximum of 0x80 macros at any one time
|
||||
|
||||
struct {
|
||||
u32 index;
|
||||
std::vector<u32> arguments;
|
||||
} macroInvocation{}; //!< Data for a macro that is pending execution
|
||||
|
||||
MacroInterpreter macroInterpreter;
|
||||
|
||||
void HandleSemaphoreCounterOperation();
|
||||
|
||||
void WriteSemaphoreResult(u64 result);
|
||||
|
||||
public:
|
||||
static constexpr u32 RegisterCount{0xE00}; //!< The number of Maxwell 3D registers
|
||||
|
||||
/**
|
||||
* @url https://github.com/devkitPro/deko3d/blob/master/source/maxwell/engine_3d.def#L478
|
||||
*/
|
||||
#pragma pack(push, 1)
|
||||
union Registers {
|
||||
std::array<u32, RegisterCount> raw;
|
||||
|
||||
struct Address {
|
||||
u32 high;
|
||||
u32 low;
|
||||
|
||||
u64 Pack() {
|
||||
return (static_cast<u64>(high) << 32) | low;
|
||||
}
|
||||
};
|
||||
static_assert(sizeof(Address) == sizeof(u64));
|
||||
|
||||
enum class MmeShadowRamControl : u32 {
|
||||
MethodTrack = 0,
|
||||
MethodTrackWithFilter = 1,
|
||||
MethodPassthrough = 2,
|
||||
MethodReplay = 3,
|
||||
};
|
||||
|
||||
struct ViewportTransform {
|
||||
enum class Swizzle : u8 {
|
||||
PositiveX = 0,
|
||||
NegativeX = 1,
|
||||
PositiveY = 2,
|
||||
NegativeY = 3,
|
||||
PositiveZ = 4,
|
||||
NegativeZ = 5,
|
||||
PositiveW = 6,
|
||||
NegativeW = 7,
|
||||
};
|
||||
|
||||
float scaleX;
|
||||
float scaleY;
|
||||
float scaleZ;
|
||||
float translateX;
|
||||
float translateY;
|
||||
float translateZ;
|
||||
|
||||
struct {
|
||||
Swizzle x : 3;
|
||||
u8 _pad0_ : 1;
|
||||
Swizzle y : 3;
|
||||
u8 _pad1_ : 1;
|
||||
Swizzle z : 3;
|
||||
u8 _pad2_ : 1;
|
||||
Swizzle w : 3;
|
||||
u32 _pad3_ : 17;
|
||||
} swizzles;
|
||||
|
||||
struct {
|
||||
u8 x : 5;
|
||||
u8 _pad0_ : 3;
|
||||
u8 y : 5;
|
||||
u32 _pad1_ : 19;
|
||||
} subpixelPrecisionBias;
|
||||
};
|
||||
static_assert(sizeof(ViewportTransform) == (0x8 * sizeof(u32)));
|
||||
|
||||
struct Viewport {
|
||||
struct {
|
||||
u16 x;
|
||||
u16 width;
|
||||
};
|
||||
|
||||
struct {
|
||||
u16 y;
|
||||
u16 height;
|
||||
};
|
||||
|
||||
float depthRangeNear;
|
||||
float depthRangeFar;
|
||||
};
|
||||
static_assert(sizeof(Viewport) == (0x4 * sizeof(u32)));
|
||||
|
||||
enum class PolygonMode : u32 {
|
||||
Point = 0x1B00,
|
||||
Line = 0x1B01,
|
||||
Fill = 0x1B02,
|
||||
};
|
||||
|
||||
union VertexAttribute {
|
||||
u32 raw;
|
||||
|
||||
enum class Size : u8 {
|
||||
Size_1x32 = 0x12,
|
||||
Size_2x32 = 0x04,
|
||||
Size_3x32 = 0x02,
|
||||
Size_4x32 = 0x01,
|
||||
Size_1x16 = 0x1B,
|
||||
Size_2x16 = 0x0F,
|
||||
Size_3x16 = 0x05,
|
||||
Size_4x16 = 0x03,
|
||||
Size_1x8 = 0x1D,
|
||||
Size_2x8 = 0x18,
|
||||
Size_3x8 = 0x13,
|
||||
Size_4x8 = 0x0A,
|
||||
Size_10_10_10_2 = 0x30,
|
||||
Size_11_11_10 = 0x31,
|
||||
};
|
||||
|
||||
enum class Type : u8 {
|
||||
None = 0,
|
||||
SNorm = 1,
|
||||
UNorm = 2,
|
||||
SInt = 3,
|
||||
UInt = 4,
|
||||
UScaled = 5,
|
||||
SScaled = 6,
|
||||
Float = 7,
|
||||
};
|
||||
|
||||
struct {
|
||||
u8 bufferId : 5;
|
||||
u8 _pad0_ : 1;
|
||||
bool fixed : 1;
|
||||
u16 offset : 14;
|
||||
Size size : 6;
|
||||
Type type : 3;
|
||||
u8 _pad1_ : 1;
|
||||
bool bgra : 1;
|
||||
};
|
||||
};
|
||||
static_assert(sizeof(VertexAttribute) == sizeof(u32));
|
||||
|
||||
enum class CompareOp : u32 {
|
||||
Never = 1,
|
||||
Less = 2,
|
||||
Equal = 3,
|
||||
LessOrEqual = 4,
|
||||
Greater = 5,
|
||||
NotEqual = 6,
|
||||
GreaterOrEqual = 7,
|
||||
Always = 8,
|
||||
|
||||
NeverGL = 0x200,
|
||||
LessGL = 0x201,
|
||||
EqualGL = 0x202,
|
||||
LessOrEqualGL = 0x203,
|
||||
GreaterGL = 0x204,
|
||||
NotEqualGL = 0x205,
|
||||
GreaterOrEqualGL = 0x206,
|
||||
AlwaysGL = 0x207,
|
||||
};
|
||||
|
||||
struct Blend {
|
||||
enum class Op : u32 {
|
||||
Add = 1,
|
||||
Subtract = 2,
|
||||
ReverseSubtract = 3,
|
||||
Minimum = 4,
|
||||
Maximum = 5,
|
||||
|
||||
AddGL = 0x8006,
|
||||
SubtractGL = 0x8007,
|
||||
ReverseSubtractGL = 0x8008,
|
||||
MinimumGL = 0x800A,
|
||||
MaximumGL = 0x800B,
|
||||
};
|
||||
|
||||
enum class Factor : u32 {
|
||||
Zero = 0x1,
|
||||
One = 0x2,
|
||||
SourceColor = 0x3,
|
||||
OneMinusSourceColor = 0x4,
|
||||
SourceAlpha = 0x5,
|
||||
OneMinusSourceAlpha = 0x6,
|
||||
DestAlpha = 0x7,
|
||||
OneMinusDestAlpha = 0x8,
|
||||
DestColor = 0x9,
|
||||
OneMinusDestColor = 0xA,
|
||||
SourceAlphaSaturate = 0xB,
|
||||
Source1Color = 0x10,
|
||||
OneMinusSource1Color = 0x11,
|
||||
Source1Alpha = 0x12,
|
||||
OneMinusSource1Alpha = 0x13,
|
||||
ConstantColor = 0x61,
|
||||
OneMinusConstantColor = 0x62,
|
||||
ConstantAlpha = 0x63,
|
||||
OneMinusConstantAlpha = 0x64,
|
||||
|
||||
ZeroGL = 0x4000,
|
||||
OneGL = 0x4001,
|
||||
SourceColorGL = 0x4300,
|
||||
OneMinusSourceColorGL = 0x4301,
|
||||
SourceAlphaGL = 0x4302,
|
||||
OneMinusSourceAlphaGL = 0x4303,
|
||||
DestAlphaGL = 0x4304,
|
||||
OneMinusDestAlphaGL = 0x4305,
|
||||
DestColorGL = 0x4306,
|
||||
OneMinusDestColorGL = 0x4307,
|
||||
SourceAlphaSaturateGL = 0x4308,
|
||||
ConstantColorGL = 0xC001,
|
||||
OneMinusConstantColorGL = 0xC002,
|
||||
ConstantAlphaGL = 0xC003,
|
||||
OneMinusConstantAlphaGL = 0xC004,
|
||||
Source1ColorGL = 0xC900,
|
||||
OneMinusSource1ColorGL = 0xC901,
|
||||
Source1AlphaGL = 0xC902,
|
||||
OneMinusSource1AlphaGL = 0xC903,
|
||||
};
|
||||
|
||||
struct {
|
||||
u32 seperateAlpha;
|
||||
Op colorOp;
|
||||
Factor colorSrcFactor;
|
||||
Factor colorDestFactor;
|
||||
Op alphaOp;
|
||||
Factor alphaSrcFactor;
|
||||
Factor alphaDestFactor;
|
||||
u32 _pad_;
|
||||
};
|
||||
};
|
||||
static_assert(sizeof(Blend) == (sizeof(u32) * 8));
|
||||
|
||||
enum class StencilOp : u32 {
|
||||
Keep = 1,
|
||||
Zero = 2,
|
||||
Replace = 3,
|
||||
IncrementAndClamp = 4,
|
||||
DecrementAndClamp = 5,
|
||||
Invert = 6,
|
||||
IncrementAndWrap = 7,
|
||||
DecrementAndWrap = 8,
|
||||
};
|
||||
|
||||
enum class FrontFace : u32 {
|
||||
Clockwise = 0x900,
|
||||
CounterClockwise = 0x901,
|
||||
};
|
||||
|
||||
enum class CullFace : u32 {
|
||||
Front = 0x404,
|
||||
Back = 0x405,
|
||||
FrontAndBack = 0x408,
|
||||
};
|
||||
|
||||
union ColorWriteMask {
|
||||
u32 raw;
|
||||
|
||||
struct {
|
||||
u8 r : 4;
|
||||
u8 g : 4;
|
||||
u8 b : 4;
|
||||
u8 a : 4;
|
||||
};
|
||||
};
|
||||
static_assert(sizeof(ColorWriteMask) == sizeof(u32));
|
||||
|
||||
struct SemaphoreInfo {
|
||||
enum class Op : u8 {
|
||||
Release = 0,
|
||||
Acquire = 1,
|
||||
Counter = 2,
|
||||
Trap = 3,
|
||||
};
|
||||
|
||||
enum class ReductionOp : u8 {
|
||||
Add = 0,
|
||||
Min = 1,
|
||||
Max = 2,
|
||||
Inc = 3,
|
||||
Dec = 4,
|
||||
And = 5,
|
||||
Or = 6,
|
||||
Xor = 7,
|
||||
};
|
||||
|
||||
enum class Unit : u8 {
|
||||
VFetch = 1,
|
||||
VP = 2,
|
||||
Rast = 4,
|
||||
StrmOut = 5,
|
||||
GP = 6,
|
||||
ZCull = 7,
|
||||
Prop = 10,
|
||||
Crop = 15,
|
||||
};
|
||||
|
||||
enum class SyncCondition : u8 {
|
||||
NotEqual = 0,
|
||||
GreaterThan = 1,
|
||||
};
|
||||
|
||||
enum class Format : u8 {
|
||||
U32 = 0,
|
||||
I32 = 1,
|
||||
};
|
||||
|
||||
enum class CounterType : u8 {
|
||||
Zero = 0x0,
|
||||
InputVertices = 0x1,
|
||||
InputPrimitives = 0x3,
|
||||
VertexShaderInvocations = 0x5,
|
||||
GeometryShaderInvocations = 0x7,
|
||||
GeometryShaderPrimitives = 0x9,
|
||||
ZcullStats0 = 0xA,
|
||||
TransformFeedbackPrimitivesWritten = 0xB,
|
||||
ZcullStats1 = 0xC,
|
||||
ZcullStats2 = 0xE,
|
||||
ClipperInputPrimitives = 0xF,
|
||||
ZcullStats3 = 0x10,
|
||||
ClipperOutputPrimitives = 0x11,
|
||||
PrimitivesGenerated = 0x12,
|
||||
FragmentShaderInvocations = 0x13,
|
||||
SamplesPassed = 0x15,
|
||||
TransformFeedbackOffset = 0x1A,
|
||||
TessControlShaderInvocations = 0x1B,
|
||||
TessEvaluationShaderInvocations = 0x1D,
|
||||
TessEvaluationShaderPrimitives = 0x1F,
|
||||
};
|
||||
|
||||
enum class StructureSize : u8 {
|
||||
FourWords = 0,
|
||||
OneWord = 1,
|
||||
};
|
||||
|
||||
Op op : 2;
|
||||
bool flushDisable : 1;
|
||||
bool reductionEnable : 1;
|
||||
bool fenceEnable : 1;
|
||||
u8 _pad0_ : 4;
|
||||
ReductionOp reductionOp : 3;
|
||||
Unit unit : 4;
|
||||
SyncCondition syncCondition : 1;
|
||||
Format format : 2;
|
||||
u8 _pad1_ : 1;
|
||||
bool awakenEnable : 1;
|
||||
u8 _pad2_ : 2;
|
||||
CounterType counterType : 5;
|
||||
StructureSize structureSize : 1;
|
||||
};
|
||||
static_assert(sizeof(SemaphoreInfo) == sizeof(u32));
|
||||
|
||||
enum class CoordOrigin : u8 {
|
||||
LowerLeft = 0,
|
||||
UpperLeft = 1,
|
||||
};
|
||||
|
||||
struct {
|
||||
u32 _pad0_[0x40]; // 0x0
|
||||
u32 noOperation; // 0x40
|
||||
u32 _pad1_[0x3]; // 0x41
|
||||
u32 waitForIdle; // 0x44
|
||||
|
||||
struct {
|
||||
u32 instructionRamPointer; // 0x45
|
||||
u32 instructionRamLoad; // 0x46
|
||||
u32 startAddressRamPointer; // 0x47
|
||||
u32 startAddressRamLoad; // 0x48
|
||||
MmeShadowRamControl shadowRamControl; // 0x49
|
||||
} mme;
|
||||
|
||||
u32 _pad2_[0x68]; // 0x4A
|
||||
|
||||
struct {
|
||||
u16 id : 12;
|
||||
u8 _pad0_ : 4;
|
||||
bool flushCache : 1;
|
||||
u8 _pad1_ : 3;
|
||||
bool increment : 1;
|
||||
u16 _pad2_ : 11;
|
||||
} syncpointAction; // 0xB2
|
||||
|
||||
u32 _pad3_[0x2C]; // 0xB3
|
||||
u32 rasterizerEnable; // 0xDF
|
||||
u32 _pad4_[0x1A0]; // 0xE0
|
||||
std::array<ViewportTransform, 0x10> viewportTransform; // 0x280
|
||||
std::array<Viewport, 0x10> viewport; // 0x300
|
||||
u32 _pad5_[0x2B]; // 0x340
|
||||
|
||||
struct {
|
||||
PolygonMode front; // 0x36B
|
||||
PolygonMode back; // 0x36C
|
||||
} polygonMode;
|
||||
|
||||
u32 _pad6_[0x68]; // 0x36D
|
||||
|
||||
struct {
|
||||
u32 compareRef; // 0x3D5
|
||||
u32 writeMask; // 0x3D6
|
||||
u32 compareMask; // 0x3D7
|
||||
} stencilBackExtra;
|
||||
|
||||
u32 _pad7_[0x13]; // 0x3D8
|
||||
u32 rtSeparateFragData; // 0x3EB
|
||||
u32 _pad8_[0x6C]; // 0x3EC
|
||||
std::array<VertexAttribute, 0x20> vertexAttributeState; // 0x458
|
||||
u32 _pad9_[0x4B]; // 0x478
|
||||
CompareOp depthTestFunc; // 0x4C3
|
||||
float alphaTestRef; // 0x4C4
|
||||
CompareOp alphaTestFunc; // 0x4C5
|
||||
u32 drawTFBStride; // 0x4C6
|
||||
|
||||
struct {
|
||||
float r; // 0x4C7
|
||||
float g; // 0x4C8
|
||||
float b; // 0x4C9
|
||||
float a; // 0x4CA
|
||||
} blendConstant;
|
||||
|
||||
u32 _pad10_[0x4]; // 0x4CB
|
||||
|
||||
struct {
|
||||
u32 seperateAlpha; // 0x4CF
|
||||
Blend::Op colorOp; // 0x4D0
|
||||
Blend::Factor colorSrcFactor; // 0x4D1
|
||||
Blend::Factor colorDestFactor; // 0x4D2
|
||||
Blend::Op alphaOp; // 0x4D3
|
||||
Blend::Factor alphaSrcFactor; // 0x4D4
|
||||
u32 _pad_; // 0x4D5
|
||||
Blend::Factor alphaDestFactor; // 0x4D6
|
||||
|
||||
u32 enableCommon; // 0x4D7
|
||||
std::array<u32, 8> enable; // 0x4D8 For each render target
|
||||
} blend;
|
||||
|
||||
u32 stencilEnable; // 0x4E0
|
||||
|
||||
struct {
|
||||
StencilOp failOp; // 0x4E1
|
||||
StencilOp zFailOp; // 0x4E2
|
||||
StencilOp zPassOp; // 0x4E3
|
||||
|
||||
struct {
|
||||
CompareOp op; // 0x4E4
|
||||
i32 ref; // 0x4E5
|
||||
u32 mask; // 0x4E6
|
||||
} compare;
|
||||
|
||||
u32 writeMask; // 0x4E7
|
||||
} stencilFront;
|
||||
|
||||
u32 _pad11_[0x4]; // 0x4E8
|
||||
float lineWidthSmooth; // 0x4EC
|
||||
float lineWidthAliased; // 0x4ED
|
||||
u32 _pad12_[0x1F]; // 0x4EE
|
||||
u32 drawBaseVertex; // 0x50D
|
||||
u32 drawBaseInstance; // 0x50E
|
||||
u32 _pad13_[0x35]; // 0x50F
|
||||
u32 clipDistanceEnable; // 0x544
|
||||
u32 sampleCounterEnable; // 0x545
|
||||
float pointSpriteSize; // 0x546
|
||||
u32 zCullStatCountersEnable; // 0x547
|
||||
u32 pointSpriteEnable; // 0x548
|
||||
u32 _pad14_; // 0x549
|
||||
u32 shaderExceptions; // 0x54A
|
||||
u32 _pad15_[0x2]; // 0x54B
|
||||
u32 multisampleEnable; // 0x54D
|
||||
u32 depthTargetEnable; // 0x54E
|
||||
|
||||
struct {
|
||||
bool alphaToCoverage : 1;
|
||||
u8 _pad0_ : 3;
|
||||
bool alphaToOne : 1;
|
||||
u32 _pad1_ : 27;
|
||||
} multisampleControl; // 0x54F
|
||||
|
||||
u32 _pad16_[0x7]; // 0x550
|
||||
|
||||
struct {
|
||||
Address address; // 0x557
|
||||
u32 maximumIndex; // 0x559
|
||||
} texSamplerPool;
|
||||
|
||||
u32 _pad17_; // 0x55A
|
||||
u32 polygonOffsetFactor; // 0x55B
|
||||
u32 lineSmoothEnable; // 0x55C
|
||||
|
||||
struct {
|
||||
Address address; // 0x55D
|
||||
u32 maximumIndex; // 0x55F
|
||||
} texHeaderPool;
|
||||
|
||||
u32 _pad18_[0x5]; // 0x560
|
||||
|
||||
u32 stencilTwoSideEnable; // 0x565
|
||||
|
||||
struct {
|
||||
StencilOp failOp; // 0x566
|
||||
StencilOp zFailOp; // 0x567
|
||||
StencilOp zPassOp; // 0x568
|
||||
CompareOp compareOp; // 0x569
|
||||
} stencilBack;
|
||||
|
||||
u32 _pad19_[0x17]; // 0x56A
|
||||
|
||||
struct {
|
||||
u8 _unk_ : 2;
|
||||
CoordOrigin origin : 1;
|
||||
u16 enable : 10;
|
||||
u32 _pad_ : 19;
|
||||
} pointCoordReplace; // 0x581
|
||||
|
||||
u32 _pad20_[0xC4]; // 0x582
|
||||
u32 cullFaceEnable; // 0x646
|
||||
FrontFace frontFace; // 0x647
|
||||
CullFace cullFace; // 0x648
|
||||
u32 pixelCentreImage; // 0x649
|
||||
u32 _pad21_; // 0x64A
|
||||
u32 viewportTransformEnable; // 0x64B
|
||||
u32 _pad22_[0x34]; // 0x64C
|
||||
std::array<ColorWriteMask, 8> colorMask; // 0x680 For each render target
|
||||
u32 _pad23_[0x38]; // 0x688
|
||||
|
||||
struct {
|
||||
Address address; // 0x6C0
|
||||
u32 payload; // 0x6C2
|
||||
SemaphoreInfo info; // 0x6C3
|
||||
} semaphore;
|
||||
|
||||
u32 _pad24_[0xBC]; // 0x6C4
|
||||
std::array<Blend, 8> independentBlend; // 0x780 For each render target
|
||||
u32 _pad25_[0x100]; // 0x7C0
|
||||
u32 firmwareCall[0x20]; // 0x8C0
|
||||
};
|
||||
};
|
||||
static_assert(sizeof(Registers) == (RegisterCount * sizeof(u32)));
|
||||
#pragma pack(pop)
|
||||
|
||||
Registers registers{};
|
||||
Registers shadowRegisters{}; //!< The shadow registers, their function is controlled by the 'shadowRamControl' register
|
||||
|
||||
std::array<u32, 0x10000> macroCode{}; //!< This stores GPU macros, the 256kb size is from Ryujinx
|
||||
|
||||
Maxwell3D(const DeviceState &state);
|
||||
|
||||
/**
|
||||
* @brief Resets the Maxwell 3D registers to their default values
|
||||
*/
|
||||
void ResetRegs();
|
||||
|
||||
void CallMethod(MethodParams params) override;
|
||||
};
|
||||
}
|
@ -4,30 +4,28 @@
|
||||
#include <common/signal.h>
|
||||
#include <loader/loader.h>
|
||||
#include <kernel/types/KProcess.h>
|
||||
#include <gpu.h>
|
||||
#include <gpu/engines/maxwell_3d.h>
|
||||
#include "gpfifo.h"
|
||||
#include <soc.h>
|
||||
|
||||
namespace skyline::gpu::gpfifo {
|
||||
namespace skyline::soc::gm20b {
|
||||
void GPFIFO::Send(MethodParams params) {
|
||||
state.logger->Debug("Called GPU method - method: 0x{:X} argument: 0x{:X} subchannel: 0x{:X} last: {}", params.method, params.argument, params.subChannel, params.lastCall);
|
||||
|
||||
if (params.method == 0) {
|
||||
switch (static_cast<EngineID>(params.argument)) {
|
||||
case EngineID::Fermi2D:
|
||||
subchannels.at(params.subChannel) = state.gpu->fermi2D;
|
||||
subchannels.at(params.subChannel) = &state.soc->gm20b.fermi2D;
|
||||
break;
|
||||
case EngineID::KeplerMemory:
|
||||
subchannels.at(params.subChannel) = state.gpu->keplerMemory;
|
||||
subchannels.at(params.subChannel) = &state.soc->gm20b.keplerMemory;
|
||||
break;
|
||||
case EngineID::Maxwell3D:
|
||||
subchannels.at(params.subChannel) = state.gpu->maxwell3D;
|
||||
subchannels.at(params.subChannel) = &state.soc->gm20b.maxwell3D;
|
||||
break;
|
||||
case EngineID::MaxwellCompute:
|
||||
subchannels.at(params.subChannel) = state.gpu->maxwellCompute;
|
||||
subchannels.at(params.subChannel) = &state.soc->gm20b.maxwellCompute;
|
||||
break;
|
||||
case EngineID::MaxwellDma:
|
||||
subchannels.at(params.subChannel) = state.gpu->maxwellDma;
|
||||
subchannels.at(params.subChannel) = &state.soc->gm20b.maxwellDma;
|
||||
break;
|
||||
default:
|
||||
throw exception("Unknown engine 0x{:X} cannot be bound to subchannel {}", params.argument, params.subChannel);
|
||||
@ -35,7 +33,7 @@ namespace skyline::gpu::gpfifo {
|
||||
|
||||
state.logger->Info("Bound GPU engine 0x{:X} to subchannel {}", params.argument, params.subChannel);
|
||||
return;
|
||||
} else if (params.method < constant::GpfifoRegisterCount) {
|
||||
} else if (params.method < engine::GPFIFO::RegisterCount) {
|
||||
gpfifoEngine.CallMethod(params);
|
||||
} else {
|
||||
if (subchannels.at(params.subChannel) == nullptr)
|
||||
@ -58,7 +56,7 @@ namespace skyline::gpu::gpfifo {
|
||||
}
|
||||
|
||||
pushBufferData.resize(gpEntry.size);
|
||||
state.gpu->memoryManager.Read<u32>(pushBufferData, gpEntry.Address());
|
||||
state.soc->gmmu.Read<u32>(pushBufferData, gpEntry.Address());
|
||||
|
||||
for (auto entry{pushBufferData.begin()}; entry != pushBufferData.end(); entry++) {
|
||||
// An entry containing all zeroes is a NOP, skip over it
|
||||
@ -66,28 +64,29 @@ namespace skyline::gpu::gpfifo {
|
||||
continue;
|
||||
|
||||
PushBufferMethodHeader methodHeader{.raw = *entry};
|
||||
|
||||
switch (methodHeader.secOp) {
|
||||
case PushBufferMethodHeader::SecOp::IncMethod:
|
||||
for (u16 i{}; i < methodHeader.methodCount; i++)
|
||||
Send(MethodParams{static_cast<u16>(methodHeader.methodAddress + i), *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1});
|
||||
|
||||
break;
|
||||
|
||||
case PushBufferMethodHeader::SecOp::NonIncMethod:
|
||||
for (u16 i{}; i < methodHeader.methodCount; i++)
|
||||
Send(MethodParams{methodHeader.methodAddress, *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1});
|
||||
|
||||
break;
|
||||
|
||||
case PushBufferMethodHeader::SecOp::OneInc:
|
||||
for (u16 i{}; i < methodHeader.methodCount; i++)
|
||||
Send(MethodParams{static_cast<u16>(methodHeader.methodAddress + static_cast<bool>(i)), *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1});
|
||||
|
||||
break;
|
||||
|
||||
case PushBufferMethodHeader::SecOp::ImmdDataMethod:
|
||||
Send(MethodParams{methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel, true});
|
||||
break;
|
||||
|
||||
case PushBufferMethodHeader::SecOp::EndPbSegment:
|
||||
return;
|
||||
|
||||
default:
|
||||
state.logger->Warn("Unsupported pushbuffer method SecOp: {}", static_cast<u8>(methodHeader.secOp));
|
||||
break;
|
168
app/src/main/cpp/skyline/soc/gm20b/gpfifo.h
Normal file
168
app/src/main/cpp/skyline/soc/gm20b/gpfifo.h
Normal file
@ -0,0 +1,168 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <common/circular_queue.h>
|
||||
#include "engines/gpfifo.h"
|
||||
|
||||
namespace skyline::soc::gm20b {
|
||||
/**
|
||||
* @brief A GPFIFO entry as submitted through 'SubmitGpfifo'
|
||||
* @url https://nvidia.github.io/open-gpu-doc/manuals/volta/gv100/dev_pbdma.ref.txt
|
||||
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L155
|
||||
*/
|
||||
struct GpEntry {
|
||||
enum class Fetch : u8 {
|
||||
Unconditional = 0,
|
||||
Conditional = 1,
|
||||
};
|
||||
|
||||
union {
|
||||
u32 entry0;
|
||||
|
||||
struct {
|
||||
Fetch fetch : 1;
|
||||
u8 _pad_ : 1;
|
||||
u32 get : 30;
|
||||
};
|
||||
};
|
||||
|
||||
enum class Opcode : u8 {
|
||||
Nop = 0,
|
||||
Illegal = 1,
|
||||
Crc = 2,
|
||||
PbCrc = 3,
|
||||
};
|
||||
|
||||
enum class Priv : u8 {
|
||||
User = 0,
|
||||
Kernel = 1,
|
||||
};
|
||||
|
||||
enum class Level : u8 {
|
||||
Main = 0,
|
||||
Subroutine = 1,
|
||||
};
|
||||
|
||||
enum class Sync : u8 {
|
||||
Proceed = 0,
|
||||
Wait = 1,
|
||||
};
|
||||
|
||||
union {
|
||||
u32 entry1;
|
||||
|
||||
struct {
|
||||
union {
|
||||
u8 getHi;
|
||||
Opcode opcode;
|
||||
};
|
||||
|
||||
Priv priv : 1;
|
||||
Level level : 1;
|
||||
u32 size : 21;
|
||||
Sync sync : 1;
|
||||
};
|
||||
};
|
||||
|
||||
constexpr u64 Address() const {
|
||||
return (static_cast<u64>(getHi) << 32) | (static_cast<u64>(get) << 2);
|
||||
}
|
||||
};
|
||||
static_assert(sizeof(GpEntry) == sizeof(u64));
|
||||
|
||||
/**
|
||||
* @brief A single pushbuffer method header that describes a compressed method sequence
|
||||
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_ram.ref.txt#L850
|
||||
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L179
|
||||
*/
|
||||
union PushBufferMethodHeader {
|
||||
u32 raw; //!< The entire header word, every other member of this union is a view over it
|
||||
|
||||
enum class TertOp : u8 { //!< NOTE(review): presumably only meaningful when SecOp is one of the 'UseTert' values, which would explain the repeated 0 below - confirm against the linked headers
|
||||
Grp0IncMethod = 0,
|
||||
Grp0SetSubDevMask = 1,
|
||||
Grp0StoreSubDevMask = 2,
|
||||
Grp0UseSubDevMask = 3,
|
||||
Grp2NonIncMethod = 0,
|
||||
};
|
||||
|
||||
enum class SecOp : u8 { //!< The 3-bit operation held in 'secOp'
|
||||
Grp0UseTert = 0,
|
||||
IncMethod = 1,
|
||||
Grp2UseTert = 2,
|
||||
NonIncMethod = 3,
|
||||
ImmdDataMethod = 4,
|
||||
OneInc = 5,
|
||||
Reserved6 = 6,
|
||||
EndPbSegment = 7,
|
||||
};
|
||||
|
||||
u16 methodAddress : 12; //!< A 12-bit field declared directly in the union, so it starts at the first bit-field position of the word
|
||||
struct {
|
||||
u8 _pad0_ : 4; //!< Skips the 4 bits that precede 'subDeviceMask'
|
||||
u16 subDeviceMask : 12;
|
||||
};
|
||||
|
||||
struct {
|
||||
u16 _pad1_ : 13; //!< Skips the 13 bits that precede 'methodSubChannel'
|
||||
u8 methodSubChannel : 3;
|
||||
union { //!< Three alternative views of the bits that follow 'methodSubChannel'
|
||||
TertOp tertOp : 3;
|
||||
u16 methodCount : 13;
|
||||
u16 immdData : 13;
|
||||
};
|
||||
};
|
||||
|
||||
struct {
|
||||
u32 _pad2_ : 29; //!< Skips the 29 bits that precede 'secOp'
|
||||
SecOp secOp : 3;
|
||||
};
|
||||
};
|
||||
static_assert(sizeof(PushBufferMethodHeader) == sizeof(u32));
|
||||
|
||||
/**
|
||||
* @brief The GPFIFO class handles creating pushbuffers from GP entries and then processing them
|
||||
* @note This class doesn't perfectly map to any particular hardware component on the X1, it does a mix of the GPU Host PBDMA (With and handling the GPFIFO entries
|
||||
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt#L62
|
||||
*/
|
||||
class GPFIFO {
|
||||
const DeviceState &state;
|
||||
engine::GPFIFO gpfifoEngine; //!< The engine for processing GPFIFO method calls
|
||||
std::array<engine::Engine*, 8> subchannels;
|
||||
std::optional<CircularQueue<GpEntry>> pushBuffers;
|
||||
std::thread thread; //!< The thread that manages processing of pushbuffers
|
||||
std::vector<u32> pushBufferData; //!< Persistent vector storing pushbuffer data to avoid constant reallocations
|
||||
|
||||
/**
|
||||
* @brief Sends a method call to the GPU hardware
|
||||
*/
|
||||
void Send(MethodParams params);
|
||||
|
||||
/**
|
||||
* @brief Processes the pushbuffer contained within the given GpEntry, calling methods as needed
|
||||
*/
|
||||
void Process(GpEntry gpEntry);
|
||||
|
||||
public:
|
||||
GPFIFO(const DeviceState &state) : state(state), gpfifoEngine(state) {}
|
||||
|
||||
~GPFIFO();
|
||||
|
||||
/**
|
||||
* @param numBuffers The amount of push-buffers to allocate in the circular buffer
|
||||
*/
|
||||
void Initialize(size_t numBuffers);
|
||||
|
||||
/**
|
||||
* @brief Executes all pending entries in the FIFO
|
||||
*/
|
||||
void Run();
|
||||
|
||||
/**
|
||||
* @brief Pushes a list of entries to the FIFO, these commands will be executed on calls to 'Step'
|
||||
*/
|
||||
void Push(span<GpEntry> entries);
|
||||
};
|
||||
}
|
@ -2,10 +2,12 @@
|
||||
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#include <kernel/types/KProcess.h>
|
||||
#include "memory_manager.h"
|
||||
#include "gmmu.h"
|
||||
|
||||
namespace skyline::gpu::vmm {
|
||||
MemoryManager::MemoryManager(const DeviceState &state) : state(state) {
|
||||
namespace skyline::soc::gmmu {
|
||||
constexpr u64 GpuPageSize{1 << 16}; //!< The page size of the GPU address space
|
||||
|
||||
GraphicsMemoryManager::GraphicsMemoryManager(const DeviceState &state) : state(state) {
|
||||
constexpr u64 gpuAddressSpaceSize{1UL << 40}; //!< The size of the GPU address space
|
||||
constexpr u64 gpuAddressSpaceBase{0x100000}; //!< The base of the GPU address space - must be non-zero
|
||||
|
||||
@ -14,9 +16,9 @@ namespace skyline::gpu::vmm {
|
||||
chunks.push_back(baseChunk);
|
||||
}
|
||||
|
||||
std::optional<ChunkDescriptor> MemoryManager::FindChunk(ChunkState desiredState, u64 size, u64 alignment) {
|
||||
std::optional<ChunkDescriptor> GraphicsMemoryManager::FindChunk(ChunkState desiredState, u64 size, u64 alignment) {
|
||||
auto chunk{std::find_if(chunks.begin(), chunks.end(), [desiredState, size, alignment](const ChunkDescriptor &chunk) -> bool {
|
||||
return (alignment ? util::IsAligned(chunk.virtAddr, alignment) : true) && chunk.size > size && chunk.state == desiredState;
|
||||
return (alignment ? util::IsAligned(chunk.virtualAddress, alignment) : true) && chunk.size > size && chunk.state == desiredState;
|
||||
})};
|
||||
|
||||
if (chunk != chunks.end())
|
||||
@ -25,12 +27,12 @@ namespace skyline::gpu::vmm {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
u64 MemoryManager::InsertChunk(const ChunkDescriptor &newChunk) {
|
||||
u64 GraphicsMemoryManager::InsertChunk(const ChunkDescriptor &newChunk) {
|
||||
auto chunkEnd{chunks.end()};
|
||||
for (auto chunk{chunks.begin()}; chunk != chunkEnd; chunk++) {
|
||||
if (chunk->CanContain(newChunk)) {
|
||||
auto oldChunk{*chunk};
|
||||
u64 newSize{newChunk.virtAddr - chunk->virtAddr};
|
||||
u64 newSize{newChunk.virtualAddress - chunk->virtualAddress};
|
||||
u64 extension{chunk->size - newSize - newChunk.size};
|
||||
|
||||
if (newSize == 0) {
|
||||
@ -41,16 +43,16 @@ namespace skyline::gpu::vmm {
|
||||
}
|
||||
|
||||
if (extension)
|
||||
chunks.insert(std::next(chunk), ChunkDescriptor(newChunk.virtAddr + newChunk.size, extension, (oldChunk.state == ChunkState::Mapped) ? (oldChunk.cpuPtr + newSize + newChunk.size) : nullptr, oldChunk.state));
|
||||
chunks.insert(std::next(chunk), ChunkDescriptor(newChunk.virtualAddress + newChunk.size, extension, (oldChunk.state == ChunkState::Mapped) ? (oldChunk.cpuPtr + newSize + newChunk.size) : nullptr, oldChunk.state));
|
||||
|
||||
return newChunk.virtAddr;
|
||||
} else if (chunk->virtAddr + chunk->size > newChunk.virtAddr) {
|
||||
chunk->size = newChunk.virtAddr - chunk->virtAddr;
|
||||
return newChunk.virtualAddress;
|
||||
} else if (chunk->virtualAddress + chunk->size > newChunk.virtualAddress) {
|
||||
chunk->size = newChunk.virtualAddress - chunk->virtualAddress;
|
||||
|
||||
// Deletes all chunks that are within the chunk being inserted and split the final one
|
||||
auto tailChunk{std::next(chunk)};
|
||||
while (tailChunk != chunkEnd) {
|
||||
if (tailChunk->virtAddr + tailChunk->size >= newChunk.virtAddr + newChunk.size)
|
||||
if (tailChunk->virtualAddress + tailChunk->size >= newChunk.virtualAddress + newChunk.size)
|
||||
break;
|
||||
|
||||
tailChunk = chunks.erase(tailChunk);
|
||||
@ -61,8 +63,8 @@ namespace skyline::gpu::vmm {
|
||||
if (tailChunk == chunkEnd)
|
||||
break;
|
||||
|
||||
u64 chunkSliceOffset{newChunk.virtAddr + newChunk.size - tailChunk->virtAddr};
|
||||
tailChunk->virtAddr += chunkSliceOffset;
|
||||
u64 chunkSliceOffset{newChunk.virtualAddress + newChunk.size - tailChunk->virtualAddress};
|
||||
tailChunk->virtualAddress += chunkSliceOffset;
|
||||
tailChunk->size -= chunkSliceOffset;
|
||||
if (tailChunk->state == ChunkState::Mapped)
|
||||
tailChunk->cpuPtr += chunkSliceOffset;
|
||||
@ -74,19 +76,19 @@ namespace skyline::gpu::vmm {
|
||||
else
|
||||
chunks.insert(std::next(headChunk), newChunk);
|
||||
|
||||
return newChunk.virtAddr;
|
||||
return newChunk.virtualAddress;
|
||||
}
|
||||
}
|
||||
|
||||
throw exception("Failed to insert chunk into GPU address space!");
|
||||
}
|
||||
|
||||
u64 MemoryManager::ReserveSpace(u64 size, u64 alignment) {
|
||||
size = util::AlignUp(size, constant::GpuPageSize);
|
||||
u64 GraphicsMemoryManager::ReserveSpace(u64 size, u64 alignment) {
|
||||
size = util::AlignUp(size, GpuPageSize);
|
||||
|
||||
std::unique_lock lock(vmmMutex);
|
||||
std::unique_lock lock(mutex);
|
||||
auto newChunk{FindChunk(ChunkState::Unmapped, size, alignment)};
|
||||
if (!newChunk)
|
||||
if (!newChunk) [[unlikely]]
|
||||
return 0;
|
||||
|
||||
auto chunk{*newChunk};
|
||||
@ -96,22 +98,22 @@ namespace skyline::gpu::vmm {
|
||||
return InsertChunk(chunk);
|
||||
}
|
||||
|
||||
u64 MemoryManager::ReserveFixed(u64 virtAddr, u64 size) {
|
||||
if (!util::IsAligned(virtAddr, constant::GpuPageSize))
|
||||
u64 GraphicsMemoryManager::ReserveFixed(u64 virtualAddress, u64 size) {
|
||||
if (!util::IsAligned(virtualAddress, GpuPageSize)) [[unlikely]]
|
||||
return 0;
|
||||
|
||||
size = util::AlignUp(size, constant::GpuPageSize);
|
||||
size = util::AlignUp(size, GpuPageSize);
|
||||
|
||||
std::unique_lock lock(vmmMutex);
|
||||
return InsertChunk(ChunkDescriptor(virtAddr, size, nullptr, ChunkState::Reserved));
|
||||
std::unique_lock lock(mutex);
|
||||
return InsertChunk(ChunkDescriptor(virtualAddress, size, nullptr, ChunkState::Reserved));
|
||||
}
|
||||
|
||||
u64 MemoryManager::MapAllocate(u8 *cpuPtr, u64 size) {
|
||||
size = util::AlignUp(size, constant::GpuPageSize);
|
||||
u64 GraphicsMemoryManager::MapAllocate(u8 *cpuPtr, u64 size) {
|
||||
size = util::AlignUp(size, GpuPageSize);
|
||||
|
||||
std::unique_lock lock(vmmMutex);
|
||||
std::unique_lock lock(mutex);
|
||||
auto mappedChunk{FindChunk(ChunkState::Unmapped, size)};
|
||||
if (!mappedChunk)
|
||||
if (!mappedChunk) [[unlikely]]
|
||||
return 0;
|
||||
|
||||
auto chunk{*mappedChunk};
|
||||
@ -122,23 +124,23 @@ namespace skyline::gpu::vmm {
|
||||
return InsertChunk(chunk);
|
||||
}
|
||||
|
||||
u64 MemoryManager::MapFixed(u64 virtAddr, u8 *cpuPtr, u64 size) {
|
||||
if (!util::IsAligned(virtAddr, constant::GpuPageSize))
|
||||
u64 GraphicsMemoryManager::MapFixed(u64 virtualAddress, u8 *cpuPtr, u64 size) {
|
||||
if (!util::IsAligned(virtualAddress, GpuPageSize)) [[unlikely]]
|
||||
return 0;
|
||||
|
||||
size = util::AlignUp(size, constant::GpuPageSize);
|
||||
size = util::AlignUp(size, GpuPageSize);
|
||||
|
||||
std::unique_lock lock(vmmMutex);
|
||||
return InsertChunk(ChunkDescriptor(virtAddr, size, cpuPtr, ChunkState::Mapped));
|
||||
std::unique_lock lock(mutex);
|
||||
return InsertChunk(ChunkDescriptor(virtualAddress, size, cpuPtr, ChunkState::Mapped));
|
||||
}
|
||||
|
||||
bool MemoryManager::Unmap(u64 virtAddr, u64 size) {
|
||||
if (!util::IsAligned(virtAddr, constant::GpuPageSize))
|
||||
bool GraphicsMemoryManager::Unmap(u64 virtualAddress, u64 size) {
|
||||
if (!util::IsAligned(virtualAddress, GpuPageSize)) [[unlikely]]
|
||||
return false;
|
||||
|
||||
try {
|
||||
std::unique_lock lock(vmmMutex);
|
||||
InsertChunk(ChunkDescriptor(virtAddr, size, nullptr, ChunkState::Unmapped));
|
||||
std::unique_lock lock(mutex);
|
||||
InsertChunk(ChunkDescriptor(virtualAddress, size, nullptr, ChunkState::Unmapped));
|
||||
} catch (const std::exception &e) {
|
||||
return false;
|
||||
}
|
||||
@ -146,20 +148,20 @@ namespace skyline::gpu::vmm {
|
||||
return true;
|
||||
}
|
||||
|
||||
void MemoryManager::Read(u8 *destination, u64 virtAddr, u64 size) {
|
||||
std::shared_lock lock(vmmMutex);
|
||||
void GraphicsMemoryManager::Read(u8 *destination, u64 virtualAddress, u64 size) {
|
||||
std::shared_lock lock(mutex);
|
||||
|
||||
auto chunk{std::upper_bound(chunks.begin(), chunks.end(), virtAddr, [](const u64 address, const ChunkDescriptor &chunk) -> bool {
|
||||
return address < chunk.virtAddr;
|
||||
auto chunk{std::upper_bound(chunks.begin(), chunks.end(), virtualAddress, [](const u64 address, const ChunkDescriptor &chunk) -> bool {
|
||||
return address < chunk.virtualAddress;
|
||||
})};
|
||||
|
||||
if (chunk == chunks.end() || chunk->state != ChunkState::Mapped)
|
||||
throw exception("Failed to read region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtAddr, size);
|
||||
throw exception("Failed to read region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, size);
|
||||
|
||||
chunk--;
|
||||
|
||||
u64 initialSize{size};
|
||||
u64 chunkOffset{virtAddr - chunk->virtAddr};
|
||||
u64 chunkOffset{virtualAddress - chunk->virtualAddress};
|
||||
u8 *source{chunk->cpuPtr + chunkOffset};
|
||||
u64 sourceSize{std::min(chunk->size - chunkOffset, size)};
|
||||
|
||||
@ -170,7 +172,7 @@ namespace skyline::gpu::vmm {
|
||||
size -= sourceSize;
|
||||
if (size) {
|
||||
if (++chunk == chunks.end() || chunk->state != ChunkState::Mapped)
|
||||
throw exception("Failed to read region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtAddr, size);
|
||||
throw exception("Failed to read region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, size);
|
||||
|
||||
source = chunk->cpuPtr;
|
||||
sourceSize = std::min(chunk->size, size);
|
||||
@ -178,20 +180,20 @@ namespace skyline::gpu::vmm {
|
||||
}
|
||||
}
|
||||
|
||||
void MemoryManager::Write(u8 *source, u64 virtAddr, u64 size) {
|
||||
std::shared_lock lock(vmmMutex);
|
||||
void GraphicsMemoryManager::Write(u8 *source, u64 virtualAddress, u64 size) {
|
||||
std::shared_lock lock(mutex);
|
||||
|
||||
auto chunk{std::upper_bound(chunks.begin(), chunks.end(), virtAddr, [](const u64 address, const ChunkDescriptor &chunk) -> bool {
|
||||
return address < chunk.virtAddr;
|
||||
auto chunk{std::upper_bound(chunks.begin(), chunks.end(), virtualAddress, [](const u64 address, const ChunkDescriptor &chunk) -> bool {
|
||||
return address < chunk.virtualAddress;
|
||||
})};
|
||||
|
||||
if (chunk == chunks.end() || chunk->state != ChunkState::Mapped)
|
||||
throw exception("Failed to write region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtAddr, size);
|
||||
throw exception("Failed to write region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, size);
|
||||
|
||||
chunk--;
|
||||
|
||||
u64 initialSize{size};
|
||||
u64 chunkOffset{virtAddr - chunk->virtAddr};
|
||||
u64 chunkOffset{virtualAddress - chunk->virtualAddress};
|
||||
u8 *destination{chunk->cpuPtr + chunkOffset};
|
||||
u64 destinationSize{std::min(chunk->size - chunkOffset, size)};
|
||||
|
||||
@ -202,7 +204,7 @@ namespace skyline::gpu::vmm {
|
||||
size -= destinationSize;
|
||||
if (size) {
|
||||
if (++chunk == chunks.end() || chunk->state != ChunkState::Mapped)
|
||||
throw exception("Failed to write region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtAddr, size);
|
||||
throw exception("Failed to write region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, size);
|
||||
|
||||
destination = chunk->cpuPtr;
|
||||
destinationSize = std::min(chunk->size, size);
|
140
app/src/main/cpp/skyline/soc/gmmu.h
Normal file
140
app/src/main/cpp/skyline/soc/gmmu.h
Normal file
@ -0,0 +1,140 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <common.h>
|
||||
|
||||
namespace skyline::soc::gmmu {
|
||||
/**
 * @brief The state a chunk of the virtual address space can be in
 */
enum class ChunkState {
    Unmapped, //!< Nothing is mapped at the chunk
    Reserved, //!< The chunk has been reserved
    Mapped, //!< The chunk is mapped, a CPU side address is present for it
};
|
||||
|
||||
struct ChunkDescriptor {
|
||||
u64 virtualAddress; //!< The address of the chunk in the virtual address space
|
||||
u64 size; //!< The size of the chunk in bytes
|
||||
u8 *cpuPtr; //!< A pointer to the chunk in the application's address space (if mapped)
|
||||
ChunkState state;
|
||||
|
||||
ChunkDescriptor(u64 virtualAddress, u64 size, u8 *cpuPtr, ChunkState state) : virtualAddress(virtualAddress), size(size), cpuPtr(cpuPtr), state(state) {}
|
||||
|
||||
/**
|
||||
* @return If the given chunk can be contained wholly within this chunk
|
||||
*/
|
||||
inline bool CanContain(const ChunkDescriptor &chunk) {
|
||||
return (chunk.virtualAddress >= virtualAddress) && ((size + virtualAddress) >= (chunk.size + chunk.virtualAddress));
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief The GraphicsMemoryManager class handles mapping between a Maxwell GPU virtual address space and an application's address space and is meant to roughly emulate the GMMU on the X1
|
||||
* @note This is not accurate to the X1 as it would have an SMMU between the GMMU and physical memory but we don't emulate this abstraction at the moment
|
||||
*/
|
||||
class GraphicsMemoryManager {
|
||||
private:
|
||||
const DeviceState &state;
|
||||
std::vector<ChunkDescriptor> chunks;
|
||||
std::shared_mutex mutex;
|
||||
|
||||
/**
|
||||
* @brief Finds a chunk in the virtual address space that is larger than meets the given requirements
|
||||
* @note vmmMutex MUST be locked when calling this
|
||||
* @param desiredState The state of the chunk to find
|
||||
* @param size The minimum size of the chunk to find
|
||||
* @param alignment The minimum alignment of the chunk to find
|
||||
* @return The first applicable chunk
|
||||
*/
|
||||
std::optional<ChunkDescriptor> FindChunk(ChunkState desiredState, u64 size, u64 alignment = 0);
|
||||
|
||||
/**
|
||||
* @brief Inserts a chunk into the chunk list, resizing and splitting as necessary
|
||||
* @note vmmMutex MUST be locked when calling this
|
||||
* @param newChunk The chunk to insert
|
||||
* @return The base virtual address of the inserted chunk
|
||||
*/
|
||||
u64 InsertChunk(const ChunkDescriptor &newChunk);
|
||||
|
||||
public:
|
||||
GraphicsMemoryManager(const DeviceState &state);
|
||||
|
||||
/**
|
||||
* @brief Reserves a region of the virtual address space so it will not be chosen automatically when mapping
|
||||
* @param size The size of the region to reserve
|
||||
* @param alignment The alignment of the region to reserve
|
||||
* @return The base virtual address of the reserved region
|
||||
*/
|
||||
u64 ReserveSpace(u64 size, u64 alignment);
|
||||
|
||||
/**
|
||||
* @brief Reserves a fixed region of the virtual address space so it will not be chosen automatically when mapping
|
||||
* @param virtualAddress The virtual base address of the region to allocate
|
||||
* @param size The size of the region to allocate
|
||||
* @return The base virtual address of the reserved region
|
||||
*/
|
||||
u64 ReserveFixed(u64 virtualAddress, u64 size);
|
||||
|
||||
/**
|
||||
* @brief Maps a CPU memory region into an automatically chosen region of the virtual address space
|
||||
* @param cpuPtr A pointer to the region to be mapped into the virtual address space
|
||||
* @param size The size of the region to map
|
||||
* @return The base virtual address of the mapped region
|
||||
*/
|
||||
u64 MapAllocate(u8 *cpuPtr, u64 size);
|
||||
|
||||
/**
|
||||
* @brief Maps a CPU memory region to a fixed region in the virtual address space
|
||||
* @param virtualAddress The target virtual address of the region
|
||||
* @param cpuPtr A pointer to the region to be mapped into the virtual address space
|
||||
* @param size The size of the region to map
|
||||
* @return The base virtual address of the mapped region
|
||||
*/
|
||||
u64 MapFixed(u64 virtualAddress, u8 *cpuPtr, u64 size);
|
||||
|
||||
/**
|
||||
* @brief Unmaps all chunks in the given region from the virtual address space
|
||||
* @return Whether the operation succeeded
|
||||
*/
|
||||
bool Unmap(u64 virtualAddress, u64 size);
|
||||
|
||||
void Read(u8 *destination, u64 virtualAddress, u64 size);
|
||||
|
||||
/**
|
||||
* @brief Reads in a span from a region of the virtual address space
|
||||
*/
|
||||
template<typename T>
|
||||
void Read(span <T> destination, u64 virtualAddress) {
|
||||
Read(reinterpret_cast<u8 *>(destination.data()), virtualAddress, destination.size_bytes());
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Reads in an object from a region of the virtual address space
|
||||
* @tparam T The type of object to return
|
||||
*/
|
||||
template<typename T>
|
||||
T Read(u64 virtualAddress) {
|
||||
T obj;
|
||||
Read(reinterpret_cast<u8 *>(&obj), virtualAddress, sizeof(T));
|
||||
return obj;
|
||||
}
|
||||
|
||||
void Write(u8 *source, u64 virtualAddress, u64 size);
|
||||
|
||||
/**
|
||||
* @brief Writes out a span to a region of the virtual address space
|
||||
*/
|
||||
template<typename T>
|
||||
void Write(span <T> source, u64 virtualAddress) {
|
||||
Write(reinterpret_cast<u8 *>(source.data()), virtualAddress, source.size_bytes());
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Reads in an object from a region of the virtual address space
|
||||
*/
|
||||
template<typename T>
|
||||
void Write(T source, u64 virtualAddress) {
|
||||
Write(reinterpret_cast<u8 *>(&source), virtualAddress, sizeof(T));
|
||||
}
|
||||
};
|
||||
}
|
17
app/src/main/cpp/skyline/soc/host1x.h
Normal file
17
app/src/main/cpp/skyline/soc/host1x.h
Normal file
@ -0,0 +1,17 @@
|
||||
// SPDX-License-Identifier: LGPL-3.0-or-later
|
||||
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "host1x/syncpoint.h"
|
||||
|
||||
namespace skyline::soc::host1x {
|
||||
/**
|
||||
* @brief An abstraction for the graphics host, this handles DMA on behalf of the CPU when communicating to it's clients alongside handling syncpts
|
||||
* @note This is different from the GM20B Host, it serves a similar function and has an interface for accessing Host1X syncpts
|
||||
*/
|
||||
class Host1X {
|
||||
public:
|
||||
std::array<Syncpoint, SyncpointCount> syncpoints{};
|
||||
};
|
||||
}
|
@ -3,7 +3,7 @@
|
||||
|
||||
#include "syncpoint.h"
|
||||
|
||||
namespace skyline::gpu {
|
||||
namespace skyline::soc::host1x {
|
||||
u64 Syncpoint::RegisterWaiter(u32 threshold, const std::function<void()> &callback) {
|
||||
if (value >= threshold) {
|
||||
callback();
|
52
app/src/main/cpp/skyline/soc/host1x/syncpoint.h
Normal file
52
app/src/main/cpp/skyline/soc/host1x/syncpoint.h
Normal file
@ -0,0 +1,52 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <common.h>
|
||||
|
||||
namespace skyline::soc::host1x {
|
||||
constexpr size_t SyncpointCount{192}; //!< The number of host1x syncpoints on T210
|
||||
|
||||
/**
|
||||
* @brief The Syncpoint class represents a single syncpoint in the GPU which is used for GPU -> CPU synchronisation
|
||||
*/
|
||||
class Syncpoint {
|
||||
private:
|
||||
struct Waiter {
|
||||
u32 threshold; //!< The syncpoint value to wait on to be reached
|
||||
std::function<void()> callback; //!< The callback to do after the wait has ended
|
||||
};
|
||||
|
||||
std::mutex waiterLock; //!< Synchronizes insertions and deletions of waiters
|
||||
std::map<u64, Waiter> waiterMap;
|
||||
u64 nextWaiterId{1};
|
||||
|
||||
public:
|
||||
std::atomic<u32> value{};
|
||||
|
||||
/**
|
||||
* @brief Registers a new waiter with a callback that will be called when the syncpoint reaches the target threshold
|
||||
* @note The callback will be called immediately if the syncpoint has already reached the given threshold
|
||||
* @return A persistent identifier that can be used to refer to the waiter, or 0 if the threshold has already been reached
|
||||
*/
|
||||
u64 RegisterWaiter(u32 threshold, const std::function<void()> &callback);
|
||||
|
||||
/**
|
||||
* @brief Removes a waiter given by 'id' from the pending waiter map
|
||||
*/
|
||||
void DeregisterWaiter(u64 id);
|
||||
|
||||
/**
|
||||
* @brief Increments the syncpoint by 1
|
||||
* @return The new value of the syncpoint
|
||||
*/
|
||||
u32 Increment();
|
||||
|
||||
/**
|
||||
* @brief Waits for the syncpoint to reach given threshold
|
||||
* @return false if the timeout was reached, otherwise true
|
||||
*/
|
||||
bool Wait(u32 threshold, std::chrono::steady_clock::duration timeout);
|
||||
};
|
||||
}
|
Loading…
Reference in New Issue
Block a user