mirror of
https://github.com/skyline-emu/skyline.git
synced 2024-12-29 16:25:36 +03:00
Implement and cleanup semaphore operations in all engines
Most engines have the capability to release a semaphore payload (or reduce it, in the case of GPFIFO) when a method is called or an action is complete. Games use semaphores both to time how long things take on the GPU and to wait on resources, so missing them can cause deadlocks or other related issues.
This commit is contained in:
parent
bca88685bd
commit
b81d5bc865
@ -4,6 +4,15 @@
|
|||||||
#include "engine.h"
|
#include "engine.h"
|
||||||
|
|
||||||
namespace skyline::soc::gm20b::engine {
|
namespace skyline::soc::gm20b::engine {
|
||||||
|
/**
 * @brief Converts the current nanosecond time from util::GetTimeNs() into GPU ticks
 * @return The current time scaled by 384/625, as an unsigned 64-bit tick count
 */
u64 GetGpuTimeTicks() {
    // The ns -> tick ratio is expressed as the fraction 384/625
    constexpr i64 TickNumerator{384};
    constexpr i64 TickDenominator{625};

    const i64 now{util::GetTimeNs()};
    const i64 wholeSteps{now / TickDenominator};
    const i64 leftoverNs{now % TickDenominator};

    // Scaling the quotient and the remainder separately keeps the intermediate products small compared to multiplying the full nanosecond value by the numerator
    const i64 ticks{wholeSteps * TickNumerator + (leftoverNs * TickNumerator) / TickDenominator};
    return static_cast<u64>(ticks);
}
|
||||||
|
|
||||||
// Stores the supplied macro state in the macroState member
MacroEngineBase::MacroEngineBase(MacroState &macroState) : macroState(macroState) {}
|
// Stores the supplied macro state in the macroState member
MacroEngineBase::MacroEngineBase(MacroState &macroState) : macroState(macroState) {}
|
||||||
|
|
||||||
void MacroEngineBase::HandleMacroCall(u32 macroMethodOffset, u32 argument, bool lastCall) {
|
void MacroEngineBase::HandleMacroCall(u32 macroMethodOffset, u32 argument, bool lastCall) {
|
||||||
|
@ -49,6 +49,11 @@ namespace skyline::soc::gm20b::engine {
|
|||||||
|
|
||||||
constexpr u32 EngineMethodsEnd = 0xE00; //!< All methods above this are passed to the MME on supported engines
|
constexpr u32 EngineMethodsEnd = 0xE00; //!< All methods above this are passed to the MME on supported engines
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Returns current time in GPU ticks
|
||||||
|
*/
|
||||||
|
u64 GetGpuTimeTicks();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief The MacroEngineBase interface provides an interface that can be used by engines to allow interfacing with the macro executer
|
* @brief The MacroEngineBase interface provides an interface that can be used by engines to allow interfacing with the macro executer
|
||||||
*/
|
*/
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
// SPDX-License-Identifier: MPL-2.0
|
// SPDX-License-Identifier: MPL-2.0
|
||||||
|
// Copyright © 2022 Ryujinx Team and Contributors (https://github.com/ryujinx/)
|
||||||
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||||
|
|
||||||
#include <soc.h>
|
#include <soc.h>
|
||||||
|
#include <soc/gm20b/gmmu.h>
|
||||||
#include <soc/gm20b/channel.h>
|
#include <soc/gm20b/channel.h>
|
||||||
#include "gpfifo.h"
|
#include "gpfifo.h"
|
||||||
|
|
||||||
@ -26,6 +28,56 @@ namespace skyline::soc::gm20b::engine {
|
|||||||
syncpoints.at(action.index).Wait(registers.syncpoint->payload, std::chrono::steady_clock::duration::max());
|
syncpoints.at(action.index).Wait(registers.syncpoint->payload, std::chrono::steady_clock::duration::max());
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
ENGINE_STRUCT_CASE(semaphore, action, {
    // Handles a write to the semaphore action register: optionally stamps a 16-byte release with the GPU time, then either releases the payload or applies a reduction to the value stored at the semaphore address

    // Write timestamp first to ensure ordering
    if (action.releaseSize == Registers::Semaphore::ReleaseSize::SixteenBytes) {
        // Bytes 4-7 are zeroed and bytes 8-15 receive the current GPU tick count
        channelCtx.asCtx->gmmu.Write<u32>(registers.semaphore->address + 4, 0);
        channelCtx.asCtx->gmmu.Write(registers.semaphore->address + 8, GetGpuTimeTicks());
    }

    if (action.operation == Registers::Semaphore::Operation::Release) {
        channelCtx.asCtx->gmmu.Write(registers.semaphore->address, registers.semaphore->payload);
    } else if (action.operation == Registers::Semaphore::Operation::Reduction) {
        u32 origVal{channelCtx.asCtx->gmmu.Read<u32>(registers.semaphore->address)};
        bool isSigned{action.format == Registers::Semaphore::Format::Signed};

        // https://github.com/NVIDIA/open-gpu-doc/blob/b7d1bd16fe62135ebaec306b39dfdbd9e5657827/manuals/turing/tu104/dev_pbdma.ref.txt#L3549
        u32 val{[](Registers::Semaphore::Reduction reduction, u32 origVal, u32 payload, bool isSigned) -> u32 {
            switch (reduction) {
                case Registers::Semaphore::Reduction::Min:
                    if (isSigned)
                        return static_cast<u32>(std::min(static_cast<i32>(origVal), static_cast<i32>(payload)));
                    else
                        return std::min(origVal, payload);
                case Registers::Semaphore::Reduction::Max:
                    if (isSigned)
                        return static_cast<u32>(std::max(static_cast<i32>(origVal), static_cast<i32>(payload)));
                    else
                        return std::max(origVal, payload);
                case Registers::Semaphore::Reduction::Xor:
                    return origVal ^ payload;
                case Registers::Semaphore::Reduction::And:
                    return origVal & payload;
                case Registers::Semaphore::Reduction::Or:
                    return origVal | payload;
                case Registers::Semaphore::Reduction::Add:
                    // Unsigned addition wraps and yields the same bit pattern as the signed sum, without the undefined behaviour of signed overflow, so both formats share it
                    return origVal + payload;
                case Registers::Semaphore::Reduction::Inc:
                    // Wraps to zero once the stored value reaches the payload
                    return (origVal >= payload) ? 0 : origVal + 1;
                case Registers::Semaphore::Reduction::Dec:
                    // Reloads the payload when the stored value is zero or above the payload
                    return (origVal == 0 || origVal > payload) ? payload : origVal - 1;
            }

            // The reduction field is 4 bits wide so it can carry values that no case above handles; keep the stored value as-is instead of flowing off the end of the lambda
            Logger::Warn("Unimplemented semaphore reduction: 0x{:X}", static_cast<u8>(reduction));
            return origVal;
        }(registers.semaphore->action.reduction, origVal, registers.semaphore->payload, isSigned)};

        channelCtx.asCtx->gmmu.Write(registers.semaphore->address, val);
    } else {
        Logger::Warn("Unimplemented semaphore operation: 0x{:X}", static_cast<u8>(registers.semaphore->action.operation));
    }
})
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -81,16 +81,7 @@ namespace skyline::soc::gm20b::engine {
|
|||||||
Unsigned = 1,
|
Unsigned = 1,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct {
|
Address address; // 0x4
|
||||||
u32 offsetUpper : 8;
|
|
||||||
u32 _pad0_ : 24;
|
|
||||||
}; // 0x4
|
|
||||||
|
|
||||||
struct {
|
|
||||||
u8 _pad1_ : 2;
|
|
||||||
u32 offsetLower : 30;
|
|
||||||
}; // 0x5
|
|
||||||
|
|
||||||
u32 payload; // 0x6
|
u32 payload; // 0x6
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
@ -104,7 +95,7 @@ namespace skyline::soc::gm20b::engine {
|
|||||||
u8 _pad5_ : 2;
|
u8 _pad5_ : 2;
|
||||||
Reduction reduction : 4;
|
Reduction reduction : 4;
|
||||||
Format format : 1;
|
Format format : 1;
|
||||||
}; // 0x7
|
} action; // 0x7
|
||||||
};
|
};
|
||||||
static_assert(sizeof(Semaphore) == 0x10);
|
static_assert(sizeof(Semaphore) == 0x10);
|
||||||
|
|
||||||
|
@ -688,6 +688,9 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
|
|||||||
})
|
})
|
||||||
|
|
||||||
ENGINE_STRUCT_CASE(semaphore, info, {
|
ENGINE_STRUCT_CASE(semaphore, info, {
|
||||||
|
if (info.reductionEnable)
|
||||||
|
Logger::Warn("Semaphore reduction is unimplemented!");
|
||||||
|
|
||||||
switch (info.op) {
|
switch (info.op) {
|
||||||
case type::SemaphoreInfo::Op::Release:
|
case type::SemaphoreInfo::Op::Release:
|
||||||
WriteSemaphoreResult(registers.semaphore->payload);
|
WriteSemaphoreResult(registers.semaphore->payload);
|
||||||
@ -751,26 +754,15 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Maxwell3D::WriteSemaphoreResult(u64 result) {
|
void Maxwell3D::WriteSemaphoreResult(u64 result) {
|
||||||
struct FourWordResult {
|
|
||||||
u64 value;
|
|
||||||
u64 timestamp;
|
|
||||||
};
|
|
||||||
|
|
||||||
switch (registers.semaphore->info.structureSize) {
|
switch (registers.semaphore->info.structureSize) {
|
||||||
case type::SemaphoreInfo::StructureSize::OneWord:
|
case type::SemaphoreInfo::StructureSize::OneWord:
|
||||||
channelCtx.asCtx->gmmu.Write<u32>(registers.semaphore->address, static_cast<u32>(result));
|
channelCtx.asCtx->gmmu.Write(registers.semaphore->address, static_cast<u32>(result));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case type::SemaphoreInfo::StructureSize::FourWords: {
|
case type::SemaphoreInfo::StructureSize::FourWords: {
|
||||||
// Convert the current nanosecond time to GPU ticks
|
// Write timestamp first to ensure correct ordering
|
||||||
constexpr i64 NsToTickNumerator{384};
|
channelCtx.asCtx->gmmu.Write(registers.semaphore->address + 8, GetGpuTimeTicks());
|
||||||
constexpr i64 NsToTickDenominator{625};
|
channelCtx.asCtx->gmmu.Write(registers.semaphore->address, result);
|
||||||
|
|
||||||
i64 nsTime{util::GetTimeNs()};
|
|
||||||
i64 timestamp{(nsTime / NsToTickDenominator) * NsToTickNumerator + ((nsTime % NsToTickDenominator) * NsToTickNumerator) / NsToTickDenominator};
|
|
||||||
|
|
||||||
channelCtx.asCtx->gmmu.Write<FourWordResult>(registers.semaphore->address,
|
|
||||||
FourWordResult{result, static_cast<u64>(timestamp)});
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -53,6 +53,26 @@ namespace skyline::soc::gm20b::engine {
|
|||||||
Logger::Debug("src: 0x{:X} dst: 0x{:X} size: 0x{:X}", u64{*registers.offsetIn}, u64{*registers.offsetOut}, *registers.lineLengthIn);
|
Logger::Debug("src: 0x{:X} dst: 0x{:X} size: 0x{:X}", u64{*registers.offsetIn}, u64{*registers.offsetOut}, *registers.lineLengthIn);
|
||||||
channelCtx.asCtx->gmmu.Copy(*registers.offsetOut, *registers.offsetIn, *registers.lineLengthIn);
|
channelCtx.asCtx->gmmu.Copy(*registers.offsetOut, *registers.offsetIn, *registers.lineLengthIn);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ReleaseSemaphore();
|
||||||
|
}
|
||||||
|
|
||||||
|
void MaxwellDma::ReleaseSemaphore() {
|
||||||
|
if (registers.launchDma->reductionEnable)
|
||||||
|
Logger::Warn("Semaphore reduction is unimplemented!");
|
||||||
|
|
||||||
|
switch (registers.launchDma->semaphoreType) {
|
||||||
|
case Registers::LaunchDma::SemaphoreType::ReleaseOneWordSemaphore:
|
||||||
|
channelCtx.asCtx->gmmu.Write(registers.semaphore->address, registers.semaphore->payload);
|
||||||
|
break;
|
||||||
|
case Registers::LaunchDma::SemaphoreType::ReleaseFourWordSemaphore:
|
||||||
|
// Write timestamp first to ensure correct ordering
|
||||||
|
channelCtx.asCtx->gmmu.Write(registers.semaphore->address + 8, GetGpuTimeTicks());
|
||||||
|
channelCtx.asCtx->gmmu.Write(registers.semaphore->address, static_cast<u64>(registers.semaphore->payload));
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void MaxwellDma::CopyPitchToBlockLinear() {
|
void MaxwellDma::CopyPitchToBlockLinear() {
|
||||||
|
@ -27,6 +27,8 @@ namespace skyline::soc::gm20b::engine {
|
|||||||
|
|
||||||
void LaunchDma();
|
void LaunchDma();
|
||||||
|
|
||||||
|
void ReleaseSemaphore();
|
||||||
|
|
||||||
void CopyPitchToBlockLinear();
|
void CopyPitchToBlockLinear();
|
||||||
|
|
||||||
void CopyBlockLinearToPitch();
|
void CopyBlockLinearToPitch();
|
||||||
|
Loading…
Reference in New Issue
Block a user