mirror of
https://github.com/skyline-emu/skyline.git
synced 2025-01-16 11:07:54 +03:00
Revamp Host1X Syncpoint + Address Review Comments
This commit is contained in:
parent
b9af701bbe
commit
16f875bab6
@ -34,7 +34,7 @@ namespace skyline::gpu {
|
|||||||
/**
|
/**
|
||||||
* @url https://developer.android.com/ndk/reference/group/choreographer#achoreographer_framecallback
|
* @url https://developer.android.com/ndk/reference/group/choreographer#achoreographer_framecallback
|
||||||
*/
|
*/
|
||||||
void ChoreographerCallback(long frameTimeNanos, kernel::type::KEvent* vsyncEvent) {
|
void ChoreographerCallback(long frameTimeNanos, kernel::type::KEvent *vsyncEvent) {
|
||||||
vsyncEvent->Signal();
|
vsyncEvent->Signal();
|
||||||
AChoreographer_postFrameCallback(AChoreographer_getInstance(), reinterpret_cast<AChoreographer_frameCallback>(&ChoreographerCallback), vsyncEvent);
|
AChoreographer_postFrameCallback(AChoreographer_getInstance(), reinterpret_cast<AChoreographer_frameCallback>(&ChoreographerCallback), vsyncEvent);
|
||||||
}
|
}
|
||||||
@ -42,7 +42,7 @@ namespace skyline::gpu {
|
|||||||
void PresentationEngine::ChoreographerThread() {
|
void PresentationEngine::ChoreographerThread() {
|
||||||
choreographerLooper = ALooper_prepare(0);
|
choreographerLooper = ALooper_prepare(0);
|
||||||
AChoreographer_postFrameCallback(AChoreographer_getInstance(), reinterpret_cast<AChoreographer_frameCallback>(&ChoreographerCallback), vsyncEvent.get());
|
AChoreographer_postFrameCallback(AChoreographer_getInstance(), reinterpret_cast<AChoreographer_frameCallback>(&ChoreographerCallback), vsyncEvent.get());
|
||||||
ALooper_pollAll(-1, nullptr, nullptr, nullptr);
|
ALooper_pollAll(-1, nullptr, nullptr, nullptr); // Will block and process callbacks till ALooper_wake() is called
|
||||||
}
|
}
|
||||||
|
|
||||||
service::hosbinder::NativeWindowTransform GetAndroidTransform(vk::SurfaceTransformFlagBitsKHR transform) {
|
service::hosbinder::NativeWindowTransform GetAndroidTransform(vk::SurfaceTransformFlagBitsKHR transform) {
|
||||||
@ -70,8 +70,8 @@ namespace skyline::gpu {
|
|||||||
|
|
||||||
void PresentationEngine::UpdateSwapchain(texture::Format format, texture::Dimensions extent) {
|
void PresentationEngine::UpdateSwapchain(texture::Format format, texture::Dimensions extent) {
|
||||||
auto minImageCount{std::max(vkSurfaceCapabilities.minImageCount, state.settings->forceTripleBuffering ? 3U : 0U)};
|
auto minImageCount{std::max(vkSurfaceCapabilities.minImageCount, state.settings->forceTripleBuffering ? 3U : 0U)};
|
||||||
if (minImageCount > MaxSlotCount)
|
if (minImageCount > MaxSwapchainImageCount)
|
||||||
throw exception("Requesting swapchain with higher image count ({}) than maximum slot count ({})", minImageCount, MaxSlotCount);
|
throw exception("Requesting swapchain with higher image count ({}) than maximum slot count ({})", minImageCount, MaxSwapchainImageCount);
|
||||||
|
|
||||||
const auto &capabilities{vkSurfaceCapabilities};
|
const auto &capabilities{vkSurfaceCapabilities};
|
||||||
if (minImageCount < capabilities.minImageCount || (capabilities.maxImageCount && minImageCount > capabilities.maxImageCount))
|
if (minImageCount < capabilities.minImageCount || (capabilities.maxImageCount && minImageCount > capabilities.maxImageCount))
|
||||||
@ -104,16 +104,17 @@ namespace skyline::gpu {
|
|||||||
});
|
});
|
||||||
|
|
||||||
auto vkImages{vkSwapchain->getImages()};
|
auto vkImages{vkSwapchain->getImages()};
|
||||||
if (vkImages.size() > MaxSlotCount)
|
if (vkImages.size() > MaxSwapchainImageCount)
|
||||||
throw exception("Swapchain has higher image count ({}) than maximum slot count ({})", minImageCount, MaxSlotCount);
|
throw exception("Swapchain has higher image count ({}) than maximum slot count ({})", minImageCount, MaxSwapchainImageCount);
|
||||||
|
|
||||||
for (size_t index{}; index < vkImages.size(); index++) {
|
for (size_t index{}; index < vkImages.size(); index++) {
|
||||||
auto &slot{slots[index]};
|
auto &slot{images[index]};
|
||||||
slot = std::make_shared<Texture>(*state.gpu, vkImages[index], extent, format::GetFormat(format), vk::ImageLayout::eUndefined, vk::ImageTiling::eOptimal);
|
slot = std::make_shared<Texture>(*state.gpu, vkImages[index], extent, format::GetFormat(format), vk::ImageLayout::eUndefined, vk::ImageTiling::eOptimal);
|
||||||
slot->TransitionLayout(vk::ImageLayout::ePresentSrcKHR);
|
slot->TransitionLayout(vk::ImageLayout::ePresentSrcKHR);
|
||||||
}
|
}
|
||||||
for (size_t index{vkImages.size()}; index < MaxSlotCount; index++)
|
for (size_t index{vkImages.size()}; index < MaxSwapchainImageCount; index++)
|
||||||
slots[index] = {};
|
// We need to clear all the slots which aren't filled, keeping around stale slots could lead to issues
|
||||||
|
images[index] = {};
|
||||||
|
|
||||||
swapchainFormat = format;
|
swapchainFormat = format;
|
||||||
swapchainExtent = extent;
|
swapchainExtent = extent;
|
||||||
@ -157,14 +158,15 @@ namespace skyline::gpu {
|
|||||||
UpdateSwapchain(texture->format, texture->dimensions);
|
UpdateSwapchain(texture->format, texture->dimensions);
|
||||||
|
|
||||||
std::pair<vk::Result, u32> nextImage;
|
std::pair<vk::Result, u32> nextImage;
|
||||||
while ((nextImage = vkSwapchain->acquireNextImage(std::numeric_limits<u64>::max(), {}, *acquireFence)).first != vk::Result::eSuccess) [[unlikely]]
|
while (nextImage = vkSwapchain->acquireNextImage(std::numeric_limits<u64>::max(), {}, *acquireFence), nextImage.first != vk::Result::eSuccess) [[unlikely]] {
|
||||||
if (nextImage.first == vk::Result::eSuboptimalKHR)
|
if (nextImage.first == vk::Result::eSuboptimalKHR)
|
||||||
surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); });
|
surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); });
|
||||||
else
|
else
|
||||||
throw exception("vkAcquireNextImageKHR returned an unhandled result '{}'", vk::to_string(nextImage.first));
|
throw exception("vkAcquireNextImageKHR returned an unhandled result '{}'", vk::to_string(nextImage.first));
|
||||||
while (gpu.vkDevice.waitForFences(*acquireFence, true, std::numeric_limits<u64>::max()) == vk::Result::eTimeout);
|
}
|
||||||
|
|
||||||
slots.at(nextImage.second)->CopyFrom(texture);
|
static_cast<void>(gpu.vkDevice.waitForFences(*acquireFence, true, std::numeric_limits<u64>::max()));
|
||||||
|
images.at(nextImage.second)->CopyFrom(texture);
|
||||||
|
|
||||||
{
|
{
|
||||||
std::lock_guard queueLock(gpu.queueMutex);
|
std::lock_guard queueLock(gpu.queueMutex);
|
||||||
|
@ -32,14 +32,14 @@ namespace skyline::gpu {
|
|||||||
texture::Format swapchainFormat{}; //!< The image format of the textures in the current swapchain
|
texture::Format swapchainFormat{}; //!< The image format of the textures in the current swapchain
|
||||||
texture::Dimensions swapchainExtent{}; //!< The extent of images in the current swapchain
|
texture::Dimensions swapchainExtent{}; //!< The extent of images in the current swapchain
|
||||||
|
|
||||||
static constexpr size_t MaxSlotCount{6}; //!< The maximum amount of queue slots, this affects the amount of images that can be in the swapchain
|
static constexpr size_t MaxSwapchainImageCount{6}; //!< The maximum amount of swapchain textures, this affects the amount of images that can be in the swapchain
|
||||||
std::array<std::shared_ptr<Texture>, MaxSlotCount> slots; //!< The backing for storing all slots and sorted in the same order as supplied by the Vulkan swapchain
|
std::array<std::shared_ptr<Texture>, MaxSwapchainImageCount> images; //!< All the swapchain textures in the same order as supplied by the host swapchain
|
||||||
|
|
||||||
u64 frameTimestamp{}; //!< The timestamp of the last frame being shown
|
u64 frameTimestamp{}; //!< The timestamp of the last frame being shown
|
||||||
perfetto::Track presentationTrack; //!< Perfetto track used for presentation events
|
perfetto::Track presentationTrack; //!< Perfetto track used for presentation events
|
||||||
|
|
||||||
std::thread choreographerThread; //!< A thread for signalling the V-Sync event using AChoreographer
|
std::thread choreographerThread; //!< A thread for signalling the V-Sync event using AChoreographer
|
||||||
ALooper* choreographerLooper{}; //!< The looper object associated with the Choreographer thread
|
ALooper *choreographerLooper{}; //!< The looper object associated with the Choreographer thread
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief The entry point for the Choreographer thread, the function runs ALooper on the thread
|
* @brief The entry point for the Choreographer thread, the function runs ALooper on the thread
|
||||||
|
@ -12,7 +12,7 @@ namespace skyline::gpu::format {
|
|||||||
constexpr Format RGB565Unorm{sizeof(u8) * 2, 1, 1, vk::Format::eR5G6B5UnormPack16}; //!< Red channel: 5-bit, Green channel: 6-bit, Blue channel: 5-bit
|
constexpr Format RGB565Unorm{sizeof(u8) * 2, 1, 1, vk::Format::eR5G6B5UnormPack16}; //!< Red channel: 5-bit, Green channel: 6-bit, Blue channel: 5-bit
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Converts a format from Vulkan to a Skyline format
|
* @brief Converts a Vulkan format to a Skyline format
|
||||||
*/
|
*/
|
||||||
constexpr const Format &GetFormat(vk::Format format) {
|
constexpr const Format &GetFormat(vk::Format format) {
|
||||||
switch (format) {
|
switch (format) {
|
||||||
|
@ -140,12 +140,14 @@ namespace skyline::gpu {
|
|||||||
u8 *bufferData;
|
u8 *bufferData;
|
||||||
auto stagingBuffer{[&]() -> std::shared_ptr<memory::StagingBuffer> {
|
auto stagingBuffer{[&]() -> std::shared_ptr<memory::StagingBuffer> {
|
||||||
if (tiling == vk::ImageTiling::eOptimal || !std::holds_alternative<memory::Image>(backing)) {
|
if (tiling == vk::ImageTiling::eOptimal || !std::holds_alternative<memory::Image>(backing)) {
|
||||||
|
// We need a staging buffer for all optimal copies (Since we aren't aware of the host optimal layout) and linear textures which we cannot map on the CPU since we do not have access to their backing VkDeviceMemory
|
||||||
auto stagingBuffer{gpu.memory.AllocateStagingBuffer(size)};
|
auto stagingBuffer{gpu.memory.AllocateStagingBuffer(size)};
|
||||||
bufferData = stagingBuffer->data();
|
bufferData = stagingBuffer->data();
|
||||||
return stagingBuffer;
|
return stagingBuffer;
|
||||||
} else if (tiling == vk::ImageTiling::eLinear) {
|
} else if (tiling == vk::ImageTiling::eLinear) {
|
||||||
|
// We can optimize linear texture sync on a UMA by mapping the texture onto the CPU and copying directly into it rather than a staging buffer
|
||||||
bufferData = std::get<memory::Image>(backing).data();
|
bufferData = std::get<memory::Image>(backing).data();
|
||||||
WaitOnFence();
|
WaitOnFence(); // We need to wait on fence here since we are mutating the texture directly after, the wait can be deferred till the copy when a staging buffer is used
|
||||||
return nullptr;
|
return nullptr;
|
||||||
} else {
|
} else {
|
||||||
throw exception("Guest -> Host synchronization of images tiled as '{}' isn't implemented", vk::to_string(tiling));
|
throw exception("Guest -> Host synchronization of images tiled as '{}' isn't implemented", vk::to_string(tiling));
|
||||||
|
@ -188,7 +188,7 @@ namespace skyline::gpu {
|
|||||||
texture::TileMode tileMode;
|
texture::TileMode tileMode;
|
||||||
texture::TileConfig tileConfig;
|
texture::TileConfig tileConfig;
|
||||||
|
|
||||||
GuestTexture(const DeviceState &state, u8 *pointer, texture::Dimensions dimensions, const texture::Format& format, texture::TileMode tileMode = texture::TileMode::Linear, texture::TileConfig tileConfig = {});
|
GuestTexture(const DeviceState &state, u8 *pointer, texture::Dimensions dimensions, const texture::Format &format, texture::TileMode tileMode = texture::TileMode::Linear, texture::TileConfig tileConfig = {});
|
||||||
|
|
||||||
constexpr size_t Size() {
|
constexpr size_t Size() {
|
||||||
return format.GetSize(dimensions);
|
return format.GetSize(dimensions);
|
||||||
@ -206,18 +206,18 @@ namespace skyline::gpu {
|
|||||||
* @note There can only be one host texture for a corresponding guest texture
|
* @note There can only be one host texture for a corresponding guest texture
|
||||||
* @note If any of the supplied parameters do not match up with the backing then it's undefined behavior
|
* @note If any of the supplied parameters do not match up with the backing then it's undefined behavior
|
||||||
*/
|
*/
|
||||||
std::shared_ptr<Texture> InitializeTexture(vk::Image backing, texture::Dimensions dimensions = {}, const texture::Format& format = {}, std::optional<vk::ImageTiling> tiling = std::nullopt, vk::ImageLayout layout = vk::ImageLayout::eUndefined, texture::Swizzle swizzle = {});
|
std::shared_ptr<Texture> InitializeTexture(vk::Image backing, texture::Dimensions dimensions = {}, const texture::Format &format = {}, std::optional<vk::ImageTiling> tiling = std::nullopt, vk::ImageLayout layout = vk::ImageLayout::eUndefined, texture::Swizzle swizzle = {});
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @note As a RAII object is used here, the lifetime of the backing is handled by the host texture
|
* @note As a RAII object is used here, the lifetime of the backing is handled by the host texture
|
||||||
*/
|
*/
|
||||||
std::shared_ptr<Texture> InitializeTexture(vk::raii::Image &&backing, std::optional<vk::ImageTiling> tiling = std::nullopt, vk::ImageLayout layout = vk::ImageLayout::eUndefined, const texture::Format& format = {}, texture::Dimensions dimensions = {}, texture::Swizzle swizzle = {});
|
std::shared_ptr<Texture> InitializeTexture(vk::raii::Image &&backing, std::optional<vk::ImageTiling> tiling = std::nullopt, vk::ImageLayout layout = vk::ImageLayout::eUndefined, const texture::Format &format = {}, texture::Dimensions dimensions = {}, texture::Swizzle swizzle = {});
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Similar to InitializeTexture but creation of the backing and allocation of memory for the backing is automatically performed by the function
|
* @brief Similar to InitializeTexture but creation of the backing and allocation of memory for the backing is automatically performed by the function
|
||||||
* @param usage Usage flags that will be applied aside from VK_IMAGE_USAGE_TRANSFER_SRC_BIT/VK_IMAGE_USAGE_TRANSFER_DST_BIT which are mandatory
|
* @param usage Usage flags that will be applied aside from VK_IMAGE_USAGE_TRANSFER_SRC_BIT/VK_IMAGE_USAGE_TRANSFER_DST_BIT which are mandatory
|
||||||
*/
|
*/
|
||||||
std::shared_ptr<Texture> CreateTexture(vk::ImageUsageFlags usage = {}, std::optional<vk::ImageTiling> tiling = std::nullopt, vk::ImageLayout initialLayout = vk::ImageLayout::eGeneral, const texture::Format& format = {}, texture::Dimensions dimensions = {}, texture::Swizzle swizzle = {});
|
std::shared_ptr<Texture> CreateTexture(vk::ImageUsageFlags usage = {}, std::optional<vk::ImageTiling> tiling = std::nullopt, vk::ImageLayout initialLayout = vk::ImageLayout::eGeneral, const texture::Format &format = {}, texture::Dimensions dimensions = {}, texture::Swizzle swizzle = {});
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -252,15 +252,15 @@ namespace skyline::gpu {
|
|||||||
vk::ImageTiling tiling;
|
vk::ImageTiling tiling;
|
||||||
vk::ComponentMapping mapping;
|
vk::ComponentMapping mapping;
|
||||||
|
|
||||||
Texture(GPU &gpu, BackingType &&backing, std::shared_ptr<GuestTexture> guest, texture::Dimensions dimensions, const texture::Format& format, vk::ImageLayout layout, vk::ImageTiling tiling, vk::ComponentMapping mapping);
|
Texture(GPU &gpu, BackingType &&backing, std::shared_ptr<GuestTexture> guest, texture::Dimensions dimensions, const texture::Format &format, vk::ImageLayout layout, vk::ImageTiling tiling, vk::ComponentMapping mapping);
|
||||||
|
|
||||||
Texture(GPU &gpu, BackingType &&backing, texture::Dimensions dimensions, const texture::Format& format, vk::ImageLayout layout, vk::ImageTiling tiling, vk::ComponentMapping mapping = {});
|
Texture(GPU &gpu, BackingType &&backing, texture::Dimensions dimensions, const texture::Format &format, vk::ImageLayout layout, vk::ImageTiling tiling, vk::ComponentMapping mapping = {});
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Creates and allocates memory for the backing to create a texture object wrapping it
|
* @brief Creates and allocates memory for the backing to create a texture object wrapping it
|
||||||
* @param usage Usage flags that will be applied aside from VK_IMAGE_USAGE_TRANSFER_SRC_BIT/VK_IMAGE_USAGE_TRANSFER_DST_BIT which are mandatory
|
* @param usage Usage flags that will be applied aside from VK_IMAGE_USAGE_TRANSFER_SRC_BIT/VK_IMAGE_USAGE_TRANSFER_DST_BIT which are mandatory
|
||||||
*/
|
*/
|
||||||
Texture(GPU &gpu, texture::Dimensions dimensions, const texture::Format& format, vk::ImageLayout initialLayout = vk::ImageLayout::eGeneral, vk::ImageUsageFlags usage = {}, vk::ImageTiling tiling = vk::ImageTiling::eOptimal, vk::ComponentMapping mapping = {});
|
Texture(GPU &gpu, texture::Dimensions dimensions, const texture::Format &format, vk::ImageLayout initialLayout = vk::ImageLayout::eGeneral, vk::ImageUsageFlags usage = {}, vk::ImageTiling tiling = vk::ImageTiling::eOptimal, vk::ComponentMapping mapping = {});
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Acquires an exclusive lock on the texture for the calling thread
|
* @brief Acquires an exclusive lock on the texture for the calling thread
|
||||||
|
@ -50,7 +50,6 @@ namespace skyline::service::hosbinder {
|
|||||||
|
|
||||||
std::lock_guard guard(mutex);
|
std::lock_guard guard(mutex);
|
||||||
auto buffer{queue.end()};
|
auto buffer{queue.end()};
|
||||||
while (true) {
|
|
||||||
size_t dequeuedSlotCount{};
|
size_t dequeuedSlotCount{};
|
||||||
for (auto it{queue.begin()}; it != queue.end(); it++) {
|
for (auto it{queue.begin()}; it != queue.end(); it++) {
|
||||||
// We want to select the oldest slot that's free to use as we'd want all slots to be used
|
// We want to select the oldest slot that's free to use as we'd want all slots to be used
|
||||||
@ -58,20 +57,25 @@ namespace skyline::service::hosbinder {
|
|||||||
if (it->state == BufferState::Free && it->texture) {
|
if (it->state == BufferState::Free && it->texture) {
|
||||||
if (buffer == queue.end() || it->frameNumber < buffer->frameNumber)
|
if (buffer == queue.end() || it->frameNumber < buffer->frameNumber)
|
||||||
buffer = it;
|
buffer = it;
|
||||||
else if (it->state == BufferState::Dequeued)
|
} else if (it->state == BufferState::Dequeued) {
|
||||||
dequeuedSlotCount++;
|
dequeuedSlotCount++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (buffer != queue.end()) {
|
if (buffer != queue.end()) {
|
||||||
slot = std::distance(queue.begin(), buffer);
|
slot = std::distance(queue.begin(), buffer);
|
||||||
break;
|
|
||||||
} else if (async) {
|
} else if (async) {
|
||||||
return AndroidStatus::WouldBlock;
|
return AndroidStatus::WouldBlock;
|
||||||
} else if (dequeuedSlotCount == queue.size()) {
|
} else if (dequeuedSlotCount == queue.size()) {
|
||||||
state.logger->Warn("Client attempting to dequeue more buffers when all buffers are dequeued by the client: {}", dequeuedSlotCount);
|
state.logger->Warn("Client attempting to dequeue more buffers when all buffers are dequeued by the client: {}", dequeuedSlotCount);
|
||||||
return AndroidStatus::InvalidOperation;
|
return AndroidStatus::InvalidOperation;
|
||||||
}
|
} else {
|
||||||
|
size_t index{};
|
||||||
|
std::string bufferString;
|
||||||
|
for (auto& bufferSlot : queue)
|
||||||
|
bufferString += util::Format("\n#{} - State: {}, Has Graphic Buffer: {}, Frame Number: {}", ++index, ToString(bufferSlot.state), static_cast<bool>(bufferSlot.graphicBuffer), bufferSlot.frameNumber);
|
||||||
|
state.logger->Warn("Cannot find any free buffers to dequeue:{}", bufferString);
|
||||||
|
return AndroidStatus::InvalidOperation;
|
||||||
}
|
}
|
||||||
|
|
||||||
width = width ? width : defaultWidth;
|
width = width ? width : defaultWidth;
|
||||||
@ -392,7 +396,7 @@ namespace skyline::service::hosbinder {
|
|||||||
buffer.frameNumber = 0;
|
buffer.frameNumber = 0;
|
||||||
buffer.wasBufferRequested = false;
|
buffer.wasBufferRequested = false;
|
||||||
buffer.graphicBuffer = std::make_unique<GraphicBuffer>(graphicBuffer);
|
buffer.graphicBuffer = std::make_unique<GraphicBuffer>(graphicBuffer);
|
||||||
buffer.texture = texture->CreateTexture({}, vk::ImageTiling::eLinear, vk::ImageLayout::eGeneral);
|
buffer.texture = texture->CreateTexture({}, vk::ImageTiling::eLinear);
|
||||||
|
|
||||||
activeSlotCount = hasBufferCount = std::count_if(queue.begin(), queue.end(), [](const BufferSlot &slot) { return static_cast<bool>(slot.graphicBuffer); });
|
activeSlotCount = hasBufferCount = std::count_if(queue.begin(), queue.end(), [](const BufferSlot &slot) { return static_cast<bool>(slot.graphicBuffer); });
|
||||||
|
|
||||||
|
@ -55,6 +55,7 @@ namespace skyline::service::hosbinder {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* @url https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/libs/ui/Fence.cpp;l=34-36
|
* @url https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/libs/ui/Fence.cpp;l=34-36
|
||||||
|
* @note Only initializing the first fence is intentional and matches Nvidia's AndroidFence implementation
|
||||||
*/
|
*/
|
||||||
AndroidFence() : fenceCount(0), fences({InvalidFenceId}) {}
|
AndroidFence() : fenceCount(0), fences({InvalidFenceId}) {}
|
||||||
|
|
||||||
|
@ -49,7 +49,8 @@ namespace skyline::service::nvdrv::device {
|
|||||||
void SyncpointEvent::Cancel(soc::host1x::Host1X &host1x) {
|
void SyncpointEvent::Cancel(soc::host1x::Host1X &host1x) {
|
||||||
std::lock_guard lock(mutex);
|
std::lock_guard lock(mutex);
|
||||||
|
|
||||||
host1x.syncpoints.at(fence.id).DeregisterWaiter(waiterId);
|
host1x.syncpoints.at(fence.id).DeregisterWaiter(waiterHandle);
|
||||||
|
waiterHandle = {};
|
||||||
Signal();
|
Signal();
|
||||||
event->ResetSignal();
|
event->ResetSignal();
|
||||||
}
|
}
|
||||||
@ -59,7 +60,7 @@ namespace skyline::service::nvdrv::device {
|
|||||||
|
|
||||||
fence = pFence;
|
fence = pFence;
|
||||||
state = State::Waiting;
|
state = State::Waiting;
|
||||||
waiterId = host1x.syncpoints.at(fence.id).RegisterWaiter(fence.value, [this] { Signal(); });
|
waiterHandle = host1x.syncpoints.at(fence.id).RegisterWaiter(fence.value, [this] { Signal(); });
|
||||||
}
|
}
|
||||||
|
|
||||||
NvHostCtrl::NvHostCtrl(const DeviceState &state) : NvDevice(state) {}
|
NvHostCtrl::NvHostCtrl(const DeviceState &state) : NvDevice(state) {}
|
||||||
|
@ -17,7 +17,7 @@ namespace skyline {
|
|||||||
*/
|
*/
|
||||||
class SyncpointEvent {
|
class SyncpointEvent {
|
||||||
private:
|
private:
|
||||||
u64 waiterId{};
|
soc::host1x::Syncpoint::WaiterHandle waiterHandle{};
|
||||||
|
|
||||||
void Signal();
|
void Signal();
|
||||||
|
|
||||||
|
@ -71,7 +71,7 @@ namespace skyline::service::nvdrv {
|
|||||||
if (!syncpoints.at(id).reserved)
|
if (!syncpoints.at(id).reserved)
|
||||||
throw exception("Cannot update an unreserved syncpoint!");
|
throw exception("Cannot update an unreserved syncpoint!");
|
||||||
|
|
||||||
syncpoints.at(id).counterMin = state.soc->host1x.syncpoints.at(id).value.load();
|
syncpoints.at(id).counterMin = state.soc->host1x.syncpoints.at(id).Load();
|
||||||
return syncpoints.at(id).counterMin;
|
return syncpoints.at(id).counterMin;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -5,62 +5,60 @@
|
|||||||
#include "syncpoint.h"
|
#include "syncpoint.h"
|
||||||
|
|
||||||
namespace skyline::soc::host1x {
|
namespace skyline::soc::host1x {
|
||||||
u64 Syncpoint::RegisterWaiter(u32 threshold, const std::function<void()> &callback) {
|
Syncpoint::WaiterHandle Syncpoint::RegisterWaiter(u32 threshold, const std::function<void()> &callback) {
|
||||||
if (value >= threshold) {
|
if (value.load(std::memory_order_acquire) >= threshold) {
|
||||||
|
// (Fast path) We don't need to wait on the mutex and can just get away with atomics
|
||||||
callback();
|
callback();
|
||||||
return 0;
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
std::lock_guard guard(waiterLock);
|
std::scoped_lock lock(mutex);
|
||||||
waiterMap.emplace(nextWaiterId, Waiter{threshold, callback});
|
if (value.load(std::memory_order_acquire) >= threshold) {
|
||||||
|
callback();
|
||||||
return nextWaiterId++;
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
void Syncpoint::DeregisterWaiter(u64 id) {
|
auto it{waiters.begin()};
|
||||||
std::lock_guard guard(waiterLock);
|
while (it != waiters.end() && threshold >= it->threshold)
|
||||||
waiterMap.erase(id);
|
it++;
|
||||||
|
return waiters.emplace(it, threshold, callback);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Syncpoint::DeregisterWaiter(WaiterHandle waiter) {
|
||||||
|
std::scoped_lock lock(mutex);
|
||||||
|
// We want to ensure the iterator still exists prior to erasing it
|
||||||
|
// Otherwise, if an invalid iterator was passed in then it could lead to UB
|
||||||
|
// It is important to avoid UB in that case since the deregister isn't called from a locked context
|
||||||
|
for (auto it{waiters.begin()}; it != waiters.end(); it++)
|
||||||
|
if (it == waiter)
|
||||||
|
waiters.erase(it);
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 Syncpoint::Increment() {
|
u32 Syncpoint::Increment() {
|
||||||
value++;
|
auto readValue{value.fetch_add(1, std::memory_order_acq_rel)}; // We don't want to constantly do redundant atomic loads
|
||||||
|
|
||||||
std::lock_guard guard(waiterLock);
|
std::lock_guard lock(mutex);
|
||||||
std::erase_if(waiterMap, [this](const auto &entry) {
|
auto it{waiters.begin()};
|
||||||
if (value >= entry.second.threshold) {
|
while (it != waiters.end() && readValue >= it->threshold)
|
||||||
entry.second.callback();
|
it++->callback();
|
||||||
return true;
|
waiters.erase(waiters.begin(), it);
|
||||||
} else {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
return value;
|
incrementCondition.notify_all();
|
||||||
|
|
||||||
|
return readValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Syncpoint::Wait(u32 threshold, std::chrono::steady_clock::duration timeout) {
|
bool Syncpoint::Wait(u32 threshold, std::chrono::steady_clock::duration timeout) {
|
||||||
if (value >= threshold)
|
if (value.load(std::memory_order_acquire) >= threshold)
|
||||||
return true;
|
// (Fast Path) We don't need to wait on the mutex and can just get away with atomics
|
||||||
|
return {};
|
||||||
|
|
||||||
std::mutex mtx;
|
std::unique_lock lock(mutex);
|
||||||
std::condition_variable cv;
|
|
||||||
bool flag{};
|
|
||||||
|
|
||||||
if (!RegisterWaiter(threshold, [&cv, &mtx, &flag] {
|
|
||||||
std::unique_lock lock(mtx);
|
|
||||||
flag = true;
|
|
||||||
lock.unlock();
|
|
||||||
cv.notify_all();
|
|
||||||
})) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::unique_lock lock(mtx);
|
|
||||||
if (timeout == std::chrono::steady_clock::duration::max()) {
|
if (timeout == std::chrono::steady_clock::duration::max()) {
|
||||||
cv.wait(lock, [&flag] { return flag; });
|
incrementCondition.wait(lock, [&] { return value.load(std::memory_order_relaxed) >= threshold; });
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
return cv.wait_for(lock, timeout, [&flag] { return flag; });
|
return incrementCondition.wait_for(lock, timeout, [&] { return value.load(std::memory_order_relaxed) >= threshold; });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -14,33 +14,43 @@ namespace skyline::soc::host1x {
|
|||||||
*/
|
*/
|
||||||
class Syncpoint {
|
class Syncpoint {
|
||||||
private:
|
private:
|
||||||
|
std::atomic<u32> value{}; //!< An atomically-incrementing counter at the core of a syncpoint
|
||||||
|
|
||||||
|
std::mutex mutex; //!< Synchronizes insertions and deletions of waiters alongside locking the increment condition
|
||||||
|
std::condition_variable incrementCondition; //!< Signalled on every increment to the syncpoint
|
||||||
|
|
||||||
struct Waiter {
|
struct Waiter {
|
||||||
u32 threshold; //!< The syncpoint value to wait on to be reached
|
u32 threshold; //!< The syncpoint value to wait on to be reached
|
||||||
std::function<void()> callback; //!< The callback to do after the wait has ended
|
std::function<void()> callback; //!< The callback to do after the wait has ended
|
||||||
};
|
|
||||||
|
|
||||||
std::mutex waiterLock; //!< Synchronizes insertions and deletions of waiters
|
Waiter(u32 threshold, std::function<void()> callback) : threshold(threshold), callback(std::move(callback)) {}
|
||||||
std::map<u64, Waiter> waiterMap;
|
};
|
||||||
u64 nextWaiterId{1};
|
std::list<Waiter> waiters; //!< A linked list of all waiters, it's sorted in ascending order by threshold
|
||||||
|
|
||||||
public:
|
public:
|
||||||
std::atomic<u32> value{};
|
/**
|
||||||
|
* @return The value of the syncpoint, retrieved in an atomically safe manner
|
||||||
|
*/
|
||||||
|
constexpr u32 Load() {
|
||||||
|
return value.load(std::memory_order_acquire);
|
||||||
|
}
|
||||||
|
|
||||||
|
using WaiterHandle = decltype(waiters)::iterator; //!< Aliasing an iterator to a Waiter as an opaque handle
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Registers a new waiter with a callback that will be called when the syncpoint reaches the target threshold
|
* @brief Registers a new waiter with a callback that will be called when the syncpoint reaches the target threshold
|
||||||
* @note The callback will be called immediately if the syncpoint has already reached the given threshold
|
* @note The callback will be called immediately if the syncpoint has already reached the given threshold
|
||||||
* @return A persistent identifier that can be used to refer to the waiter, or 0 if the threshold has already been reached
|
* @return A handle that can be used to deregister the waiter, a default-constructed handle is returned if the threshold has already been reached
|
||||||
*/
|
*/
|
||||||
u64 RegisterWaiter(u32 threshold, const std::function<void()> &callback);
|
WaiterHandle RegisterWaiter(u32 threshold, const std::function<void()> &callback);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Removes a waiter given by 'id' from the pending waiter map
|
* @note If the supplied handle is invalid then the function will do nothing
|
||||||
*/
|
*/
|
||||||
void DeregisterWaiter(u64 id);
|
void DeregisterWaiter(WaiterHandle waiter);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Increments the syncpoint by 1
|
* @return The new value of the syncpoint after the increment
|
||||||
* @return The new value of the syncpoint
|
|
||||||
*/
|
*/
|
||||||
u32 Increment();
|
u32 Increment();
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user