diff --git a/app/src/main/cpp/skyline/gpu/presentation_engine.cpp b/app/src/main/cpp/skyline/gpu/presentation_engine.cpp index 87643ecb..dfe4de98 100644 --- a/app/src/main/cpp/skyline/gpu/presentation_engine.cpp +++ b/app/src/main/cpp/skyline/gpu/presentation_engine.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include "presentation_engine.h" @@ -21,12 +22,13 @@ namespace skyline::gpu { using namespace service::hosbinder; PresentationEngine::PresentationEngine(const DeviceState &state, GPU &gpu) - : state(state), - gpu(gpu), - acquireFence(gpu.vkDevice, vk::FenceCreateInfo{}), - presentationTrack(static_cast(trace::TrackIds::Presentation), perfetto::ProcessTrack::Current()), - choreographerThread(&PresentationEngine::ChoreographerThread, this), - vsyncEvent(std::make_shared(state, true)) { + : state{state}, + gpu{gpu}, + acquireFence{gpu.vkDevice, vk::FenceCreateInfo{}}, + presentationTrack{static_cast(trace::TrackIds::Presentation), perfetto::ProcessTrack::Current()}, + choreographerThread{&PresentationEngine::ChoreographerThread, this}, + presentationThread{&PresentationEngine::PresentationThread, this}, + vsyncEvent{std::make_shared(state, true)} { auto desc{presentationTrack.Serialize()}; desc.set_name("Presentation"); perfetto::TrackEvent::SetTrackDescriptor(presentationTrack, desc); @@ -88,6 +90,158 @@ namespace skyline::gpu { } } + void PresentationEngine::PresentFrame(const PresentableFrame &frame) { + std::unique_lock lock(mutex); + surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); }); + + frame.fence.Wait(state.soc->host1x); + + std::scoped_lock textureLock(*frame.texture); + if (frame.texture->format != swapchainFormat || frame.texture->dimensions != swapchainExtent) + UpdateSwapchain(frame.texture->format, frame.texture->dimensions); + + int result; + if (frame.crop && frame.crop != windowCrop) { + if ((result = window->perform(window, NATIVE_WINDOW_SET_CROP, &frame.crop))) + throw exception("Setting the layer crop to ({}-{})x({}-{}) failed with {}", frame.crop.left, frame.crop.right, frame.crop.top, frame.crop.bottom, result); + windowCrop = frame.crop; + } + + if (frame.scalingMode != NativeWindowScalingMode::Freeze && windowScalingMode != frame.scalingMode) { + if ((result = window->perform(window, NATIVE_WINDOW_SET_SCALING_MODE, static_cast(frame.scalingMode)))) + throw exception("Setting the layer scaling mode to '{}' failed with {}", ToString(frame.scalingMode), result); + windowScalingMode = frame.scalingMode; + } + + if (frame.transform != windowTransform) { + if ((result = window->perform(window, NATIVE_WINDOW_SET_BUFFERS_TRANSFORM, static_cast(frame.transform)))) + throw exception("Setting the buffer transform to '{}' failed with {}", ToString(frame.transform), result); + windowTransform = frame.transform; + } + + gpu.vkDevice.resetFences(*acquireFence); + + std::pair nextImage; + while (nextImage = vkSwapchain->acquireNextImage(std::numeric_limits::max(), {}, *acquireFence), nextImage.first != vk::Result::eSuccess) [[unlikely]] { + if (nextImage.first == vk::Result::eSuboptimalKHR) + surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); }); + else + throw exception("vkAcquireNextImageKHR returned an unhandled result '{}'", vk::to_string(nextImage.first)); + } + + std::ignore = gpu.vkDevice.waitForFences(*acquireFence, true, std::numeric_limits::max()); + + frame.texture->SynchronizeHost(); + images.at(nextImage.second)->CopyFrom(frame.texture, vk::ImageSubresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .levelCount = 1, + .layerCount = 1, + }); + + auto getMonotonicNsNow{[]() -> i64 { + timespec time; + if (clock_gettime(CLOCK_MONOTONIC, &time)) + throw exception("Failed to clock_gettime with '{}'", strerror(errno)); + return (time.tv_sec * constant::NsInSecond) + time.tv_nsec; + }}; + + i64 timestamp{frame.timestamp}; + if (timestamp) { + // If the timestamp is specified, we need to convert it from the util::GetTimeNs base to the CLOCK_MONOTONIC one + // We do so by getting an offset from the current time in nanoseconds and then adding it to the current time in CLOCK_MONOTONIC + // Note: It's important we do this right before present as going past the timestamp could lead to fewer Binder IPC calls + i64 current{util::GetTimeNs()}; + if (current < timestamp) { + timestamp = getMonotonicNsNow() + (timestamp - current); + } else { + timestamp = 0; + } + } + + if (frame.swapInterval) { + // If we have a swap interval, we have to adjust the timestamp to emulate the swap interval + i64 lastFramePresentTime{util::AlignUpNpot(windowLastTimestamp, refreshCycleDuration)}; + if (lastFramePresentTime > lastChoreographerTime) + // If the last frame was presented after the last choreographer callback, calculate the new frame's timestamp relative to it + timestamp = std::max(timestamp, lastFramePresentTime + (refreshCycleDuration * frame.swapInterval)); + else + // If there has been a choreographer callback since the last frame, calculate the new frame's timestamp relative to it + timestamp = std::max(timestamp, lastChoreographerTime + (2 * refreshCycleDuration * frame.swapInterval)); + } + + i64 lastTimestamp{std::exchange(windowLastTimestamp, timestamp)}; + if (!timestamp && lastTimestamp) + // We need to nullify the timestamp if it transitioned from being specified (non-zero) to unspecified (zero) + timestamp = NativeWindowTimestampAuto; + + if (timestamp && (result = window->perform(window, NATIVE_WINDOW_SET_BUFFERS_TIMESTAMP, timestamp))) + throw exception("Setting the buffer timestamp to {} failed with {}", timestamp, result); + + u64 frameId{}; + if ((result = window->perform(window, NATIVE_WINDOW_GET_NEXT_FRAME_ID, &frameId))) + throw exception("Retrieving the next frame's ID failed with {}", result); + + { + std::scoped_lock queueLock{gpu.queueMutex}; + std::ignore = gpu.vkQueue.presentKHR(vk::PresentInfoKHR{ + .swapchainCount = 1, + .pSwapchains = &**vkSwapchain, + .pImageIndices = &nextImage.second, + }); // We don't care about suboptimal images as they are caused by not respecting the transform hint, we handle transformations externally + } + + timestamp = timestamp ? timestamp : getMonotonicNsNow(); + if (frameTimestamp) { + i64 sampleWeight{Fps ? Fps : 1}; //!< The weight of each sample in calculating the average, we want to roughly average the past second + + auto weightedAverage{[](auto weight, auto previousAverage, auto current) { + return (((weight - 1) * previousAverage) + current) / weight; + }}; //!< Modified moving average (https://en.wikipedia.org/wiki/Moving_average#Modified_moving_average) + + i64 currentFrametime{timestamp - frameTimestamp}; + averageFrametimeNs = weightedAverage(sampleWeight, averageFrametimeNs, currentFrametime); + AverageFrametimeMs = static_cast(averageFrametimeNs) / constant::NsInMillisecond; + + i64 currentFrametimeDeviation{std::abs(averageFrametimeNs - currentFrametime)}; + averageFrametimeDeviationNs = weightedAverage(sampleWeight, averageFrametimeDeviationNs, currentFrametimeDeviation); + AverageFrametimeDeviationMs = static_cast(averageFrametimeDeviationNs) / constant::NsInMillisecond; + + Fps = static_cast(std::round(static_cast(constant::NsInSecond) / static_cast(averageFrametimeNs))); + + TRACE_EVENT_INSTANT("gpu", "Present", presentationTrack, "FrameTimeNs", timestamp - frameTimestamp, "Fps", Fps); + + frameTimestamp = timestamp; + } else { + frameTimestamp = timestamp; + } + } + + void PresentationEngine::PresentationThread() { + if (int result{pthread_setname_np(pthread_self(), "Sky-Present")}) + Logger::Warn("Failed to set the thread name: {}", strerror(result)); + + try { + signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, signal::ExceptionalSignalHandler); + + presentQueue.Process([this](const PresentableFrame& frame) { + PresentFrame(frame); + frame.presentCallback(); // We're calling the callback here as it's outside of all the locks in PresentFrame + }); + } catch (const signal::SignalException &e) { + Logger::Error("{}\nStack Trace:{}", e.what(), state.loader->GetStackTrace(e.frames)); + if (state.process) + state.process->Kill(false); + else + std::rethrow_exception(std::current_exception()); + } catch (const std::exception &e) { + Logger::Error(e.what()); + if (state.process) + state.process->Kill(false); + else + std::rethrow_exception(std::current_exception()); + } + } + NativeWindowTransform GetAndroidTransform(vk::SurfaceTransformFlagBitsKHR transform) { using NativeWindowTransform = NativeWindowTransform; switch (transform) { @@ -219,115 +373,23 @@ namespace skyline::gpu { } } - void PresentationEngine::Present(const std::shared_ptr &texture, i64 timestamp, u64 swapInterval, AndroidRect crop, NativeWindowScalingMode scalingMode, NativeWindowTransform transform, u64 &frameId) { + u64 PresentationEngine::Present(const std::shared_ptr &texture, i64 timestamp, i64 swapInterval, AndroidRect crop, NativeWindowScalingMode scalingMode, NativeWindowTransform transform, skyline::service::hosbinder::AndroidFence fence, const std::function& presentCallback) { std::unique_lock lock(mutex); surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); }); - std::scoped_lock textureLock(*texture); - if (texture->format != swapchainFormat || texture->dimensions != swapchainExtent) - UpdateSwapchain(texture->format, texture->dimensions); - - int result; - if (crop && crop != windowCrop) { - if ((result = window->perform(window, NATIVE_WINDOW_SET_CROP, &crop))) - throw exception("Setting the layer crop to ({}-{})x({}-{}) failed with {}", crop.left, crop.right, crop.top, crop.bottom, result); - windowCrop = crop; - } - - if (scalingMode != NativeWindowScalingMode::Freeze && windowScalingMode != scalingMode) { - if ((result = window->perform(window, NATIVE_WINDOW_SET_SCALING_MODE, static_cast(scalingMode)))) - throw exception("Setting the layer scaling mode to '{}' failed with {}", ToString(scalingMode), result); - windowScalingMode = scalingMode; - } - - if (transform != windowTransform) { - if ((result = window->perform(window, NATIVE_WINDOW_SET_BUFFERS_TRANSFORM, static_cast(transform)))) - throw exception("Setting the buffer transform to '{}' failed with {}", ToString(transform), result); - windowTransform = transform; - } - - gpu.vkDevice.resetFences(*acquireFence); - - std::pair nextImage; - while (nextImage = vkSwapchain->acquireNextImage(std::numeric_limits::max(), {}, *acquireFence), nextImage.first != vk::Result::eSuccess) [[unlikely]] { - if (nextImage.first == vk::Result::eSuboptimalKHR) - surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); }); - else - throw exception("vkAcquireNextImageKHR returned an unhandled result '{}'", vk::to_string(nextImage.first)); - } - - std::ignore = gpu.vkDevice.waitForFences(*acquireFence, true, std::numeric_limits::max()); - - texture->SynchronizeHost(); - images.at(nextImage.second)->CopyFrom(texture, vk::ImageSubresourceRange{ - .aspectMask = vk::ImageAspectFlagBits::eColor, - .levelCount = 1, - .layerCount = 1, + presentQueue.Push(PresentableFrame{ + texture, + fence, + timestamp, + swapInterval, + presentCallback, + nextFrameId, + crop, + scalingMode, + transform }); - if (timestamp) { - // If the timestamp is specified, we need to convert it from the util::GetTimeNs base to the CLOCK_MONOTONIC one - // We do so by getting an offset from the current time in nanoseconds and then adding it to the current time in CLOCK_MONOTONIC - // Note: It's important we do this right before present as going past the timestamp could lead to fewer Binder IPC calls - i64 current{util::GetTimeNs()}; - if (current < timestamp) { - timespec time; - if (clock_gettime(CLOCK_MONOTONIC, &time)) - throw exception("Failed to clock_gettime with '{}'", strerror(errno)); - timestamp = ((time.tv_sec * constant::NsInSecond) + time.tv_nsec) + (timestamp - current); - } else { - timestamp = 0; - } - } - - if (swapInterval > 1) - // If we have a swap interval above 1 we have to adjust the timestamp to emulate the swap interval - timestamp = std::max(timestamp, lastChoreographerTime + (refreshCycleDuration * static_cast(swapInterval) * 2)); - - auto lastTimestamp{std::exchange(windowLastTimestamp, timestamp)}; - if (!timestamp && lastTimestamp) - // We need to nullify the timestamp if it transitioned from being specified (non-zero) to unspecified (zero) - timestamp = NativeWindowTimestampAuto; - - if (timestamp && (result = window->perform(window, NATIVE_WINDOW_SET_BUFFERS_TIMESTAMP, timestamp))) - throw exception("Setting the buffer timestamp to {} failed with {}", timestamp, result); - - if ((result = window->perform(window, NATIVE_WINDOW_GET_NEXT_FRAME_ID, &frameId))) - throw exception("Retrieving the next frame's ID failed with {}", result); - - { - std::scoped_lock queueLock{gpu.queueMutex}; - std::ignore = gpu.vkQueue.presentKHR(vk::PresentInfoKHR{ - .swapchainCount = 1, - .pSwapchains = &**vkSwapchain, - .pImageIndices = &nextImage.second, - }); // We don't care about suboptimal images as they are caused by not respecting the transform hint, we handle transformations externally - } - - if (frameTimestamp) { - i64 now{util::GetTimeNs()}; - i64 sampleWeight{swapInterval ? constant::NsInSecond / (refreshCycleDuration * static_cast(swapInterval)) : 10}; //!< The weight of each sample in calculating the average, we arbitrarily average 10 samples for unlocked FPS - - auto weightedAverage{[](auto weight, auto previousAverage, auto current) { - return (((weight - 1) * previousAverage) + current) / weight; - }}; //!< Modified moving average (https://en.wikipedia.org/wiki/Moving_average#Modified_moving_average) - - i64 currentFrametime{now - frameTimestamp}; - averageFrametimeNs = weightedAverage(sampleWeight, averageFrametimeNs, currentFrametime); - AverageFrametimeMs = static_cast(averageFrametimeNs) / constant::NsInMillisecond; - - i64 currentFrametimeDeviation{std::abs(averageFrametimeNs - currentFrametime)}; - averageFrametimeDeviationNs = weightedAverage(sampleWeight, averageFrametimeDeviationNs, currentFrametimeDeviation); - AverageFrametimeDeviationMs = static_cast(averageFrametimeDeviationNs) / constant::NsInMillisecond; - - Fps = static_cast(std::round(static_cast(constant::NsInSecond) / static_cast(averageFrametimeNs))); - - TRACE_EVENT_INSTANT("gpu", "Present", presentationTrack, "FrameTimeNs", now - frameTimestamp, "Fps", Fps); - - frameTimestamp = now; - } else { - frameTimestamp = util::GetTimeNs(); - } + return nextFrameId++; } NativeWindowTransform PresentationEngine::GetTransformHint() { diff --git a/app/src/main/cpp/skyline/gpu/presentation_engine.h b/app/src/main/cpp/skyline/gpu/presentation_engine.h index 7433c7d0..7886d789 100644 --- a/app/src/main/cpp/skyline/gpu/presentation_engine.h +++ b/app/src/main/cpp/skyline/gpu/presentation_engine.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include "texture/texture.h" @@ -28,7 +29,7 @@ namespace skyline::gpu { service::hosbinder::AndroidRect windowCrop{}; //!< A rectangle with the bounds of the current crop performed on the image prior to presentation service::hosbinder::NativeWindowScalingMode windowScalingMode{service::hosbinder::NativeWindowScalingMode::ScaleToWindow}; //!< The mode in which the cropped image is scaled up to the surface service::hosbinder::NativeWindowTransform windowTransform{}; //!< The transformation performed on the image prior to presentation - u64 windowLastTimestamp{}; //!< The last timestamp submitted to the window, 0 or CLOCK_MONOTONIC value + i64 windowLastTimestamp{}; //!< The last timestamp submitted to the window, 0 or CLOCK_MONOTONIC value std::optional vkSurface; //!< The Vulkan Surface object that is backed by ANativeWindow vk::SurfaceCapabilitiesKHR vkSurfaceCapabilities; //!< The capabilities of the current Vulkan Surface @@ -52,6 +53,24 @@ namespace skyline::gpu { i64 refreshCycleDuration{}; //!< The duration of a single refresh cycle for the display in nanoseconds bool choreographerStop{}; //!< If the Choreographer thread should stop on the next ALooper_wake() + struct PresentableFrame { + std::shared_ptr texture{}; + skyline::service::hosbinder::AndroidFence fence{}; //!< The fence that must be waited on prior to using the texture + i64 timestamp{}; //!< The earliest timestamp (relative to ARM CPU timer) that this frame must be presented at + i64 swapInterval{}; //!< The interval between frames in terms of 60Hz display refreshes (1/60th of a second) + std::function presentCallback; //!< A user-defined callback to use after presenting a frame + size_t id{}; //!< The ID of this frame, it is used to correlate the frame in other operations + + service::hosbinder::AndroidRect crop{}; + service::hosbinder::NativeWindowScalingMode scalingMode{}; + service::hosbinder::NativeWindowTransform transform{}; + }; + + std::thread presentationThread; //!< A thread for asynchronously presenting queued frames after their corresponded fences are signalled + static constexpr size_t PresentQueueFrameCount{5}; //!< The amount of frames the presentation queue can hold + CircularQueue presentQueue{PresentQueueFrameCount}; //!< A circular queue containing all the frames that we can present + size_t nextFrameId{1}; //!< The frame ID to use for the next frame + /** * @url https://developer.android.com/ndk/reference/group/choreographer#achoreographer_postframecallback64 */ @@ -62,6 +81,16 @@ namespace skyline::gpu { */ void ChoreographerThread(); + /** + * @brief Submits a single frame to the host API for presentation with the appropriate waits and copies + */ + void PresentFrame(const PresentableFrame& frame); + + /** + * @brief The thread that handles presentation of frames submitted to it + */ + void PresentationThread(); + /** * @note 'PresentationEngine::mutex' **must** be locked prior to calling this */ @@ -86,10 +115,12 @@ namespace skyline::gpu { * @param crop A rectangle with bounds that the image will be cropped to * @param scalingMode The mode by which the image must be scaled up to the surface * @param transform A transformation that should be performed on the image - * @param frameId The ID of this frame for correlating it with presentation timing readouts + * @param fence The fence to wait on prior to presenting the texture + * @param presentCallback The callback to be called when the texture is presented to the surface + * @return The ID of this frame for correlating it with presentation timing readouts * @note The texture **must** be locked prior to calling this */ - void Present(const std::shared_ptr &texture, i64 timestamp, u64 swapInterval, service::hosbinder::AndroidRect crop, service::hosbinder::NativeWindowScalingMode scalingMode, service::hosbinder::NativeWindowTransform transform, u64 &frameId); + u64 Present(const std::shared_ptr &texture, i64 timestamp, i64 swapInterval, service::hosbinder::AndroidRect crop, service::hosbinder::NativeWindowScalingMode scalingMode, service::hosbinder::NativeWindowTransform transform, skyline::service::hosbinder::AndroidFence fence, const std::function& presentCallback); /** * @return A transform that the application should render with to elide costly transforms later diff --git a/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.cpp b/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.cpp index 5fd07707..477c2486 100644 --- a/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.cpp +++ b/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.cpp @@ -384,16 +384,15 @@ namespace skyline::service::hosbinder { throw exception("Application attempting to perform unknown sticky transformation: {:#b}", static_cast(stickyTransform)); } - fence.Wait(state.soc->host1x); + state.gpu->presentation.Present(buffer.texture, isAutoTimestamp ? 0 : timestamp, swapInterval, crop, scalingMode, transform, fence, [this, &buffer] { + std::scoped_lock lock(mutex); - { - u64 frameId; - state.gpu->presentation.Present(buffer.texture, isAutoTimestamp ? 0 : timestamp, swapInterval, crop, scalingMode, transform, frameId); - } + buffer.state = BufferState::Free; + bufferEvent->Signal(); + }); + buffer.state = BufferState::Queued; buffer.frameNumber = ++frameNumber; - buffer.state = BufferState::Free; - bufferEvent->Signal(); width = defaultWidth; height = defaultHeight; diff --git a/app/src/main/cpp/skyline/services/hosbinder/android_types.h b/app/src/main/cpp/skyline/services/hosbinder/android_types.h index 3335d304..5db5262f 100644 --- a/app/src/main/cpp/skyline/services/hosbinder/android_types.h +++ b/app/src/main/cpp/skyline/services/hosbinder/android_types.h @@ -82,7 +82,7 @@ namespace skyline::service::hosbinder { /** * @return If the rectangle had any defined bounds */ - constexpr operator bool() { + constexpr operator bool() const { return left || top || right || bottom; }