diff --git a/app/src/main/cpp/skyline/gpu/presentation_engine.cpp b/app/src/main/cpp/skyline/gpu/presentation_engine.cpp index 8d29c289..d222d134 100644 --- a/app/src/main/cpp/skyline/gpu/presentation_engine.cpp +++ b/app/src/main/cpp/skyline/gpu/presentation_engine.cpp @@ -24,7 +24,8 @@ namespace skyline::gpu { PresentationEngine::PresentationEngine(const DeviceState &state, GPU &gpu) : state{state}, gpu{gpu}, - acquireFence{gpu.vkDevice, vk::FenceCreateInfo{}}, + presentSemaphores{util::MakeFilledArray(gpu.vkDevice, vk::SemaphoreCreateInfo{})}, + acquireSemaphores{util::MakeFilledArray(gpu.vkDevice, vk::SemaphoreCreateInfo{})}, presentationTrack{static_cast(trace::TrackIds::Presentation), perfetto::ProcessTrack::Current()}, vsyncEvent{std::make_shared(state, true)}, choreographerThread{&PresentationEngine::ChoreographerThread, this}, @@ -116,35 +117,31 @@ namespace skyline::gpu { windowScalingMode = frame.scalingMode; } - if (frame.transform != windowTransform) { - if ((result = window->perform(window, NATIVE_WINDOW_SET_BUFFERS_TRANSFORM, static_cast(frame.transform)))) - throw exception("Setting the buffer transform to '{}' failed with {}", ToString(frame.transform), result); - windowTransform = frame.transform; - } + if ((result = window->perform(window, NATIVE_WINDOW_SET_BUFFERS_TRANSFORM, static_cast(frame.transform)))) + throw exception("Setting the buffer transform to '{}' failed with {}", ToString(frame.transform), result); + windowTransform = frame.transform; - gpu.vkDevice.resetFences(*acquireFence); + auto &acquireSemaphore{acquireSemaphores[acquireSemaphoreIndex]}; + acquireSemaphoreIndex = (acquireSemaphoreIndex + 1) % swapchainImageCount; std::pair nextImage; - while (nextImage = vkSwapchain->acquireNextImage(std::numeric_limits::max(), {}, *acquireFence), nextImage.first != vk::Result::eSuccess) [[unlikely]] { + while (nextImage = vkSwapchain->acquireNextImage(std::numeric_limits::max(), *acquireSemaphore, {}), nextImage.first != 
vk::Result::eSuccess) [[unlikely]] { if (nextImage.first == vk::Result::eSuboptimalKHR) surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); }); else throw exception("vkAcquireNextImageKHR returned an unhandled result '{}'", vk::to_string(nextImage.first)); } - auto &nextImageTexture{images.at(nextImage.second)}; - std::ignore = gpu.vkDevice.waitForFences(*acquireFence, true, std::numeric_limits::max()); + auto &nextImageTexture{images.at(nextImage.second)}; + auto &presentSemaphore{presentSemaphores[nextImage.second]}; texture->SynchronizeHost(); - nextImageTexture->CopyFrom(texture, vk::ImageSubresourceRange{ + nextImageTexture->CopyFrom(texture, *acquireSemaphore, *presentSemaphore, vk::ImageSubresourceRange{ .aspectMask = vk::ImageAspectFlagBits::eColor, .levelCount = 1, .layerCount = 1, }); - // Wait on the copy to the swapchain image to complete before submitting for presentation - nextImageTexture->WaitOnFence(); - auto getMonotonicNsNow{[]() -> i64 { timespec time; if (clock_gettime(CLOCK_MONOTONIC, &time)) @@ -194,6 +191,8 @@ namespace skyline::gpu { .swapchainCount = 1, .pSwapchains = &**vkSwapchain, .pImageIndices = &nextImage.second, + .waitSemaphoreCount = 1, + .pWaitSemaphores = &*presentSemaphore, }); // We don't care about suboptimal images as they are caused by not respecting the transform hint, we handle transformations externally } @@ -328,6 +327,7 @@ namespace skyline::gpu { swapchainFormat = format; swapchainExtent = extent; + swapchainImageCount = vkImages.size(); } void PresentationEngine::UpdateSurface(jobject newSurface) { diff --git a/app/src/main/cpp/skyline/gpu/presentation_engine.h b/app/src/main/cpp/skyline/gpu/presentation_engine.h index 2fbcee04..21581b4b 100644 --- a/app/src/main/cpp/skyline/gpu/presentation_engine.h +++ b/app/src/main/cpp/skyline/gpu/presentation_engine.h @@ -35,12 +35,15 @@ namespace skyline::gpu { vk::SurfaceCapabilitiesKHR vkSurfaceCapabilities{}; //!< The capabilities of the current Vulkan 
Surface std::optional vkSwapchain; //!< The Vulkan swapchain and the properties associated with it - vk::raii::Fence acquireFence; //!< A fence for acquiring an image from the swapchain texture::Format swapchainFormat{}; //!< The image format of the textures in the current swapchain texture::Dimensions swapchainExtent{}; //!< The extent of images in the current swapchain static constexpr size_t MaxSwapchainImageCount{6}; //!< The maximum amount of swapchain textures, this affects the amount of images that can be in the swapchain std::array, MaxSwapchainImageCount> images; //!< All the swapchain textures in the same order as supplied by the host swapchain + std::array presentSemaphores; //!< Array of semaphores used to signal that swapchain images are ready to be presented, indexed by Vulkan swapchain index + std::array acquireSemaphores; //!< Array of semaphores used to wait on the GPU for swapchain images to be acquired, indexed by `acquireSemaphoreIndex` + size_t acquireSemaphoreIndex{}; //!< The index of the next semaphore to be used for acquiring swapchain images + size_t swapchainImageCount{}; //!< The number of images in the current swapchain i64 frameTimestamp{}; //!< The timestamp of the last frame being shown in nanoseconds i64 averageFrametimeNs{}; //!< The average time between frames in nanoseconds diff --git a/app/src/main/cpp/skyline/gpu/texture/texture.cpp b/app/src/main/cpp/skyline/gpu/texture/texture.cpp index ec9db2a6..d7b207ff 100644 --- a/app/src/main/cpp/skyline/gpu/texture/texture.cpp +++ b/app/src/main/cpp/skyline/gpu/texture/texture.cpp @@ -839,14 +839,16 @@ namespace skyline::gpu { return std::make_shared(shared_from_this(), type, range, pFormat, mapping); } - void Texture::CopyFrom(std::shared_ptr source, const vk::ImageSubresourceRange &subresource) { - WaitOnBacking(); - source->WaitOnBacking(); + void Texture::CopyFrom(std::shared_ptr source, vk::Semaphore waitSemaphore, vk::Semaphore signalSemaphore, const vk::ImageSubresourceRange 
&subresource) { if (cycle) cycle->WaitSubmit(); if (source->cycle) source->cycle->WaitSubmit(); + WaitOnBacking(); + source->WaitOnBacking(); + WaitOnFence(); + if (source->layout == vk::ImageLayout::eUndefined) throw exception("Cannot copy from image with undefined layout"); else if (source->dimensions != dimensions) @@ -854,78 +856,92 @@ namespace skyline::gpu { TRACE_EVENT("gpu", "Texture::CopyFrom"); - auto lCycle{gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) { - auto sourceBacking{source->GetBacking()}; - if (source->layout != vk::ImageLayout::eTransferSrcOptimal) { - commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{ - .image = sourceBacking, - .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, - .dstAccessMask = vk::AccessFlagBits::eTransferRead, - .oldLayout = source->layout, - .newLayout = vk::ImageLayout::eTransferSrcOptimal, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .subresourceRange = subresource, - }); - } + auto submitFunc{[&](vk::Semaphore extraWaitSemaphore){ + boost::container::small_vector waitSemaphores; + if (waitSemaphore) + waitSemaphores.push_back(waitSemaphore); - auto destinationBacking{GetBacking()}; - if (layout != vk::ImageLayout::eTransferDstOptimal) { - commandBuffer.pipelineBarrier(layout != vk::ImageLayout::eUndefined ? 
vk::PipelineStageFlagBits::eTopOfPipe : vk::PipelineStageFlagBits::eBottomOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{ - .image = destinationBacking, - .srcAccessMask = vk::AccessFlagBits::eMemoryRead, - .dstAccessMask = vk::AccessFlagBits::eTransferWrite, - .oldLayout = layout, - .newLayout = vk::ImageLayout::eTransferDstOptimal, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .subresourceRange = subresource, - }); + if (extraWaitSemaphore) + waitSemaphores.push_back(extraWaitSemaphore); - if (layout == vk::ImageLayout::eUndefined) - layout = vk::ImageLayout::eTransferDstOptimal; - } + return gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) { + auto sourceBacking{source->GetBacking()}; + if (source->layout != vk::ImageLayout::eTransferSrcOptimal) { + commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{ + .image = sourceBacking, + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .oldLayout = source->layout, + .newLayout = vk::ImageLayout::eTransferSrcOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .subresourceRange = subresource, + }); + } - vk::ImageSubresourceLayers subresourceLayers{ - .aspectMask = subresource.aspectMask, - .mipLevel = subresource.baseMipLevel, - .baseArrayLayer = subresource.baseArrayLayer, - .layerCount = subresource.layerCount == VK_REMAINING_ARRAY_LAYERS ? layerCount - subresource.baseArrayLayer : subresource.layerCount, - }; - for (; subresourceLayers.mipLevel < (subresource.levelCount == VK_REMAINING_MIP_LEVELS ? 
levelCount - subresource.baseMipLevel : subresource.levelCount); subresourceLayers.mipLevel++) - commandBuffer.copyImage(sourceBacking, vk::ImageLayout::eTransferSrcOptimal, destinationBacking, vk::ImageLayout::eTransferDstOptimal, vk::ImageCopy{ - .srcSubresource = subresourceLayers, - .dstSubresource = subresourceLayers, - .extent = dimensions, - }); + auto destinationBacking{GetBacking()}; + if (layout != vk::ImageLayout::eTransferDstOptimal) { + commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eAllCommands, {}, {}, {}, vk::ImageMemoryBarrier{ + .image = destinationBacking, + .srcAccessMask = vk::AccessFlagBits::eMemoryRead, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = layout, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .subresourceRange = subresource, + }); - if (layout != vk::ImageLayout::eTransferDstOptimal) - commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{ - .image = destinationBacking, - .srcAccessMask = vk::AccessFlagBits::eTransferWrite, - .dstAccessMask = vk::AccessFlagBits::eMemoryRead, - .oldLayout = vk::ImageLayout::eTransferDstOptimal, - .newLayout = layout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .subresourceRange = subresource, - }); + if (layout == vk::ImageLayout::eUndefined) + layout = vk::ImageLayout::eTransferDstOptimal; + } - if (source->layout != vk::ImageLayout::eTransferSrcOptimal) - commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{ - .image = sourceBacking, - .srcAccessMask = vk::AccessFlagBits::eTransferRead, - .dstAccessMask = vk::AccessFlagBits::eMemoryWrite, - .oldLayout = vk::ImageLayout::eTransferSrcOptimal, 
- .newLayout = source->layout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .subresourceRange = subresource, - }); - })}; - lCycle->AttachObjects(std::move(source), shared_from_this()); - lCycle->ChainCycle(cycle); - lCycle->ChainCycle(source->cycle); - cycle = lCycle; + vk::ImageSubresourceLayers subresourceLayers{ + .aspectMask = subresource.aspectMask, + .mipLevel = subresource.baseMipLevel, + .baseArrayLayer = subresource.baseArrayLayer, + .layerCount = subresource.layerCount == VK_REMAINING_ARRAY_LAYERS ? layerCount - subresource.baseArrayLayer : subresource.layerCount, + }; + for (; subresourceLayers.mipLevel < (subresource.levelCount == VK_REMAINING_MIP_LEVELS ? levelCount - subresource.baseMipLevel : subresource.levelCount); subresourceLayers.mipLevel++) + commandBuffer.copyImage(sourceBacking, vk::ImageLayout::eTransferSrcOptimal, destinationBacking, vk::ImageLayout::eTransferDstOptimal, vk::ImageCopy{ + .srcSubresource = subresourceLayers, + .dstSubresource = subresourceLayers, + .extent = dimensions, + }); + + if (layout != vk::ImageLayout::eTransferDstOptimal) + commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eAllCommands, {}, {}, {}, vk::ImageMemoryBarrier{ + .image = destinationBacking, + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = layout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .subresourceRange = subresource, + }); + + if (source->layout != vk::ImageLayout::eTransferSrcOptimal) + commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eAllCommands, {}, {}, {}, vk::ImageMemoryBarrier{ + .image = sourceBacking, + .srcAccessMask = vk::AccessFlagBits::eTransferRead, + .dstAccessMask = vk::AccessFlagBits::eMemoryWrite, + 
.oldLayout = vk::ImageLayout::eTransferSrcOptimal, + .newLayout = source->layout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .subresourceRange = subresource, + }); + }, waitSemaphores, span{signalSemaphore}); + }}; + + auto newCycle{[&]{ + if (source->cycle) + return source->cycle->RecordSemaphoreWaitUsage(std::move(submitFunc)); + else + return submitFunc({}); + }()}; + newCycle->AttachObjects(std::move(source), shared_from_this()); + cycle = newCycle; } } diff --git a/app/src/main/cpp/skyline/gpu/texture/texture.h b/app/src/main/cpp/skyline/gpu/texture/texture.h index b6ee8a8d..835f0223 100644 --- a/app/src/main/cpp/skyline/gpu/texture/texture.h +++ b/app/src/main/cpp/skyline/gpu/texture/texture.h @@ -559,7 +559,7 @@ namespace skyline::gpu { /** * @brief Copies the contents of the supplied source texture into the current texture */ - void CopyFrom(std::shared_ptr source, const vk::ImageSubresourceRange &subresource = vk::ImageSubresourceRange{ + void CopyFrom(std::shared_ptr source, vk::Semaphore waitSemaphore, vk::Semaphore signalSemaphore, const vk::ImageSubresourceRange &subresource = vk::ImageSubresourceRange{ .aspectMask = vk::ImageAspectFlagBits::eColor, .levelCount = VK_REMAINING_MIP_LEVELS, .layerCount = VK_REMAINING_ARRAY_LAYERS,