diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml index 34e1abac..813a03b5 100644 --- a/.idea/inspectionProfiles/Project_Default.xml +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -171,7 +171,7 @@ - - + diff --git a/.idea/scopes/SkylineLibraries.xml b/.idea/scopes/SkylineLibraries.xml index c13db64e..2733c9e3 100644 --- a/.idea/scopes/SkylineLibraries.xml +++ b/.idea/scopes/SkylineLibraries.xml @@ -1,3 +1,3 @@ - + \ No newline at end of file diff --git a/app/src/main/cpp/skyline/common/settings.cpp b/app/src/main/cpp/skyline/common/settings.cpp index 6f40a689..5c6cbd1f 100644 --- a/app/src/main/cpp/skyline/common/settings.cpp +++ b/app/src/main/cpp/skyline/common/settings.cpp @@ -16,9 +16,9 @@ namespace skyline { #define PREF_ELEM(name, memberName, rhs) std::make_pair(std::string(name), [](Settings &settings, const pugi::xml_node &element) { settings.memberName = rhs; }) std::tuple preferences{ - PREF_ELEM("operation_mode", operationMode, element.attribute("value").as_bool()), - PREF_ELEM("username_value", username, element.text().as_string()), PREF_ELEM("log_level", logLevel, static_cast(element.text().as_uint(static_cast(Logger::LogLevel::Info)))), + PREF_ELEM("username_value", username, element.text().as_string()), + PREF_ELEM("operation_mode", operationMode, element.attribute("value").as_bool()), }; #undef PREF_ELEM diff --git a/app/src/main/cpp/skyline/common/settings.h b/app/src/main/cpp/skyline/common/settings.h index c4975586..682874b7 100644 --- a/app/src/main/cpp/skyline/common/settings.h +++ b/app/src/main/cpp/skyline/common/settings.h @@ -12,8 +12,9 @@ namespace skyline { class Settings { public: Logger::LogLevel logLevel; //!< The minimum level that logs need to be for them to be printed - bool operationMode; //!< If the emulated Switch should be handheld or docked std::string username; //!< The name set by the user to be supplied to the guest + bool operationMode; //!< If the emulated 
Switch should be handheld or docked + bool forceTripleBuffering{true}; //!< If the presentation should always triple buffer even if the game double buffers /** * @param fd An FD to the preference XML file diff --git a/app/src/main/cpp/skyline/gpu/fence_cycle.h b/app/src/main/cpp/skyline/gpu/fence_cycle.h index 099d5e00..a9f04ec6 100644 --- a/app/src/main/cpp/skyline/gpu/fence_cycle.h +++ b/app/src/main/cpp/skyline/gpu/fence_cycle.h @@ -123,8 +123,8 @@ namespace skyline::gpu { } } - const auto& first{*dependencies.begin()}; - const auto& last{*dependencies.end()}; + const auto &first{*dependencies.begin()}; + const auto &last{*dependencies.end()}; std::shared_ptr next{std::atomic_load_explicit(&list, std::memory_order_consume)}; do { last->next = next; diff --git a/app/src/main/cpp/skyline/gpu/memory_manager.cpp b/app/src/main/cpp/skyline/gpu/memory_manager.cpp index a247464c..018f465c 100644 --- a/app/src/main/cpp/skyline/gpu/memory_manager.cpp +++ b/app/src/main/cpp/skyline/gpu/memory_manager.cpp @@ -5,13 +5,32 @@ #include "memory_manager.h" namespace skyline::gpu::memory { + /** + * @brief If the result isn't VK_SUCCESS then an exception is thrown + */ + void ThrowOnFail(VkResult result, const char *function = __builtin_FUNCTION()) { + if (result != VK_SUCCESS) + vk::throwResultException(vk::Result(result), function); + } + StagingBuffer::~StagingBuffer() { + if (vmaAllocator && vmaAllocation && vkBuffer) vmaDestroyBuffer(vmaAllocator, vkBuffer, vmaAllocation); } - void MemoryManager::ThrowOnFail(VkResult result, const char *function) { - if (result != VK_SUCCESS) - vk::throwResultException(vk::Result(result), function); + Image::~Image() { + if (vmaAllocator && vmaAllocation && vkImage) { + if (pointer) + vmaUnmapMemory(vmaAllocator, vmaAllocation); + vmaDestroyImage(vmaAllocator, vkImage, vmaAllocation); + } + } + + u8 *Image::data() { + if (pointer) [[likely]] + return pointer; + ThrowOnFail(vmaMapMemory(vmaAllocator, vmaAllocation, 
reinterpret_cast(&pointer))); + return pointer; } MemoryManager::MemoryManager(const GPU &pGpu) : gpu(pGpu) { @@ -75,4 +94,32 @@ namespace skyline::gpu::memory { return std::make_shared(reinterpret_cast(allocationInfo.pMappedData), allocationInfo.size, vmaAllocator, buffer, allocation); } + + Image MemoryManager::AllocateImage(const vk::ImageCreateInfo &createInfo) { + VmaAllocationCreateInfo allocationCreateInfo{ + .usage = VMA_MEMORY_USAGE_GPU_ONLY, + }; + + VkImage image; + VmaAllocation allocation; + VmaAllocationInfo allocationInfo; + ThrowOnFail(vmaCreateImage(vmaAllocator, &static_cast(createInfo), &allocationCreateInfo, &image, &allocation, &allocationInfo)); + + return Image(vmaAllocator, image, allocation); + } + + Image MemoryManager::AllocateMappedImage(const vk::ImageCreateInfo &createInfo) { + VmaAllocationCreateInfo allocationCreateInfo{ + .flags = VMA_ALLOCATION_CREATE_MAPPED_BIT, + .usage = VMA_MEMORY_USAGE_UNKNOWN, + .memoryTypeBits = static_cast(vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eDeviceLocal), + }; + + VkImage image; + VmaAllocation allocation; + VmaAllocationInfo allocationInfo; + ThrowOnFail(vmaCreateImage(vmaAllocator, &static_cast(createInfo), &allocationCreateInfo, &image, &allocation, &allocationInfo)); + + return Image(reinterpret_cast(allocationInfo.pMappedData), vmaAllocator, image, allocation); + } } diff --git a/app/src/main/cpp/skyline/gpu/memory_manager.h b/app/src/main/cpp/skyline/gpu/memory_manager.h index 7e60dd17..c4207dec 100644 --- a/app/src/main/cpp/skyline/gpu/memory_manager.h +++ b/app/src/main/cpp/skyline/gpu/memory_manager.h @@ -11,16 +11,57 @@ namespace skyline::gpu::memory { * @brief A view into a CPU mapping of a Vulkan buffer * @note The mapping **should not** be used after the lifetime of the object has ended */ - struct StagingBuffer : public span, FenceCycleDependency { + struct StagingBuffer : public span, public FenceCycleDependency { 
VmaAllocator vmaAllocator; VmaAllocation vmaAllocation; vk::Buffer vkBuffer; constexpr StagingBuffer(u8 *pointer, size_t size, VmaAllocator vmaAllocator, vk::Buffer vkBuffer, VmaAllocation vmaAllocation) : vmaAllocator(vmaAllocator), vkBuffer(vkBuffer), vmaAllocation(vmaAllocation), span(pointer, size) {} + StagingBuffer(const StagingBuffer &) = delete; + + constexpr StagingBuffer(StagingBuffer &&other) : vmaAllocator(std::exchange(other.vmaAllocator, nullptr)), vmaAllocation(std::exchange(other.vmaAllocation, nullptr)), vkBuffer(std::exchange(other.vkBuffer, {})) {} + + StagingBuffer &operator=(const StagingBuffer &) = delete; + + StagingBuffer &operator=(StagingBuffer &&) = default; + ~StagingBuffer(); }; + /** + * @brief A Vulkan image which VMA allocates and manages the backing memory for + */ + struct Image { + private: + u8 *pointer{}; + + public: + VmaAllocator vmaAllocator; + VmaAllocation vmaAllocation; + vk::Image vkImage; + + constexpr Image(VmaAllocator vmaAllocator, vk::Image vkImage, VmaAllocation vmaAllocation) : vmaAllocator(vmaAllocator), vkImage(vkImage), vmaAllocation(vmaAllocation) {} + + constexpr Image(u8 *pointer, VmaAllocator vmaAllocator, vk::Image vkImage, VmaAllocation vmaAllocation) : pointer(pointer), vmaAllocator(vmaAllocator), vkImage(vkImage), vmaAllocation(vmaAllocation) {} + + Image(const Image &) = delete; + + constexpr Image(Image &&other) : pointer(std::exchange(other.pointer, nullptr)), vmaAllocator(std::exchange(other.vmaAllocator, nullptr)), vmaAllocation(std::exchange(other.vmaAllocation, nullptr)), vkImage(std::exchange(other.vkImage, {})) {} + + Image &operator=(const Image &) = delete; + + Image &operator=(Image &&) = default; + + ~Image(); + + /** + * @return A pointer to a mapping of the image on the CPU + * @note If the image isn't already mapped on the CPU, this creates a mapping for it + */ + u8 *data(); + }; + /** * @brief An abstraction over memory operations done in Vulkan, it's used for all allocations on the 
host GPU */ @@ -29,11 +70,6 @@ namespace skyline::gpu::memory { const GPU &gpu; VmaAllocator vmaAllocator{VK_NULL_HANDLE}; - /** - * @brief If the result isn't VK_SUCCESS then an exception is thrown - */ - static void ThrowOnFail(VkResult result, const char *function = __builtin_FUNCTION()); - public: MemoryManager(const GPU &gpu); @@ -43,5 +79,15 @@ namespace skyline::gpu::memory { * @brief Creates a buffer which is optimized for staging (Transfer Source) */ std::shared_ptr AllocateStagingBuffer(vk::DeviceSize size); + + /** + * @brief Creates an image which is allocated and deallocated using RAII + */ + Image AllocateImage(const vk::ImageCreateInfo &createInfo); + + /** + * @brief Creates an image which is allocated and deallocated using RAII and is optimal for being mapped on the CPU + */ + Image AllocateMappedImage(const vk::ImageCreateInfo &createInfo); }; } diff --git a/app/src/main/cpp/skyline/gpu/presentation_engine.cpp b/app/src/main/cpp/skyline/gpu/presentation_engine.cpp index 508887c6..ff7ddfa8 100644 --- a/app/src/main/cpp/skyline/gpu/presentation_engine.cpp +++ b/app/src/main/cpp/skyline/gpu/presentation_engine.cpp @@ -2,15 +2,18 @@ // Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/) #include -#include +#include +#include #include +#include #include "presentation_engine.h" +#include "texture/format.h" extern skyline::i32 Fps; extern skyline::i32 FrameTime; namespace skyline::gpu { - PresentationEngine::PresentationEngine(const DeviceState &state, GPU &gpu) : state(state), gpu(gpu), vsyncEvent(std::make_shared(state, true)), bufferEvent(std::make_shared(state, true)), presentationTrack(static_cast(trace::TrackIds::Presentation), perfetto::ProcessTrack::Current()) { + PresentationEngine::PresentationEngine(const DeviceState &state, GPU &gpu) : state(state), gpu(gpu), acquireFence(gpu.vkDevice, vk::FenceCreateInfo{}), presentationTrack(static_cast(trace::TrackIds::Presentation), perfetto::ProcessTrack::Current()), 
choreographerThread(&PresentationEngine::ChoreographerThread, this), vsyncEvent(std::make_shared(state, true)) { auto desc{presentationTrack.Serialize()}; desc.set_name("Presentation"); perfetto::TrackEvent::SetTrackDescriptor(presentationTrack, desc); @@ -20,6 +23,26 @@ namespace skyline::gpu { auto env{state.jvm->GetEnv()}; if (!env->IsSameObject(jSurface, nullptr)) env->DeleteGlobalRef(jSurface); + + if (choreographerThread.joinable()) { + if (choreographerLooper) + ALooper_wake(choreographerLooper); + choreographerThread.join(); + } + } + + /** + * @url https://developer.android.com/ndk/reference/group/choreographer#achoreographer_framecallback + */ + void ChoreographerCallback(long frameTimeNanos, kernel::type::KEvent* vsyncEvent) { + vsyncEvent->Signal(); + AChoreographer_postFrameCallback(AChoreographer_getInstance(), reinterpret_cast(&ChoreographerCallback), vsyncEvent); + } + + void PresentationEngine::ChoreographerThread() { + choreographerLooper = ALooper_prepare(0); + AChoreographer_postFrameCallback(AChoreographer_getInstance(), reinterpret_cast(&ChoreographerCallback), vsyncEvent.get()); + ALooper_pollAll(-1, nullptr, nullptr, nullptr); } service::hosbinder::NativeWindowTransform GetAndroidTransform(vk::SurfaceTransformFlagBitsKHR transform) { @@ -45,58 +68,55 @@ namespace skyline::gpu { } } - void PresentationEngine::UpdateSwapchain(u16 imageCount, vk::Format imageFormat, vk::Extent2D imageExtent, bool newSurface) { - if (!imageCount) - return; - else if (imageCount > service::hosbinder::GraphicBufferProducer::MaxSlotCount) - throw exception("Requesting swapchain with higher image count ({}) than maximum slot count ({})", imageCount, service::hosbinder::GraphicBufferProducer::MaxSlotCount); + void PresentationEngine::UpdateSwapchain(texture::Format format, texture::Dimensions extent) { + auto minImageCount{std::max(vkSurfaceCapabilities.minImageCount, state.settings->forceTripleBuffering ? 
3U : 0U)}; + if (minImageCount > MaxSlotCount) + throw exception("Requesting swapchain with higher image count ({}) than maximum slot count ({})", minImageCount, MaxSlotCount); const auto &capabilities{vkSurfaceCapabilities}; - if (imageCount < capabilities.minImageCount || (capabilities.maxImageCount && imageCount > capabilities.maxImageCount)) - throw exception("Cannot update swapchain to accomodate image count: {} ({}-{})", imageCount, capabilities.minImageCount, capabilities.maxImageCount); - if (capabilities.minImageExtent.height > imageExtent.height || capabilities.minImageExtent.width > imageExtent.width || capabilities.maxImageExtent.height < imageExtent.height || capabilities.maxImageExtent.width < imageExtent.width) - throw exception("Cannot update swapchain to accomodate image extent: {}x{} ({}x{}-{}x{})", imageExtent.width, imageExtent.height, capabilities.minImageExtent.width, capabilities.minImageExtent.height, capabilities.maxImageExtent.width, capabilities.maxImageExtent.height); + if (minImageCount < capabilities.minImageCount || (capabilities.maxImageCount && minImageCount > capabilities.maxImageCount)) + throw exception("Cannot update swapchain to accomodate image count: {} ({}-{})", minImageCount, capabilities.minImageCount, capabilities.maxImageCount); + else if (capabilities.minImageExtent.height > extent.height || capabilities.minImageExtent.width > extent.width || capabilities.maxImageExtent.height < extent.height || capabilities.maxImageExtent.width < extent.width) + throw exception("Cannot update swapchain to accomodate image extent: {}x{} ({}x{}-{}x{})", extent.width, extent.height, capabilities.minImageExtent.width, capabilities.minImageExtent.height, capabilities.maxImageExtent.width, capabilities.maxImageExtent.height); - if (swapchain.imageFormat != imageFormat || newSurface) { + if (swapchainFormat != format) { auto formats{gpu.vkPhysicalDevice.getSurfaceFormatsKHR(**vkSurface)}; - if (std::find(formats.begin(), formats.end(), 
vk::SurfaceFormatKHR{imageFormat, vk::ColorSpaceKHR::eSrgbNonlinear}) == formats.end()) - throw exception("Surface doesn't support requested image format '{}' with colorspace '{}'", vk::to_string(imageFormat), vk::to_string(vk::ColorSpaceKHR::eSrgbNonlinear)); + if (std::find(formats.begin(), formats.end(), vk::SurfaceFormatKHR{format, vk::ColorSpaceKHR::eSrgbNonlinear}) == formats.end()) + throw exception("Surface doesn't support requested image format '{}' with colorspace '{}'", vk::to_string(format), vk::to_string(vk::ColorSpaceKHR::eSrgbNonlinear)); } constexpr vk::ImageUsageFlags presentUsage{vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst}; if ((capabilities.supportedUsageFlags & presentUsage) != presentUsage) throw exception("Swapchain doesn't support image usage '{}': {}", vk::to_string(presentUsage), vk::to_string(capabilities.supportedUsageFlags)); - vkSwapchain = vk::raii::SwapchainKHR(gpu.vkDevice, vk::SwapchainCreateInfoKHR{ + vkSwapchain.emplace(gpu.vkDevice, vk::SwapchainCreateInfoKHR{ .surface = **vkSurface, - .minImageCount = imageCount, - .imageFormat = imageFormat, + .minImageCount = minImageCount, + .imageFormat = format, .imageColorSpace = vk::ColorSpaceKHR::eSrgbNonlinear, - .imageExtent = imageExtent, + .imageExtent = extent, .imageArrayLayers = 1, .imageUsage = presentUsage, .imageSharingMode = vk::SharingMode::eExclusive, .compositeAlpha = vk::CompositeAlphaFlagBitsKHR::eInherit, - .presentMode = vk::PresentModeKHR::eFifo, - .clipped = false, - .oldSwapchain = vkSwapchain ? 
**vkSwapchain : vk::SwapchainKHR{}, + .presentMode = vk::PresentModeKHR::eMailbox, + .clipped = true, }); auto vkImages{vkSwapchain->getImages()}; - for (u16 slot{}; slot < imageCount; slot++) { - auto &vkImage{vkImages[slot]}; - swapchain.vkImages[slot] = vkImage; - auto &image{swapchain.textures[slot]}; - if (image) { - std::scoped_lock lock(*image); - image->SwapBacking(vkImage); - image->TransitionLayout(vk::ImageLayout::ePresentSrcKHR); - image->SynchronizeHost(); // Synchronize the new host backing with guest memory - } + if (vkImages.size() > MaxSlotCount) + throw exception("Swapchain has higher image count ({}) than maximum slot count ({})", minImageCount, MaxSlotCount); + + for (size_t index{}; index < vkImages.size(); index++) { + auto &slot{slots[index]}; + slot = std::make_shared(*state.gpu, vkImages[index], extent, format::GetFormat(format), vk::ImageLayout::eUndefined, vk::ImageTiling::eOptimal); + slot->TransitionLayout(vk::ImageLayout::ePresentSrcKHR); } - swapchain.imageCount = imageCount; - swapchain.imageFormat = imageFormat; - swapchain.imageExtent = imageExtent; + for (size_t index{vkImages.size()}; index < MaxSlotCount; index++) + slots[index] = {}; + + swapchainFormat = format; + swapchainExtent = extent; } void PresentationEngine::UpdateSurface(jobject newSurface) { @@ -110,18 +130,7 @@ namespace skyline::gpu { if (!env->IsSameObject(newSurface, nullptr)) jSurface = env->NewGlobalRef(newSurface); - if (vkSwapchain) { - for (u16 slot{}; slot < swapchain.imageCount; slot++) { - auto &image{swapchain.textures[slot]}; - if (image) { - std::scoped_lock lock(*image); - image->SynchronizeGuest(); // Synchronize host backing to guest memory prior to being destroyed - image->SwapBacking(nullptr); - } - } - swapchain.vkImages = {}; - vkSwapchain.reset(); - } + vkSwapchain.reset(); if (jSurface) { vkSurface.emplace(gpu.vkInstance, vk::AndroidSurfaceCreateInfoKHR{ @@ -131,7 +140,8 @@ namespace skyline::gpu { throw exception("Vulkan Queue doesn't support 
presentation with surface"); vkSurfaceCapabilities = gpu.vkPhysicalDevice.getSurfaceCapabilitiesKHR(**vkSurface); - UpdateSwapchain(swapchain.imageCount, swapchain.imageFormat, swapchain.imageExtent, true); + if (swapchainExtent && swapchainFormat) + UpdateSwapchain(swapchainFormat, swapchainExtent); surfaceCondition.notify_all(); } else { @@ -139,60 +149,32 @@ namespace skyline::gpu { } } - std::shared_ptr PresentationEngine::CreatePresentationTexture(const std::shared_ptr &texture, u8 slot) { - std::lock_guard guard(mutex); - if (swapchain.imageCount <= slot && slot + 1 >= vkSurfaceCapabilities.minImageCount) - UpdateSwapchain(slot + 1, texture->format.vkFormat, texture->dimensions); - auto host{texture->InitializeTexture(swapchain.vkImages.at(slot), vk::ImageTiling::eOptimal)}; - swapchain.textures[slot] = host; - return host; - } - - service::hosbinder::AndroidStatus PresentationEngine::GetFreeTexture(bool async, i32 &slot) { - using AndroidStatus = service::hosbinder::AndroidStatus; - + void PresentationEngine::Present(const std::shared_ptr &texture, u64 presentId) { std::unique_lock lock(mutex); surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); }); - if (swapchain.dequeuedCount < swapchain.imageCount) { - static vk::raii::Fence fence(gpu.vkDevice, vk::FenceCreateInfo{}); - auto timeout{async ? 
0ULL : std::numeric_limits::max()}; // We cannot block for a buffer to be retrieved in async mode - auto nextImage{vkSwapchain->acquireNextImage(timeout, {}, *fence)}; - if (nextImage.first == vk::Result::eSuccess) { - swapchain.dequeuedCount++; - while (gpu.vkDevice.waitForFences(*fence, true, std::numeric_limits::max()) == vk::Result::eTimeout); - slot = nextImage.second; - return AndroidStatus::Ok; - } else if (nextImage.first == vk::Result::eNotReady || nextImage.first == vk::Result::eTimeout) { - return AndroidStatus::WouldBlock; - } else if (nextImage.first == vk::Result::eSuboptimalKHR) { + + if (texture->format != swapchainFormat || texture->dimensions != swapchainExtent) + UpdateSwapchain(texture->format, texture->dimensions); + + std::pair nextImage; + while ((nextImage = vkSwapchain->acquireNextImage(std::numeric_limits::max(), {}, *acquireFence)).first != vk::Result::eSuccess) [[unlikely]] + if (nextImage.first == vk::Result::eSuboptimalKHR) surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); }); - return GetFreeTexture(async, slot); - } else { - throw exception("VkAcquireNextImageKHR returned an unhandled result '{}'", vk::to_string(nextImage.first)); - } - } - return AndroidStatus::Busy; - } + else + throw exception("vkAcquireNextImageKHR returned an unhandled result '{}'", vk::to_string(nextImage.first)); + while (gpu.vkDevice.waitForFences(*acquireFence, true, std::numeric_limits::max()) == vk::Result::eTimeout); - void PresentationEngine::Present(u32 slot) { - std::unique_lock lock(mutex); - surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); }); - - if (--swapchain.dequeuedCount < 0) [[unlikely]] { - throw exception("Swapchain has been presented more times than images from it have been acquired: {} (Image Count: {})", swapchain.dequeuedCount, swapchain.imageCount); - } + slots.at(nextImage.second)->CopyFrom(texture); { std::lock_guard queueLock(gpu.queueMutex); 
static_cast(gpu.vkQueue.presentKHR(vk::PresentInfoKHR{ .swapchainCount = 1, .pSwapchains = &**vkSwapchain, - .pImageIndices = &slot, + .pImageIndices = &nextImage.second, })); // We explicitly discard the result here as suboptimal images are expected when the game doesn't respect the transform hint } - vsyncEvent->Signal(); - if (frameTimestamp) { auto now{util::GetTimeNs()}; FrameTime = static_cast((now - frameTimestamp) / 10000); // frametime / 100 is the real ms value, this is to retain the first two decimals diff --git a/app/src/main/cpp/skyline/gpu/presentation_engine.h b/app/src/main/cpp/skyline/gpu/presentation_engine.h index 65a6ee81..b0696537 100644 --- a/app/src/main/cpp/skyline/gpu/presentation_engine.h +++ b/app/src/main/cpp/skyline/gpu/presentation_engine.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include #include @@ -27,31 +28,31 @@ namespace skyline::gpu { vk::SurfaceCapabilitiesKHR vkSurfaceCapabilities; //!< The capabilities of the current Vulkan Surface std::optional vkSwapchain; //!< The Vulkan swapchain and the properties associated with it - struct SwapchainContext { - std::array, service::hosbinder::GraphicBufferProducer::MaxSlotCount> textures{}; - std::array vkImages{VK_NULL_HANDLE}; - u8 imageCount{}; - i8 dequeuedCount{}; - vk::Format imageFormat{}; - vk::Extent2D imageExtent{}; + vk::raii::Fence acquireFence; //!< A fence for acquiring an image from the swapchain + texture::Format swapchainFormat{}; //!< The image format of the textures in the current swapchain + texture::Dimensions swapchainExtent{}; //!< The extent of images in the current swapchain - static_assert(std::numeric_limits::max() >= service::hosbinder::GraphicBufferProducer::MaxSlotCount); - static_assert(std::numeric_limits::max() >= service::hosbinder::GraphicBufferProducer::MaxSlotCount); - } swapchain; //!< The properties of the currently created swapchain + static constexpr size_t MaxSlotCount{6}; //!< The maximum amount of queue slots, this affects the amount 
of images that can be in the swapchain + std::array, MaxSlotCount> slots; //!< The backing for storing all slots and sorted in the same order as supplied by the Vulkan swapchain u64 frameTimestamp{}; //!< The timestamp of the last frame being shown perfetto::Track presentationTrack; //!< Perfetto track used for presentation events + std::thread choreographerThread; //!< A thread for signalling the V-Sync event using AChoreographer + ALooper* choreographerLooper{}; //!< The looper object associated with the Choreographer thread + + /** + * @brief The entry point for the Choreographer thread, the function runs ALooper on the thread + */ + void ChoreographerThread(); + /** * @note 'PresentationEngine::mutex' **must** be locked prior to calling this */ - void UpdateSwapchain(u16 imageCount, vk::Format imageFormat, vk::Extent2D imageExtent, bool newSurface = false); + void UpdateSwapchain(texture::Format format, texture::Dimensions extent); public: - texture::Dimensions resolution{}; - i32 format{}; std::shared_ptr vsyncEvent; //!< Signalled every time a frame is drawn - std::shared_ptr bufferEvent; //!< Signalled every time a buffer is freed PresentationEngine(const DeviceState &state, GPU &gpu); @@ -63,20 +64,11 @@ namespace skyline::gpu { void UpdateSurface(jobject newSurface); /** - * @brief Creates a Texture object from a GuestTexture as a part of the Vulkan swapchain + * @brief Queue the supplied texture to be presented to the screen + * @param presentId A UUID used to tag this frame for presentation timing readouts + * @note The texture **must** be locked prior to calling this */ - std::shared_ptr CreatePresentationTexture(const std::shared_ptr &texture, u8 slot); - - /** - * @param async If to return immediately when a texture is not available - * @param slot The slot the freed texture is in is written into this, it is untouched if there's an error - */ - service::hosbinder::AndroidStatus GetFreeTexture(bool async, i32 &slot); - - /** - * @brief Send a 
texture from a slot to the presentation queue to be displayed - */ - void Present(u32 slot); + void Present(const std::shared_ptr &texture, u64 presentId); /** * @return A transform that the application should render with to elide costly transforms later diff --git a/app/src/main/cpp/skyline/gpu/texture/format.h b/app/src/main/cpp/skyline/gpu/texture/format.h index 666b64f0..cbbf3321 100644 --- a/app/src/main/cpp/skyline/gpu/texture/format.h +++ b/app/src/main/cpp/skyline/gpu/texture/format.h @@ -10,4 +10,18 @@ namespace skyline::gpu::format { constexpr Format RGBA8888Unorm{sizeof(u8) * 4, 1, 1, vk::Format::eR8G8B8A8Unorm}; //!< 8-bits per channel 4-channel pixels constexpr Format RGB565Unorm{sizeof(u8) * 2, 1, 1, vk::Format::eR5G6B5UnormPack16}; //!< Red channel: 5-bit, Green channel: 6-bit, Blue channel: 5-bit + + /** + * @brief Converts a format from Vulkan to a Skyline format + */ + constexpr const Format &GetFormat(vk::Format format) { + switch (format) { + case vk::Format::eR8G8B8A8Unorm: + return RGBA8888Unorm; + case vk::Format::eR5G6B5UnormPack16: + return RGB565Unorm; + default: + throw exception("Vulkan format not supported: '{}'", vk::to_string(format)); + } + } } diff --git a/app/src/main/cpp/skyline/gpu/texture/texture.cpp b/app/src/main/cpp/skyline/gpu/texture/texture.cpp index c693f762..700330f5 100644 --- a/app/src/main/cpp/skyline/gpu/texture/texture.cpp +++ b/app/src/main/cpp/skyline/gpu/texture/texture.cpp @@ -7,29 +7,76 @@ #include "texture.h" namespace skyline::gpu { - GuestTexture::GuestTexture(const DeviceState &state, u8 *pointer, texture::Dimensions dimensions, texture::Format format, texture::TileMode tiling, texture::TileConfig layout) : state(state), pointer(pointer), dimensions(dimensions), format(format), tileMode(tiling), tileConfig(layout) {} + GuestTexture::GuestTexture(const DeviceState &state, u8 *pointer, texture::Dimensions dimensions, const texture::Format &format, texture::TileMode tiling, texture::TileConfig layout) : 
state(state), pointer(pointer), dimensions(dimensions), format(format), tileMode(tiling), tileConfig(layout) {} - std::shared_ptr GuestTexture::InitializeTexture(vk::Image backing, std::optional tiling, vk::ImageLayout pLayout, std::optional pFormat, std::optional pDimensions, texture::Swizzle swizzle) { + std::shared_ptr GuestTexture::InitializeTexture(vk::Image backing, texture::Dimensions pDimensions, const texture::Format &pFormat, std::optional tiling, vk::ImageLayout layout, texture::Swizzle swizzle) { if (!host.expired()) throw exception("Trying to create multiple Texture objects from a single GuestTexture"); - auto sharedHost{std::make_shared(*state.gpu, backing, pLayout, shared_from_this(), pDimensions ? *pDimensions : dimensions, pFormat ? *pFormat : format, tiling ? *tiling : (tileMode == texture::TileMode::Block) ? vk::ImageTiling::eOptimal : vk::ImageTiling::eLinear, swizzle)}; + auto sharedHost{std::make_shared(*state.gpu, backing, shared_from_this(), pDimensions ? pDimensions : dimensions, pFormat ? pFormat : format, layout, tiling ? *tiling : (tileMode == texture::TileMode::Block) ? vk::ImageTiling::eOptimal : vk::ImageTiling::eLinear, swizzle)}; host = sharedHost; return sharedHost; } - std::shared_ptr GuestTexture::InitializeTexture(vk::raii::Image &&backing, std::optional tiling, vk::ImageLayout pLayout, std::optional pFormat, std::optional pDimensions, texture::Swizzle swizzle) { + std::shared_ptr GuestTexture::InitializeTexture(vk::raii::Image &&backing, std::optional tiling, vk::ImageLayout layout, const texture::Format &pFormat, texture::Dimensions pDimensions, texture::Swizzle swizzle) { if (!host.expired()) throw exception("Trying to create multiple Texture objects from a single GuestTexture"); - auto sharedHost{std::make_shared(*state.gpu, std::move(backing), pLayout, shared_from_this(), pDimensions ? *pDimensions : dimensions, pFormat ? *pFormat : format, tiling ? *tiling : (tileMode == texture::TileMode::Block) ? 
vk::ImageTiling::eOptimal : vk::ImageTiling::eLinear, swizzle)}; + auto sharedHost{std::make_shared(*state.gpu, std::move(backing), shared_from_this(), pDimensions ? pDimensions : dimensions, pFormat ? pFormat : format, layout, tiling ? *tiling : (tileMode == texture::TileMode::Block) ? vk::ImageTiling::eOptimal : vk::ImageTiling::eLinear, swizzle)}; host = sharedHost; return sharedHost; } - Texture::Texture(GPU &gpu, BackingType &&backing, vk::ImageLayout layout, std::shared_ptr guest, texture::Dimensions dimensions, texture::Format format, vk::ImageTiling tiling, vk::ComponentMapping mapping) : gpu(gpu), backing(std::move(backing)), layout(layout), guest(std::move(guest)), dimensions(dimensions), format(format), tiling(tiling), mapping(mapping) { + std::shared_ptr GuestTexture::CreateTexture(vk::ImageUsageFlags usage, std::optional pTiling, vk::ImageLayout initialLayout, const texture::Format &pFormat, texture::Dimensions pDimensions, texture::Swizzle swizzle) { + if (!host.expired()) + throw exception("Trying to create multiple Texture objects from a single GuestTexture"); + + pDimensions = pDimensions ? pDimensions : dimensions; + const auto &lFormat{pFormat ? pFormat : format}; + auto tiling{pTiling ? *pTiling : (tileMode == texture::TileMode::Block) ? vk::ImageTiling::eOptimal : vk::ImageTiling::eLinear}; + vk::ImageCreateInfo imageCreateInfo{ + .imageType = pDimensions.GetType(), + .format = lFormat, + .extent = pDimensions, + .mipLevels = 1, + .arrayLayers = 1, + .samples = vk::SampleCountFlagBits::e1, + .tiling = tiling, + .usage = usage | vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst, + .sharingMode = vk::SharingMode::eExclusive, + .queueFamilyIndexCount = 1, + .pQueueFamilyIndices = &state.gpu->vkQueueFamilyIndex, + .initialLayout = initialLayout, + }; + + auto sharedHost{std::make_shared(*state.gpu, tiling != vk::ImageTiling::eLinear ? 
state.gpu->memory.AllocateImage(imageCreateInfo) : state.gpu->memory.AllocateMappedImage(imageCreateInfo), shared_from_this(), pDimensions, lFormat, initialLayout, tiling, swizzle)}; + host = sharedHost; + return sharedHost; + } + + Texture::Texture(GPU &gpu, BackingType &&backing, std::shared_ptr guest, texture::Dimensions dimensions, const texture::Format &format, vk::ImageLayout layout, vk::ImageTiling tiling, vk::ComponentMapping mapping) : gpu(gpu), backing(std::move(backing)), layout(layout), guest(std::move(guest)), dimensions(dimensions), format(format), tiling(tiling), mapping(mapping) { if (GetBacking()) SynchronizeHost(); } + Texture::Texture(GPU &gpu, BackingType &&backing, texture::Dimensions dimensions, const texture::Format &format, vk::ImageLayout layout, vk::ImageTiling tiling, vk::ComponentMapping mapping) : gpu(gpu), backing(std::move(backing)), guest(nullptr), dimensions(dimensions), format(format), layout(layout), tiling(tiling), mapping(mapping) {} + + Texture::Texture(GPU &gpu, texture::Dimensions dimensions, const texture::Format &format, vk::ImageLayout initialLayout, vk::ImageUsageFlags usage, vk::ImageTiling tiling, vk::ComponentMapping mapping) : gpu(gpu), guest(nullptr), dimensions(dimensions), format(format), layout(initialLayout), tiling(tiling), mapping(mapping) { + vk::ImageCreateInfo imageCreateInfo{ + .imageType = dimensions.GetType(), + .format = format, + .extent = dimensions, + .mipLevels = 1, + .arrayLayers = 1, + .samples = vk::SampleCountFlagBits::e1, + .tiling = tiling, + .usage = usage | vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst, + .sharingMode = vk::SharingMode::eExclusive, + .queueFamilyIndexCount = 1, + .pQueueFamilyIndices = &gpu.vkQueueFamilyIndex, + .initialLayout = initialLayout, + }; + backing = tiling != vk::ImageTiling::eLinear ? 
gpu.memory.AllocateImage(imageCreateInfo) : gpu.memory.AllocateMappedImage(imageCreateInfo); + } + bool Texture::WaitOnBacking() { if (GetBacking()) [[likely]] { return false; @@ -83,13 +130,23 @@ namespace skyline::gpu { } void Texture::SynchronizeHost() { + if (!guest) + throw exception("Synchronization of host textures requires a valid guest texture to synchronize from"); + TRACE_EVENT("gpu", "Texture::SynchronizeHost"); auto pointer{guest->pointer}; auto size{format.GetSize(dimensions)}; - auto stagingBuffer{[&]() { - if (tiling == vk::ImageTiling::eOptimal) { - return gpu.memory.AllocateStagingBuffer(size); + u8 *bufferData; + auto stagingBuffer{[&]() -> std::shared_ptr { + if (tiling == vk::ImageTiling::eOptimal || !std::holds_alternative(backing)) { + auto stagingBuffer{gpu.memory.AllocateStagingBuffer(size)}; + bufferData = stagingBuffer->data(); + return stagingBuffer; + } else if (tiling == vk::ImageTiling::eLinear) { + bufferData = std::get(backing).data(); + WaitOnFence(); + return nullptr; } else { throw exception("Guest -> Host synchronization of images tiled as '{}' isn't implemented", vk::to_string(tiling)); } @@ -112,7 +169,7 @@ namespace skyline::gpu { auto gobYOffset{robWidthBytes * gobHeight}; // The offset of the next Y-axis GOB from the current one in linear space auto inputSector{pointer}; // The address of the input sector - auto outputRob{stagingBuffer->data()}; // The address of the output block + auto outputRob{bufferData}; // The address of the output block for (u32 rob{}, y{}, paddingY{}; rob < surfaceHeightRobs; rob++) { // Every Surface contains `surfaceHeightRobs` ROBs auto outputBlock{outputRob}; // We iterate through a block independently of the ROB @@ -141,7 +198,7 @@ namespace skyline::gpu { auto sizeStride{guest->format.GetSize(guest->tileConfig.pitch, 1)}; // The size of a single stride of pixel data auto inputLine{pointer}; // The address of the input line - auto outputLine{stagingBuffer->data()}; // The address of the output 
line + auto outputLine{bufferData}; // The address of the output line for (u32 line{}; line < dimensions.height; line++) { std::memcpy(outputLine, inputLine, sizeLine); @@ -149,18 +206,113 @@ namespace skyline::gpu { outputLine += sizeLine; } } else if (guest->tileMode == texture::TileMode::Linear) { - std::memcpy(stagingBuffer->data(), pointer, size); + std::memcpy(bufferData, pointer, size); } - if (WaitOnBacking() && size != format.GetSize(dimensions)) - throw exception("Backing properties changing during sync is not supported"); + if (stagingBuffer) { + if (WaitOnBacking() && size != format.GetSize(dimensions)) + throw exception("Backing properties changing during sync is not supported"); + WaitOnFence(); + + cycle = gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) { + auto image{GetBacking()}; + if (layout != vk::ImageLayout::eTransferDstOptimal) { + commandBuffer.pipelineBarrier(layout != vk::ImageLayout::eUndefined ? vk::PipelineStageFlagBits::eTopOfPipe : vk::PipelineStageFlagBits::eBottomOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{ + .image = image, + .srcAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = layout, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .subresourceRange = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .levelCount = 1, + .layerCount = 1, + }, + }); + + if (layout == vk::ImageLayout::eUndefined) + layout = vk::ImageLayout::eTransferDstOptimal; + } + + commandBuffer.copyBufferToImage(stagingBuffer->vkBuffer, image, vk::ImageLayout::eTransferDstOptimal, vk::BufferImageCopy{ + .imageExtent = dimensions, + .imageSubresource = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .layerCount = 1, + }, + }); + + if (layout != vk::ImageLayout::eTransferDstOptimal) + 
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{ + .image = image, + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = layout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .subresourceRange = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .levelCount = 1, + .layerCount = 1, + }, + }); + }); + + cycle->AttachObject(stagingBuffer); + } + } + + void Texture::SynchronizeGuest() { + if (!guest) + throw exception("Synchronization of guest textures requires a valid guest texture to synchronize to"); + + WaitOnBacking(); WaitOnFence(); + TRACE_EVENT("gpu", "Texture::SynchronizeGuest"); + // TODO: Write Host -> Guest Synchronization + } + + void Texture::CopyFrom(std::shared_ptr source) { + WaitOnBacking(); + WaitOnFence(); + + source->WaitOnBacking(); + source->WaitOnFence(); + + if (source->layout == vk::ImageLayout::eUndefined) + throw exception("Cannot copy from image with undefined layout"); + else if (source->dimensions != dimensions) + throw exception("Cannot copy from image with different dimensions"); + else if (source->format != format) + throw exception("Cannot copy from image with different format"); + cycle = gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) { - auto image{GetBacking()}; + auto sourceBacking{source->GetBacking()}; + if (source->layout != vk::ImageLayout::eTransferSrcOptimal) { + commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{ + .image = sourceBacking, + .srcAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .oldLayout = source->layout, + .newLayout = 
vk::ImageLayout::eTransferSrcOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .subresourceRange = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .levelCount = 1, + .layerCount = 1, + }, + }); + } + + auto destinationBacking{GetBacking()}; if (layout != vk::ImageLayout::eTransferDstOptimal) { commandBuffer.pipelineBarrier(layout != vk::ImageLayout::eUndefined ? vk::PipelineStageFlagBits::eTopOfPipe : vk::PipelineStageFlagBits::eBottomOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{ - .image = image, + .image = destinationBacking, .srcAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, .dstAccessMask = vk::AccessFlagBits::eTransferWrite, .oldLayout = layout, @@ -178,17 +330,21 @@ namespace skyline::gpu { layout = vk::ImageLayout::eTransferDstOptimal; } - commandBuffer.copyBufferToImage(stagingBuffer->vkBuffer, image, vk::ImageLayout::eTransferDstOptimal, vk::BufferImageCopy{ - .imageExtent = dimensions, - .imageSubresource = { + commandBuffer.copyImage(sourceBacking, vk::ImageLayout::eTransferSrcOptimal, destinationBacking, vk::ImageLayout::eTransferDstOptimal, vk::ImageCopy{ + .srcSubresource = { .aspectMask = vk::ImageAspectFlagBits::eColor, .layerCount = 1, }, + .dstSubresource = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .layerCount = 1, + }, + .extent = dimensions, }); if (layout != vk::ImageLayout::eTransferDstOptimal) commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{ - .image = image, + .image = destinationBacking, .srcAccessMask = vk::AccessFlagBits::eTransferWrite, .dstAccessMask = vk::AccessFlagBits::eMemoryRead, .oldLayout = vk::ImageLayout::eTransferDstOptimal, @@ -201,16 +357,23 @@ namespace skyline::gpu { .layerCount = 1, }, }); + + if (layout != vk::ImageLayout::eTransferSrcOptimal) + 
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{ + .image = sourceBacking, + .srcAccessMask = vk::AccessFlagBits::eTransferRead, + .dstAccessMask = vk::AccessFlagBits::eMemoryWrite, + .oldLayout = vk::ImageLayout::eTransferSrcOptimal, + .newLayout = source->layout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .subresourceRange = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .levelCount = 1, + .layerCount = 1, + }, + }); }); - - cycle->AttachObject(stagingBuffer); - } - - void Texture::SynchronizeGuest() { - WaitOnBacking(); - WaitOnFence(); - - TRACE_EVENT("gpu", "Texture::SynchronizeGuest"); - // TODO: Write Host -> Guest Synchronization + cycle->AttachObject(source); } } diff --git a/app/src/main/cpp/skyline/gpu/texture/texture.h b/app/src/main/cpp/skyline/gpu/texture/texture.h index 61807363..4a4bc686 100644 --- a/app/src/main/cpp/skyline/gpu/texture/texture.h +++ b/app/src/main/cpp/skyline/gpu/texture/texture.h @@ -14,13 +14,19 @@ namespace skyline::gpu { constexpr Dimensions() : width(0), height(0), depth(0) {} + constexpr Dimensions(u32 width) : width(width), height(1), depth(1) {} + constexpr Dimensions(u32 width, u32 height) : width(width), height(height), depth(1) {} constexpr Dimensions(u32 width, u32 height, u32 depth) : width(width), height(height), depth(depth) {} + constexpr Dimensions(vk::Extent2D extent) : Dimensions(extent.width, extent.height) {} + + constexpr Dimensions(vk::Extent3D extent) : Dimensions(extent.width, extent.height, extent.depth) {} + auto operator<=>(const Dimensions &) const = default; - vk::ImageType GetType() { + constexpr vk::ImageType GetType() const { if (depth) return vk::ImageType::e3D; else if (width) @@ -29,32 +35,39 @@ namespace skyline::gpu { return vk::ImageType::e1D; } - operator vk::Extent2D() { + constexpr operator vk::Extent2D() const { return vk::Extent2D{ 
.width = width, .height = height, }; } - operator vk::Extent3D() { + constexpr operator vk::Extent3D() const { return vk::Extent3D{ .width = width, .height = height, .depth = depth, }; } + + /** + * @return If the dimensions are valid and don't equate to zero + */ + constexpr operator bool() const { + return width && height && depth; + } }; /** * @note Blocks refers to the atomic unit of a compressed format (IE: The minimum amount of data that can be decompressed) */ struct Format { - u8 bpb; //!< Bytes Per Block, this is used instead of bytes per pixel as that might not be a whole number for compressed formats - u16 blockHeight; //!< The height of a block in pixels - u16 blockWidth; //!< The width of a block in pixels - vk::Format vkFormat; + u8 bpb{}; //!< Bytes Per Block, this is used instead of bytes per pixel as that might not be a whole number for compressed formats + u16 blockHeight{}; //!< The height of a block in pixels + u16 blockWidth{}; //!< The width of a block in pixels + vk::Format vkFormat{vk::Format::eUndefined}; - constexpr bool IsCompressed() { + constexpr bool IsCompressed() const { return (blockHeight != 1) || (blockWidth != 1); } @@ -64,26 +77,30 @@ namespace skyline::gpu { * @param depth The depth of the texture in layers * @return The size of the texture in bytes */ - constexpr size_t GetSize(u32 width, u32 height, u32 depth = 1) { + constexpr size_t GetSize(u32 width, u32 height, u32 depth = 1) const { return (((width / blockWidth) * (height / blockHeight)) * bpb) * depth; } - constexpr size_t GetSize(Dimensions dimensions) { + constexpr size_t GetSize(Dimensions dimensions) const { return GetSize(dimensions.width, dimensions.height, dimensions.depth); } - constexpr bool operator==(const Format &format) { + constexpr bool operator==(const Format &format) const { return vkFormat == format.vkFormat; } - constexpr bool operator!=(const Format &format) { + constexpr bool operator!=(const Format &format) const { return vkFormat != 
format.vkFormat; } + constexpr operator vk::Format() const { + return vkFormat; + } + /** * @return If this format is actually valid or not */ - constexpr operator bool() { + constexpr operator bool() const { return bpb; } }; @@ -171,7 +188,7 @@ namespace skyline::gpu { texture::TileMode tileMode; texture::TileConfig tileConfig; - GuestTexture(const DeviceState &state, u8 *pointer, texture::Dimensions dimensions, texture::Format format, texture::TileMode tileMode = texture::TileMode::Linear, texture::TileConfig tileConfig = {}); + GuestTexture(const DeviceState &state, u8 *pointer, texture::Dimensions dimensions, const texture::Format& format, texture::TileMode tileMode = texture::TileMode::Linear, texture::TileConfig tileConfig = {}); constexpr size_t Size() { return format.GetSize(dimensions); @@ -180,32 +197,39 @@ namespace skyline::gpu { /** * @brief Creates a corresponding host texture object for this guest texture * @param backing The Vulkan Image that is used as the backing on the host, its lifetime is not managed by the host texture object + * @param dimensions The dimensions of the host texture (Defaults to the dimensions of the host texture) + * @param format The format of the host texture (Defaults to the format of the guest texture) * @param tiling The tiling used by the image on host, this is the same as guest by default * @param layout The initial layout of the Vulkan Image, this is used for efficient layout management - * @param format The format of the host texture (Defaults to the format of the guest texture) - * @param dimensions The dimensions of the host texture (Defaults to the dimensions of the host texture) * @param swizzle The channel swizzle of the host texture (Defaults to no channel swizzling) * @return A shared pointer to the host texture object * @note There can only be one host texture for a corresponding guest texture + * @note If any of the supplied parameters do not match up with the backing then it's undefined behavior */ - 
std::shared_ptr InitializeTexture(vk::Image backing, std::optional tiling = std::nullopt, vk::ImageLayout layout = vk::ImageLayout::eUndefined, std::optional format = std::nullopt, std::optional dimensions = std::nullopt, texture::Swizzle swizzle = {}); + std::shared_ptr InitializeTexture(vk::Image backing, texture::Dimensions dimensions = {}, const texture::Format& format = {}, std::optional tiling = std::nullopt, vk::ImageLayout layout = vk::ImageLayout::eUndefined, texture::Swizzle swizzle = {}); /** * @note As a RAII object is used here, the lifetime of the backing is handled by the host texture */ - std::shared_ptr InitializeTexture(vk::raii::Image &&backing, std::optional tiling = std::nullopt, vk::ImageLayout layout = vk::ImageLayout::eUndefined, std::optional format = std::nullopt, std::optional dimensions = std::nullopt, texture::Swizzle swizzle = {}); + std::shared_ptr InitializeTexture(vk::raii::Image &&backing, std::optional tiling = std::nullopt, vk::ImageLayout layout = vk::ImageLayout::eUndefined, const texture::Format& format = {}, texture::Dimensions dimensions = {}, texture::Swizzle swizzle = {}); + + /** + * @brief Similar to InitializeTexture but creation of the backing and allocation of memory for the backing is automatically performed by the function + * @param usage Usage flags that will applied aside from VK_IMAGE_USAGE_TRANSFER_SRC_BIT/VK_IMAGE_USAGE_TRANSFER_DST_BIT which are mandatory + */ + std::shared_ptr CreateTexture(vk::ImageUsageFlags usage = {}, std::optional tiling = std::nullopt, vk::ImageLayout initialLayout = vk::ImageLayout::eGeneral, const texture::Format& format = {}, texture::Dimensions dimensions = {}, texture::Swizzle swizzle = {}); }; /** * @brief A texture which is backed by host constructs while being synchronized with the underlying guest texture * @note This class conforms to the Lockable and BasicLockable C++ named requirements */ - class Texture { + class Texture : public FenceCycleDependency { private: GPU &gpu; 
std::mutex mutex; //!< Synchronizes any mutations to the texture or its backing std::condition_variable backingCondition; //!< Signalled when a valid backing has been swapped in - using BackingType = std::variant; + using BackingType = std::variant; BackingType backing; //!< The Vulkan image that backs this texture, it is nullable std::shared_ptr cycle; //!< A fence cycle for when any host operation mutating the texture has completed, it must be waited on prior to any mutations to the backing vk::ImageLayout layout; @@ -217,17 +241,26 @@ namespace skyline::gpu { return std::visit(VariantVisitor{ [](vk::Image image) { return image; }, [](const vk::raii::Image &image) { return *image; }, + [](const memory::Image &image) { return image.vkImage; }, }, backing); } public: - std::shared_ptr guest; //!< The guest texture from which this was created, it's required for syncing and not nullable + std::shared_ptr guest; //!< The guest texture from which this was created, it's required for syncing texture::Dimensions dimensions; texture::Format format; vk::ImageTiling tiling; vk::ComponentMapping mapping; - Texture(GPU &gpu, BackingType &&backing, vk::ImageLayout layout, std::shared_ptr guest, texture::Dimensions dimensions, texture::Format format, vk::ImageTiling tiling, vk::ComponentMapping mapping); + Texture(GPU &gpu, BackingType &&backing, std::shared_ptr guest, texture::Dimensions dimensions, const texture::Format& format, vk::ImageLayout layout, vk::ImageTiling tiling, vk::ComponentMapping mapping); + + Texture(GPU &gpu, BackingType &&backing, texture::Dimensions dimensions, const texture::Format& format, vk::ImageLayout layout, vk::ImageTiling tiling, vk::ComponentMapping mapping = {}); + + /** + * @brief Creates and allocates memory for the backing to creates a texture object wrapping it + * @param usage Usage flags that will applied aside from VK_IMAGE_USAGE_TRANSFER_SRC_BIT/VK_IMAGE_USAGE_TRANSFER_DST_BIT which are mandatory + */ + Texture(GPU &gpu, 
texture::Dimensions dimensions, const texture::Format& format, vk::ImageLayout initialLayout = vk::ImageLayout::eGeneral, vk::ImageUsageFlags usage = {}, vk::ImageTiling tiling = vk::ImageTiling::eOptimal, vk::ComponentMapping mapping = {}); /** * @brief Acquires an exclusive lock on the texture for the calling thread @@ -300,13 +333,20 @@ namespace skyline::gpu { /** * @brief Synchronizes the host texture with the guest after it has been modified * @note The texture **must** be locked prior to calling this + * @note The guest texture should not be null prior to calling this */ void SynchronizeHost(); /** * @brief Synchronizes the guest texture with the host texture after it has been modified * @note The texture **must** be locked prior to calling this + * @note The guest texture should not be null prior to calling this */ void SynchronizeGuest(); + + /** + * @brief Copies the contents of the supplied source texture into the current texture + */ + void CopyFrom(std::shared_ptr source); }; } diff --git a/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.cpp b/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.cpp index 18686b2a..f84c9a7e 100644 --- a/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.cpp +++ b/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.cpp @@ -14,7 +14,7 @@ #include "GraphicBufferProducer.h" namespace skyline::service::hosbinder { - GraphicBufferProducer::GraphicBufferProducer(const DeviceState &state) : state(state) {} + GraphicBufferProducer::GraphicBufferProducer(const DeviceState &state) : state(state), bufferEvent(std::make_shared(state, true)) {} u8 GraphicBufferProducer::GetPendingBufferCount() { u8 count{}; @@ -45,36 +45,53 @@ namespace skyline::service::hosbinder { return AndroidStatus::BadValue; } - constexpr i32 invalidGraphicBufferSlot{-1}; //!< https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/include/gui/BufferQueueCore.h;l=61 
- slot = invalidGraphicBufferSlot; + constexpr i32 InvalidGraphicBufferSlot{-1}; //!< https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/include/gui/BufferQueueCore.h;l=61 + slot = InvalidGraphicBufferSlot; std::lock_guard guard(mutex); - auto result{state.gpu->presentation.GetFreeTexture(async, slot)}; - if (result != AndroidStatus::Ok) [[unlikely]] { - if (result == AndroidStatus::Busy) - state.logger->Warn("No free buffers to dequeue"); - return result; + auto buffer{queue.end()}; + while (true) { + size_t dequeuedSlotCount{}; + for (auto it{queue.begin()}; it != queue.end(); it++) { + // We want to select the oldest slot that's free to use as we'd want all slots to be used + // If we go linearly then we have a higher preference for selecting the former slots and being out of order + if (it->state == BufferState::Free && it->texture) { + if (buffer == queue.end() || it->frameNumber < buffer->frameNumber) + buffer = it; + else if (it->state == BufferState::Dequeued) + dequeuedSlotCount++; + } + } + + if (buffer != queue.end()) { + slot = std::distance(queue.begin(), buffer); + break; + } else if (async) { + return AndroidStatus::WouldBlock; + } else if (dequeuedSlotCount == queue.size()) { + state.logger->Warn("Client attempting to dequeue more buffers when all buffers are dequeued by the client: {}", dequeuedSlotCount); + return AndroidStatus::InvalidOperation; + } } width = width ? width : defaultWidth; height = height ? height : defaultHeight; format = (format != AndroidPixelFormat::None) ? 
format : defaultFormat; - auto &buffer{queue.at(slot)}; - if (!buffer.graphicBuffer) { + if (!buffer->graphicBuffer) { // Horizon OS doesn't ever allocate memory for the buffers on the GraphicBufferProducer end // All buffers must be preallocated on the client application and attached to an Android buffer using SetPreallocatedBuffer return AndroidStatus::NoMemory; } - auto &surface{buffer.graphicBuffer->graphicHandle.surfaces.front()}; - if (buffer.graphicBuffer->format != format || surface.width != width || surface.height != height || (buffer.graphicBuffer->usage & usage) != usage) { - state.logger->Warn("Buffer which has been dequeued isn't compatible with the supplied parameters: Dimensions: {}x{}={}x{}, Format: {}={}, Usage: 0x{:X}=0x{:X}", width, height, surface.width, surface.height, ToString(format), ToString(buffer.graphicBuffer->format), usage, buffer.graphicBuffer->usage); + auto &surface{buffer->graphicBuffer->graphicHandle.surfaces.front()}; + if (buffer->graphicBuffer->format != format || surface.width != width || surface.height != height || (buffer->graphicBuffer->usage & usage) != usage) { + state.logger->Warn("Buffer which has been dequeued isn't compatible with the supplied parameters: Dimensions: {}x{}={}x{}, Format: {}={}, Usage: 0x{:X}=0x{:X}", width, height, surface.width, surface.height, ToString(format), ToString(buffer->graphicBuffer->format), usage, buffer->graphicBuffer->usage); // Nintendo doesn't deallocate the slot which was picked in here and reallocate it as a compatible buffer // This is related to the comment above, Nintendo only allocates buffers on the client side return AndroidStatus::NoInit; } - buffer.state = BufferState::Dequeued; + buffer->state = BufferState::Dequeued; fence = AndroidFence{}; // We just let the presentation engine return a buffer which is ready to be written into, there is no need for further synchronization state.logger->Debug("#{} - Dimensions: {}x{}, Format: {}, Usage: 0x{:X}, Is Async: {}", slot, width, 
height, ToString(format), usage, async); @@ -106,7 +123,7 @@ namespace skyline::service::hosbinder { return AndroidStatus::BadValue; } else if (!buffer.wasBufferRequested) [[unlikely]] { state.logger->Warn("#{} was queued prior to being requested", slot); - return AndroidStatus::BadValue; + buffer.wasBufferRequested = true; // Switch ignores this and doesn't return an error, certain homebrew ends up depending on this behavior } auto graphicBuffer{*buffer.graphicBuffer}; @@ -139,13 +156,16 @@ namespace skyline::service::hosbinder { fence.Wait(state.soc->host1x); { - std::scoped_lock textureLock(*buffer.texture); - buffer.texture->SynchronizeHost(); - buffer.texture->WaitOnFence(); - state.gpu->presentation.Present(slot); - state.gpu->presentation.bufferEvent->Signal(); + auto &texture{buffer.texture}; + std::scoped_lock textureLock(*texture); + texture->SynchronizeHost(); + state.gpu->presentation.Present(texture, ++frameNumber); } + buffer.frameNumber = frameNumber; + buffer.state = BufferState::Free; + bufferEvent->Signal(); + width = defaultWidth; height = defaultHeight; transformHint = state.gpu->presentation.GetTransformHint(); @@ -169,11 +189,10 @@ namespace skyline::service::hosbinder { } fence.Wait(state.soc->host1x); - state.gpu->presentation.Present(slot); // We use a present as a way to free the buffer so that it can be acquired in dequeueBuffer again buffer.state = BufferState::Free; buffer.frameNumber = 0; - state.gpu->presentation.bufferEvent->Signal(); + bufferEvent->Signal(); state.logger->Debug("#{}", slot); } @@ -349,7 +368,7 @@ namespace skyline::service::hosbinder { throw exception("Surface doesn't fit into NvMap mapping of size 0x{:X} when mapped at 0x{:X} -> 0x{:X}", nvBuffer->size, surface.offset, surface.offset + surface.size); gpu::texture::TileMode tileMode; - gpu::texture::TileConfig tileConfig; + gpu::texture::TileConfig tileConfig{}; if (surface.layout == NvSurfaceLayout::Blocklinear) { tileMode = gpu::texture::TileMode::Block; 
tileConfig = { @@ -373,11 +392,11 @@ namespace skyline::service::hosbinder { buffer.frameNumber = 0; buffer.wasBufferRequested = false; buffer.graphicBuffer = std::make_unique(graphicBuffer); - buffer.texture = state.gpu->presentation.CreatePresentationTexture(texture, slot); + buffer.texture = texture->CreateTexture({}, vk::ImageTiling::eLinear, vk::ImageLayout::eGeneral); activeSlotCount = hasBufferCount = std::count_if(queue.begin(), queue.end(), [](const BufferSlot &slot) { return static_cast(slot.graphicBuffer); }); - state.gpu->presentation.bufferEvent->Signal(); + bufferEvent->Signal(); state.logger->Debug("#{} - Dimensions: {}x{} [Stride: {}], Format: {}, Layout: {}, {}: {}, Usage: 0x{:X}, NvMap {}: {}, Buffer Start/End: 0x{:X} -> 0x{:X}", slot, surface.width, surface.height, handle.stride, ToString(graphicBuffer.format), ToString(surface.layout), surface.layout == NvSurfaceLayout::Blocklinear ? "Block Height" : "Pitch", surface.layout == NvSurfaceLayout::Blocklinear ? 1U << surface.blockHeightLog2 : surface.pitch, graphicBuffer.usage, surface.nvmapHandle ? "Handle" : "ID", surface.nvmapHandle ? 
surface.nvmapHandle : handle.nvmapId, surface.offset, surface.offset + surface.size); return AndroidStatus::Ok; diff --git a/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.h b/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.h index 508bc1e0..d2eba053 100644 --- a/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.h +++ b/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.h @@ -5,6 +5,7 @@ #pragma once +#include #include #include "android_types.h" #include "native_window.h" @@ -80,12 +81,10 @@ namespace skyline::service::hosbinder { * @url https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/libs/gui/BufferQueueCore.cpp */ class GraphicBufferProducer { - public: - constexpr static u8 MaxSlotCount{16}; //!< The maximum amount of buffer slots that a buffer queue can hold, Android supports 64 but they go unused for applications like games so we've lowered this to 16 (https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/include/gui/BufferQueueDefs.h;l=29) - private: const DeviceState &state; std::mutex mutex; //!< Synchronizes access to the buffer queue + constexpr static u8 MaxSlotCount{16}; //!< The maximum amount of buffer slots that a buffer queue can hold, Android supports 64 but they go unused for applications like games so we've lowered this to 16 (https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/include/gui/BufferQueueDefs.h;l=29) std::array queue; u8 activeSlotCount{2}; //!< The amount of slots in the queue that can be used u8 hasBufferCount{}; //!< The amount of slots with buffers attached in the queue @@ -93,6 +92,7 @@ namespace skyline::service::hosbinder { u32 defaultHeight{1}; //!< The assumed height of a buffer if none is supplied in DequeueBuffer AndroidPixelFormat defaultFormat{AndroidPixelFormat::RGBA8888}; //!< The assumed format of a buffer if none is supplied in 
DequeueBuffer NativeWindowApi connectedApi{NativeWindowApi::None}; //!< The API that the producer is currently connected to + u64 frameNumber{}; //!< The amount of frames that have been presented so far /** * @return The amount of buffers which have been queued onto the consumer @@ -156,6 +156,7 @@ namespace skyline::service::hosbinder { AndroidStatus SetPreallocatedBuffer(i32 slot, const GraphicBuffer &graphicBuffer); public: + std::shared_ptr bufferEvent; //!< Signalled every time a buffer in the queue is freed DisplayId displayId{DisplayId::Null}; //!< The ID of this display LayerStatus layerStatus{LayerStatus::Uninitialized}; //!< The status of the single layer the display has diff --git a/app/src/main/cpp/skyline/services/hosbinder/IHOSBinderDriver.cpp b/app/src/main/cpp/skyline/services/hosbinder/IHOSBinderDriver.cpp index 5713c06f..9868eb33 100644 --- a/app/src/main/cpp/skyline/services/hosbinder/IHOSBinderDriver.cpp +++ b/app/src/main/cpp/skyline/services/hosbinder/IHOSBinderDriver.cpp @@ -40,7 +40,7 @@ namespace skyline::service::hosbinder { } Result IHOSBinderDriver::GetNativeHandle(type::KSession &session, ipc::IpcRequest &request, ipc::IpcResponse &response) { - KHandle handle{state.process->InsertItem(state.gpu->presentation.bufferEvent)}; + KHandle handle{state.process->InsertItem(producer->bufferEvent)}; state.logger->Debug("Display Buffer Event Handle: 0x{:X}", handle); response.copyHandles.push_back(handle); diff --git a/app/src/main/cpp/skyline/services/hosbinder/android_types.h b/app/src/main/cpp/skyline/services/hosbinder/android_types.h index f146e8e3..91f29efe 100644 --- a/app/src/main/cpp/skyline/services/hosbinder/android_types.h +++ b/app/src/main/cpp/skyline/services/hosbinder/android_types.h @@ -45,12 +45,18 @@ namespace skyline::service::hosbinder { /** * @brief Nvidia and Nintendo's Android fence implementation, this significantly differs from the Android implementation (All FDs are inlined as integers rather than explicitly passed as 
FDs) but is a direct replacement * @url https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/include/ui/Fence.h + * @url https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/libs/ui/Fence.cpp */ struct AndroidFence { u32 fenceCount{}; //!< The amount of active fences in the array std::array fences{}; //!< Nvidia's Android fence can hold a maximum of 4 fence FDs - AndroidFence() : fenceCount(0) {} + static constexpr u32 InvalidFenceId{0xFFFFFFFF}; //!< A magic value for the syncpoint ID of invalid fences (https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/include/ui/Fence.h;l=61) + + /** + * @url https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/libs/ui/Fence.cpp;l=34-36 + */ + AndroidFence() : fenceCount(0), fences({InvalidFenceId}) {} /** * @brief Wait on all native fences in this Android fence till they're signalled @@ -59,8 +65,8 @@ namespace skyline::service::hosbinder { if (fenceCount > fences.size()) throw exception("Wait has larger fence count ({}) than storage size ({})", fenceCount, fences.size()); for (auto it{fences.begin()}, end{fences.begin() + fenceCount}; it < end; it++) - if (!host1x.syncpoints.at(it->id).Wait(it->value, std::chrono::steady_clock::duration::max())) - throw exception("Waiting on native fence #{} (Host1X Syncpoint: {}) has timed out", std::distance(fences.begin(), it), it->id); + if (it->id != InvalidFenceId) + host1x.syncpoints.at(it->id).Wait(it->value, std::chrono::steady_clock::duration::max()); } }; diff --git a/app/src/main/cpp/skyline/services/visrv/IApplicationDisplayService.h b/app/src/main/cpp/skyline/services/visrv/IApplicationDisplayService.h index 0ad39eb9..a6313a41 100644 --- a/app/src/main/cpp/skyline/services/visrv/IApplicationDisplayService.h +++ b/app/src/main/cpp/skyline/services/visrv/IApplicationDisplayService.h @@ -7,7 +7,7 @@ namespace 
skyline::service::visrv { /** - * @brief This is used to access the display + * @brief This is used by applications to access the display * @url https://switchbrew.org/wiki/Display_services#IApplicationDisplayService */ class IApplicationDisplayService : public IDisplayService { diff --git a/app/src/main/cpp/skyline/soc/host1x/syncpoint.cpp b/app/src/main/cpp/skyline/soc/host1x/syncpoint.cpp index 9f08bd83..fb6ffab7 100644 --- a/app/src/main/cpp/skyline/soc/host1x/syncpoint.cpp +++ b/app/src/main/cpp/skyline/soc/host1x/syncpoint.cpp @@ -46,8 +46,6 @@ namespace skyline::soc::host1x { std::condition_variable cv; bool flag{}; - if (timeout == std::chrono::steady_clock::duration::max()) - timeout = std::chrono::seconds(1); if (!RegisterWaiter(threshold, [&cv, &mtx, &flag] { std::unique_lock lock(mtx); flag = true; @@ -58,7 +56,12 @@ namespace skyline::soc::host1x { } std::unique_lock lock(mtx); - return cv.wait_for(lock, timeout, [&flag] { return flag; }); + if (timeout == std::chrono::steady_clock::duration::max()) { + cv.wait(lock, [&flag] { return flag; }); + return true; + } else { + return cv.wait_for(lock, timeout, [&flag] { return flag; }); + } } } diff --git a/app/src/main/cpp/skyline/soc/host1x/syncpoint.h b/app/src/main/cpp/skyline/soc/host1x/syncpoint.h index 716b8aff..1d7b5b81 100644 --- a/app/src/main/cpp/skyline/soc/host1x/syncpoint.h +++ b/app/src/main/cpp/skyline/soc/host1x/syncpoint.h @@ -47,6 +47,7 @@ namespace skyline::soc::host1x { /** * @brief Waits for the syncpoint to reach given threshold * @return If the wait was successful (true) or timed out (false) + * @note Guaranteed to succeed when 'steady_clock::duration::max()' is used */ bool Wait(u32 threshold, std::chrono::steady_clock::duration timeout); };