Implement indexed quads support

We previously only supported non-indexed quads. Indexed quads are now supported by converting the guest index buffer into a triangle-list index buffer at record time and pushing the result into the megabuffer, which is then used as the index buffer in the final draw command.
This commit is contained in:
lynxnb 2022-08-04 20:00:36 +01:00 committed by Billy Laws
parent e6741642ba
commit d5174175d1
3 changed files with 71 additions and 13 deletions

View File

@ -19,4 +19,37 @@ namespace skyline::gpu::interconnect::conversion::quads {
*(dest++) = i + 0;
}
}
/**
 * @brief Expands a quad-list index buffer into a triangle-list index buffer, writing 6 indices for every 4 indices read
 * @tparam S The integer type of a single index
 * @param dest Output buffer, receives 6 indices per complete quad
 * @param source Input index buffer, consumed 4 indices (one quad) at a time
 * @param indexCount Number of indices available in source, any trailing indices that do not form a complete quad are ignored
 * @note dest and source are declared __restrict__, so the two buffers must not overlap
 */
template<typename S>
static void GenerateQuadIndexConversionBufferImpl(S *__restrict__ dest, S *__restrict__ source, u32 indexCount) {
    // Only whole quads are converted: stepping by 4 while comparing against the raw index count would read up to 3 indices
    // past the end of source (and write 6 past the expected end of dest) whenever indexCount is not a multiple of 4
    const size_t quadCount{indexCount / 4u};

    #pragma clang loop vectorize(enable) interleave(enable) unroll(enable)
    for (size_t quad{}; quad < quadCount; ++quad, source += 4) {
        // Given a quad ABCD, we want to generate triangles ABC & CDA
        // Triangle ABC
        *(dest++) = *(source + 0);
        *(dest++) = *(source + 1);
        *(dest++) = *(source + 2);

        // Triangle CDA
        *(dest++) = *(source + 2);
        *(dest++) = *(source + 3);
        *(dest++) = *(source + 0);
    }
}
/**
 * @brief Picks the quad -> triangle index conversion routine that matches the width of the supplied index type
 * @note Index types other than 8-bit, 16-bit and 32-bit are ignored and leave dest untouched
 */
void GenerateIndexedQuadConversionBuffer(u8 *dest, u8 *source, u32 indexCount, vk::IndexType type) {
    if (type == vk::IndexType::eUint8EXT) {
        // Byte-sized indices can be processed directly through the supplied pointers
        GenerateQuadIndexConversionBufferImpl(dest, source, indexCount);
    } else if (type == vk::IndexType::eUint16) {
        auto *dest16{reinterpret_cast<u16 *>(dest)};
        auto *source16{reinterpret_cast<u16 *>(source)};
        GenerateQuadIndexConversionBufferImpl(dest16, source16, indexCount);
    } else if (type == vk::IndexType::eUint32) {
        auto *dest32{reinterpret_cast<u32 *>(dest)};
        auto *source32{reinterpret_cast<u32 *>(source)};
        GenerateQuadIndexConversionBufferImpl(dest32, source32, indexCount);
    }
    // Any other index type falls through without performing a conversion
}
}

View File

@ -43,4 +43,10 @@ namespace skyline::gpu::interconnect::conversion::quads {
* @note The size of the supplied buffer should be at least the size returned by GetRequiredBufferSize()
*/
void GenerateQuadListConversionBuffer(u32 *dest, u32 vertexCount);
/**
 * @brief Create an index buffer that repeats quad vertices from the source buffer to generate a triangle list
 * @param dest The buffer that the converted indices are written to, 6 indices are emitted for every quad (ABCD -> ABC, CDA)
 * @param source The buffer holding the quad indices to convert, it is reinterpreted according to the supplied index type
 * @param indexCount The number of indices in the source buffer
 * @param type The type of the indices in both buffers, only 8-bit, 16-bit and 32-bit indices are converted while any other type results in nothing being written
 * @note The size of the destination buffer should be at least the size returned by GetRequiredBufferSize()
 */
void GenerateIndexedQuadConversionBuffer(u8 *dest, u8 *source, u32 indexCount, vk::IndexType type);
}

View File

@ -1699,6 +1699,19 @@ namespace skyline::gpu::interconnect {
return {quadListConversionBuffer->GetView(0, size), vk::IndexType::eUint32, conversion::quads::GetIndexCount(count)};
}
/**
 * @brief Converts the currently bound quad index buffer into a triangle-list index buffer stored in a fresh megabuffer allocation
 * @param count The number of indices to convert from the bound index buffer
 * @return The megabuffer allocation that the converted indices were written into
 */
MegaBufferAllocator::Allocation GetIndexedQuadConversionBuffer(u32 count) {
    // Reserve enough space in the megabuffer to hold the converted indices
    vk::DeviceSize convertedSize{conversion::quads::GetRequiredBufferSize(count, indexBuffer.type)};
    auto megaBufferAllocation{executor.AcquireMegaBufferAllocator().Allocate(executor.cycle, convertedSize)};

    // The index buffer view stays locked while its backing is read
    ContextLock viewLock{executor.tag, indexBuffer.view};
    auto onDirtyRead{[] {
        // TODO: see Read()
        Logger::Error("Dirty index buffer reads for attached buffers are unimplemented");
    }};
    auto guestIndices{indexBuffer.view.GetReadOnlyBackingSpan(viewLock.IsFirstUsage(), onDirtyRead)};

    // Expand the guest quads straight into the allocated megabuffer region
    conversion::quads::GenerateIndexedQuadConversionBuffer(megaBufferAllocation.region.data(), guestIndices.data(), count, indexBuffer.type);
    return megaBufferAllocation;
}
public:
void SetVertexBufferStride(u32 index, u32 stride) {
vertexBuffers[index].bindingDescription.stride = stride;
@ -2972,23 +2985,29 @@ namespace skyline::gpu::interconnect {
std::shared_ptr<BoundIndexBuffer> boundIndexBuffer{};
if constexpr (IsIndexed) {
if (needsQuadConversion)
throw exception("Indexed quad conversion is not supported");
auto indexBufferView{GetIndexBuffer(count)};
executor.AttachBuffer(indexBufferView);
boundIndexBuffer = std::allocate_shared<BoundIndexBuffer, LinearAllocator<BoundIndexBuffer>>(executor.allocator);
boundIndexBuffer->type = indexBuffer.type;
if (auto megaBufferAllocation{indexBufferView.AcquireMegaBuffer(executor.cycle, executor.AcquireMegaBufferAllocator())}) {
// If the buffer is megabuffered then since we don't get out data from the underlying buffer, rather the megabuffer which stays consistent throughout a single execution, we can skip registering usage
boundIndexBuffer->handle = megaBufferAllocation.buffer;
boundIndexBuffer->offset = megaBufferAllocation.offset;
if (needsQuadConversion) {
auto allocation{GetIndexedQuadConversionBuffer(count)};
boundIndexBuffer->handle = allocation.buffer;
boundIndexBuffer->offset = allocation.offset;
count = conversion::quads::GetIndexCount(count);
} else {
indexBufferView.RegisterUsage(executor.allocator, executor.cycle, [=](const Buffer::BufferViewStorage &view, const std::shared_ptr<Buffer> &buffer) {
boundIndexBuffer->handle = buffer->GetBacking();
boundIndexBuffer->offset = view.offset;
});
executor.AttachBuffer(indexBufferView);
boundIndexBuffer->type = indexBuffer.type;
if (auto megaBufferAllocation{indexBufferView.AcquireMegaBuffer(executor.cycle, executor.AcquireMegaBufferAllocator())}) {
// If the buffer is megabuffered then since we don't get out data from the underlying buffer, rather the megabuffer which stays consistent throughout a single execution, we can skip registering usage
boundIndexBuffer->handle = megaBufferAllocation.buffer;
boundIndexBuffer->offset = megaBufferAllocation.offset;
} else {
indexBufferView.RegisterUsage(executor.allocator, executor.cycle, [=](const Buffer::BufferViewStorage &view, const std::shared_ptr<Buffer> &buffer) {
boundIndexBuffer->handle = buffer->GetBacking();
boundIndexBuffer->offset = view.offset;
});
}
}
} else if (needsQuadConversion) {
// Convert the guest-supplied quad list to an indexed triangle list