diff --git a/src/rendering/vulkan/system/vk_builders.h b/src/rendering/vulkan/system/vk_builders.h index 7eca4c6a1f..6fbb1ce06c 100644 --- a/src/rendering/vulkan/system/vk_builders.h +++ b/src/rendering/vulkan/system/vk_builders.h @@ -309,7 +309,7 @@ public: void addImage(VulkanImage *image, VkImageLayout oldLayout, VkImageLayout newLayout, VkAccessFlags srcAccessMask, VkAccessFlags dstAccessMask, VkImageAspectFlags aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, int baseMipLevel = 0, int levelCount = 1); void addImage(VkImage image, VkImageLayout oldLayout, VkImageLayout newLayout, VkAccessFlags srcAccessMask, VkAccessFlags dstAccessMask, VkImageAspectFlags aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, int baseMipLevel = 0, int levelCount = 1); void addQueueTransfer(int srcFamily, int dstFamily, VulkanBuffer *buffer, VkAccessFlags srcAccessMask, VkAccessFlags dstAccessMask); - void addQueueTransfer(int srcFamily, int dstFamily, VulkanImage *image, VkImageLayout layout, VkImageAspectFlags aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, int baseMipLevel = 0, int levelCount = 1); + void addQueueTransfer(int srcFamily, int dstFamily, VulkanImage *image, VkImageLayout oldlayout, VkImageLayout newlayout, VkImageAspectFlags aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, int baseMipLevel = 0, int levelCount = 1); void execute(VulkanCommandBuffer *commandBuffer, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, VkDependencyFlags dependencyFlags = 0); @@ -1234,12 +1234,12 @@ inline void PipelineBarrier::addQueueTransfer(int srcFamily, int dstFamily, Vulk bufferMemoryBarriers.push_back(barrier); } -inline void PipelineBarrier::addQueueTransfer(int srcFamily, int dstFamily, VulkanImage *image, VkImageLayout layout, VkImageAspectFlags aspectMask, int baseMipLevel, int levelCount) +inline void PipelineBarrier::addQueueTransfer(int srcFamily, int dstFamily, VulkanImage *image, VkImageLayout oldlayout, VkImageLayout newlayout, VkImageAspectFlags aspectMask, int baseMipLevel, int 
levelCount) { VkImageMemoryBarrier barrier = { }; barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - barrier.oldLayout = layout; - barrier.newLayout = layout; + barrier.oldLayout = oldlayout; + barrier.newLayout = newlayout; barrier.srcQueueFamilyIndex = srcFamily; barrier.dstQueueFamilyIndex = dstFamily; barrier.image = image->image; diff --git a/src/rendering/vulkan/system/vk_device.cpp b/src/rendering/vulkan/system/vk_device.cpp index a1c7754892..a3f6496dea 100644 --- a/src/rendering/vulkan/system/vk_device.cpp +++ b/src/rendering/vulkan/system/vk_device.cpp @@ -163,6 +163,18 @@ void VulkanDevice::SelectPhysicalDevice() } } + // Search for a transfer family made specifically for uploading. For nvidia this allows us to upload using DMA transfers via PCIe. + // To identify it, we look for a transfer family that must not have graphics or compute capabilities. + for (int i = 0; i < (int)info.QueueFamilies.size(); i++) + { + const auto& queueFamily = info.QueueFamilies[i]; + if (queueFamily.queueCount > 0 && (queueFamily.queueFlags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)) == 0 && (queueFamily.queueFlags & VK_QUEUE_TRANSFER_BIT)) + { + dev.copyQueueTransferFamily = i; + break; + } + } + if (dev.graphicsFamily != -1 && dev.presentFamily != -1) { SupportedDevices.push_back(dev); @@ -206,6 +218,7 @@ void VulkanDevice::SelectPhysicalDevice() PhysicalDevice = *SupportedDevices[selected].device; graphicsFamily = SupportedDevices[selected].graphicsFamily; presentFamily = SupportedDevices[selected].presentFamily; + copyQueueTransferFamily = SupportedDevices[selected].copyQueueTransferFamily; graphicsTimeQueries = SupportedDevices[selected].graphicsTimeQueries; } @@ -234,6 +247,8 @@ void VulkanDevice::CreateDevice() std::set<int> neededFamilies; neededFamilies.insert(graphicsFamily); neededFamilies.insert(presentFamily); + if (copyQueueTransferFamily != -1) + neededFamilies.insert(copyQueueTransferFamily); for (int index : neededFamilies) { @@ -261,6 +276,9 @@ void 
VulkanDevice::CreateDevice() vkGetDeviceQueue(device, graphicsFamily, 0, &graphicsQueue); vkGetDeviceQueue(device, presentFamily, 0, &presentQueue); + + if (copyQueueTransferFamily != -1) + vkGetDeviceQueue(device, copyQueueTransferFamily, 0, &copyQueue); } void VulkanDevice::CreateSurface() diff --git a/src/rendering/vulkan/system/vk_device.h b/src/rendering/vulkan/system/vk_device.h index 43185bd505..1ab69406a2 100644 --- a/src/rendering/vulkan/system/vk_device.h +++ b/src/rendering/vulkan/system/vk_device.h @@ -30,6 +30,7 @@ public: VulkanPhysicalDevice *device = nullptr; int graphicsFamily = -1; int presentFamily = -1; + int copyQueueTransferFamily = -1; bool graphicsTimeQueries = false; }; @@ -72,11 +73,13 @@ public: VkDevice device = VK_NULL_HANDLE; VmaAllocator allocator = VK_NULL_HANDLE; + VkQueue copyQueue = VK_NULL_HANDLE; VkQueue graphicsQueue = VK_NULL_HANDLE; VkQueue presentQueue = VK_NULL_HANDLE; int graphicsFamily = -1; int presentFamily = -1; + int copyQueueTransferFamily = -1; bool graphicsTimeQueries = false; private: diff --git a/src/rendering/vulkan/system/vk_framebuffer.cpp b/src/rendering/vulkan/system/vk_framebuffer.cpp index 80532538ca..b204632dd9 100644 --- a/src/rendering/vulkan/system/vk_framebuffer.cpp +++ b/src/rendering/vulkan/system/vk_framebuffer.cpp @@ -146,6 +146,8 @@ void VulkanFrameBuffer::InitializeState() maxuniformblock = device->PhysicalDevice.Properties.limits.maxUniformBufferRange; mCommandPool.reset(new VulkanCommandPool(device, device->graphicsFamily)); + if (device->copyQueueTransferFamily != -1) + mCopyQueueCommandPool.reset(new VulkanCommandPool(device, device->copyQueueTransferFamily)); mScreenBuffers.reset(new VkRenderBuffers()); mSaveBuffers.reset(new VkRenderBuffers()); @@ -218,7 +220,7 @@ void VulkanFrameBuffer::DeleteFrameObjects() FrameDeleteList.CommandBuffers.clear(); } -void VulkanFrameBuffer::FlushCommands(VulkanCommandBuffer **commands, size_t count, bool finish, bool lastsubmit) +void 
VulkanFrameBuffer::FlushCommands(VkQueue queue, VulkanCommandBuffer **commands, size_t count, bool finish, bool lastsubmit) { int currentIndex = mNextSubmit % maxConcurrentSubmitCount; @@ -245,7 +247,7 @@ void VulkanFrameBuffer::FlushCommands(VulkanCommandBuffer **commands, size_t cou if (!lastsubmit) submit.addSignal(mSubmitSemaphore[currentIndex].get()); - submit.execute(device, device->graphicsQueue, mSubmitFence[currentIndex].get()); + submit.execute(device, queue, mSubmitFence[currentIndex].get()); mNextSubmit++; } @@ -253,6 +255,15 @@ void VulkanFrameBuffer::FlushCommands(bool finish, bool lastsubmit) { mRenderState->EndRenderPass(); + if (mCopyQueueCommands) + { + mCopyQueueCommands->end(); + VulkanCommandBuffer* command = mCopyQueueCommands.get(); + FrameDeleteList.CommandBuffers.push_back(std::move(mCopyQueueCommands)); + + FlushCommands(device->copyQueue, &command, 1, false, false); + } + if (mDrawCommands || mTransferCommands) { VulkanCommandBuffer *commands[2]; @@ -272,7 +283,7 @@ void VulkanFrameBuffer::FlushCommands(bool finish, bool lastsubmit) FrameDeleteList.CommandBuffers.push_back(std::move(mDrawCommands)); } - FlushCommands(commands, count, finish, lastsubmit); + FlushCommands(device->graphicsQueue, commands, count, finish, lastsubmit); current_rendered_commandbuffers += (int)count; } @@ -906,6 +917,17 @@ void VulkanFrameBuffer::Draw2D() ::Draw2D(&m2DDrawer, *mRenderState); } +VulkanCommandBuffer *VulkanFrameBuffer::GetCopyQueueCommands() +{ + if (!mCopyQueueCommands) + { + mCopyQueueCommands = mCopyQueueCommandPool->createBuffer(); + mCopyQueueCommands->SetDebugName("VulkanFrameBuffer.mCopyQueueCommands"); + mCopyQueueCommands->begin(); + } + return mCopyQueueCommands.get(); +} + VulkanCommandBuffer *VulkanFrameBuffer::GetTransferCommands() { if (!mTransferCommands) diff --git a/src/rendering/vulkan/system/vk_framebuffer.h b/src/rendering/vulkan/system/vk_framebuffer.h index 64dfa3e7bd..06277afd1d 100644 --- 
a/src/rendering/vulkan/system/vk_framebuffer.h +++ b/src/rendering/vulkan/system/vk_framebuffer.h @@ -27,6 +27,7 @@ public: uint32_t presentImageIndex = 0xffffffff; bool cur_vsync; + VulkanCommandBuffer *GetCopyQueueCommands(); VulkanCommandBuffer *GetTransferCommands(); VulkanCommandBuffer *GetDrawCommands(); VkShaderManager *GetShaderManager() { return mShaderManager.get(); } @@ -118,7 +119,7 @@ private: void CopyScreenToBuffer(int w, int h, void *data); void UpdateShadowMap(); void DeleteFrameObjects(); - void FlushCommands(VulkanCommandBuffer **commands, size_t count, bool finish, bool lastsubmit); + void FlushCommands(VkQueue queue, VulkanCommandBuffer **commands, size_t count, bool finish, bool lastsubmit); std::unique_ptr<VkShaderManager> mShaderManager; std::unique_ptr mSamplerManager; @@ -127,6 +128,8 @@ private: std::unique_ptr mPostprocess; std::unique_ptr mRenderPassManager; std::unique_ptr<VulkanCommandPool> mCommandPool; + std::unique_ptr<VulkanCommandPool> mCopyQueueCommandPool; + std::unique_ptr<VulkanCommandBuffer> mCopyQueueCommands; std::unique_ptr<VulkanCommandBuffer> mTransferCommands; std::unique_ptr mRenderState; diff --git a/src/rendering/vulkan/textures/vk_hwtexture.cpp b/src/rendering/vulkan/textures/vk_hwtexture.cpp index d43abaf6ac..e51ef97953 100644 --- a/src/rendering/vulkan/textures/vk_hwtexture.cpp +++ b/src/rendering/vulkan/textures/vk_hwtexture.cpp @@ -71,13 +71,18 @@ void VkHardwareTexture::Reset() if (mappedSWFB) { - mImage.Image->Unmap(); + if (mTransferBuffer) + mTransferBuffer->Unmap(); + else + mImage.Image->Unmap(); mappedSWFB = nullptr; } auto &deleteList = fb->FrameDeleteList; if (mImage.Image) deleteList.Images.push_back(std::move(mImage.Image)); if (mImage.View) deleteList.ImageViews.push_back(std::move(mImage.View)); + if (mTransferImage) deleteList.Images.push_back(std::move(mTransferImage)); + if (mTransferBuffer) deleteList.Buffers.push_back(std::move(mTransferBuffer)); for (auto &it : mImage.RSFramebuffers) deleteList.Framebuffers.push_back(std::move(it.second)); if (mDepthStencil.Image) 
deleteList.Images.push_back(std::move(mDepthStencil.Image)); if (mDepthStencil.View) deleteList.ImageViews.push_back(std::move(mDepthStencil.View)); @@ -324,17 +329,52 @@ void VkHardwareTexture::AllocateBuffer(int w, int h, int texelsize) VkFormat format = texelsize == 4 ? VK_FORMAT_B8G8R8A8_UNORM : VK_FORMAT_R8_UNORM; - ImageBuilder imgbuilder; - VkDeviceSize allocatedBytes = 0; - imgbuilder.setFormat(format); - imgbuilder.setSize(w, h); - imgbuilder.setLinearTiling(); - imgbuilder.setUsage(VK_IMAGE_USAGE_SAMPLED_BIT, VMA_MEMORY_USAGE_UNKNOWN, VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT); - imgbuilder.setMemoryType( - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); - mImage.Image = imgbuilder.create(fb->device, &allocatedBytes); - mImage.Image->SetDebugName("VkHardwareTexture.mImage"); + if (fb->device->copyQueueTransferFamily != -1) + { + // Use DMA transfer to get the image to the GPU + + BufferBuilder bufbuilder; + bufbuilder.setUsage(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VMA_MEMORY_USAGE_CPU_ONLY, VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT); + bufbuilder.setSize(w * h * texelsize); + mTransferBuffer = bufbuilder.create(fb->device); + mTransferBuffer->SetDebugName("VkHardwareTexture.mTransferBuffer"); + + ImageBuilder imgbuilder0; + imgbuilder0.setFormat(format); + imgbuilder0.setSize(w, h); + imgbuilder0.setUsage(VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT); + mTransferImage = imgbuilder0.create(fb->device); + mTransferImage->SetDebugName("VkHardwareTexture.mTransferImage"); + + ImageBuilder imgbuilder1; + imgbuilder1.setFormat(format); + imgbuilder1.setSize(w, h); + imgbuilder1.setUsage(VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT); + mImage.Image = imgbuilder1.create(fb->device); + 
mImage.Image->SetDebugName("VkHardwareTexture.mImage"); + + bufferpitch = w; + } + else + { + // Memory map the image directly for GPUs where we have no transfer queue (i.e. Intel embedded GPUs) + + ImageBuilder imgbuilder; + imgbuilder.setFormat(format); + imgbuilder.setSize(w, h); + imgbuilder.setLinearTiling(); + imgbuilder.setUsage(VK_IMAGE_USAGE_SAMPLED_BIT, VMA_MEMORY_USAGE_UNKNOWN, VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT); + imgbuilder.setMemoryType( + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + + VkDeviceSize allocatedBytes = 0; + mImage.Image = imgbuilder.create(fb->device, &allocatedBytes); + mImage.Image->SetDebugName("VkHardwareTexture.mImage"); + + bufferpitch = int(allocatedBytes / h / texelsize); + } + mTexelsize = texelsize; ImageViewBuilder viewbuilder; @@ -347,20 +387,68 @@ void VkHardwareTexture::AllocateBuffer(int w, int h, int texelsize) VkImageTransition imageTransition; imageTransition.addImage(&mImage, VK_IMAGE_LAYOUT_GENERAL, true); imageTransition.execute(cmdbuffer); - - bufferpitch = int(allocatedBytes / h / texelsize); } } uint8_t *VkHardwareTexture::MapBuffer() { if (!mappedSWFB) - mappedSWFB = (uint8_t*)mImage.Image->Map(0, mImage.Image->width * mImage.Image->height * mTexelsize); + { + if (mTransferBuffer) + mappedSWFB = (uint8_t*)mTransferBuffer->Map(0, mImage.Image->width * mImage.Image->height * mTexelsize); + else + mappedSWFB = (uint8_t*)mImage.Image->Map(0, mImage.Image->width * mImage.Image->height * mTexelsize); + } return mappedSWFB; } unsigned int VkHardwareTexture::CreateTexture(unsigned char * buffer, int w, int h, int texunit, bool mipmap, int translation, const char *name) { + if (mTransferBuffer) + { + auto fb = GetVulkanFrameBuffer(); + auto copyqueue = fb->GetCopyQueueCommands(); + + // Acquire image, transfer buffer via copy queue 
(PCIe DMA), release image + + PipelineBarrier barrier0; + barrier0.addQueueTransfer(fb->device->graphicsFamily, fb->device->copyQueueTransferFamily, mTransferImage.get(), VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + barrier0.execute(copyqueue, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); + + VkBufferImageCopy region = {}; + region.imageExtent.width = mTransferImage->width; + region.imageExtent.height = mTransferImage->height; + region.imageExtent.depth = 1; + region.imageSubresource.mipLevel = 0; + region.imageSubresource.layerCount = 1; + region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + copyqueue->copyBufferToImage(mTransferBuffer->buffer, mTransferImage->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region); + + PipelineBarrier barrier1; + barrier1.addQueueTransfer(fb->device->copyQueueTransferFamily, fb->device->graphicsFamily, mTransferImage.get(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + barrier1.execute(copyqueue, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); + + // Acquire image on graphics queue, make a copy of it (on the GPU), then release the image again back to the copy queue + + auto gfxqueue = fb->GetTransferCommands(); + + PipelineBarrier barrier2; + barrier2.addImage(mImage.Image.get(), VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_ACCESS_SHADER_READ_BIT, VK_ACCESS_TRANSFER_WRITE_BIT); + barrier2.addQueueTransfer(fb->device->copyQueueTransferFamily, fb->device->graphicsFamily, mTransferImage.get(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + barrier2.execute(gfxqueue, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); + + VkImageCopy imgregion = {}; + imgregion.extent = region.imageExtent; + imgregion.srcSubresource = region.imageSubresource; + imgregion.dstSubresource = region.imageSubresource; + gfxqueue->copyImage(mTransferImage->image, 
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, mImage.Image->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &imgregion); + + PipelineBarrier barrier3; + barrier3.addImage(mImage.Image.get(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, mImage.Layout, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + barrier3.addQueueTransfer(fb->device->graphicsFamily, fb->device->copyQueueTransferFamily, mTransferImage.get(), VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + barrier3.execute(gfxqueue, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); + } + return 0; } diff --git a/src/rendering/vulkan/textures/vk_hwtexture.h b/src/rendering/vulkan/textures/vk_hwtexture.h index 9770cb1c01..8b137942db 100644 --- a/src/rendering/vulkan/textures/vk_hwtexture.h +++ b/src/rendering/vulkan/textures/vk_hwtexture.h @@ -78,5 +78,8 @@ private: VkTextureImage mDepthStencil; + std::unique_ptr<VulkanBuffer> mTransferBuffer; + std::unique_ptr<VulkanImage> mTransferImage; + uint8_t* mappedSWFB = nullptr; };