HostDisplay: Add threaded presentation for Vulkan renderer

Can add a significant speedup for fast forward. Automatically disabled
when vsync is enabled.
This commit is contained in:
Connor McLaughlin
2020-12-26 23:22:24 +10:00
parent 1a6a14fcd4
commit 702ed21207
29 changed files with 241 additions and 76 deletions

View File

@ -39,6 +39,8 @@ Context::Context(VkInstance instance, VkPhysicalDevice physical_device, bool own
Context::~Context()
{
StopPresentThread();
if (m_device != VK_NULL_HANDLE)
WaitForGPUIdle();
@ -289,7 +291,7 @@ Context::GPUNameList Context::EnumerateGPUNames(VkInstance instance)
}
bool Context::Create(std::string_view gpu_name, const WindowInfo* wi, std::unique_ptr<SwapChain>* out_swap_chain,
bool enable_debug_reports, bool enable_validation_layer)
bool threaded_presentation, bool enable_debug_reports, bool enable_validation_layer)
{
AssertMsg(!g_vulkan_context, "Has no current context");
@ -374,6 +376,9 @@ bool Context::Create(std::string_view gpu_name, const WindowInfo* wi, std::uniqu
return false;
}
if (threaded_presentation)
g_vulkan_context->StartPresentThread();
return true;
}
@ -810,6 +815,7 @@ void Context::WaitForFenceCounter(u64 fence_counter)
void Context::WaitForGPUIdle()
{
WaitForPresentComplete();
vkDeviceWaitIdle(m_device);
}
@ -843,8 +849,10 @@ void Context::WaitForCommandBufferCompletion(u32 index)
m_completed_fence_counter = now_completed_counter;
}
void Context::SubmitCommandBuffer(VkSemaphore wait_semaphore, VkSemaphore signal_semaphore,
VkSwapchainKHR present_swap_chain, uint32_t present_image_index)
void Context::SubmitCommandBuffer(VkSemaphore wait_semaphore /* = VK_NULL_HANDLE */,
VkSemaphore signal_semaphore /* = VK_NULL_HANDLE */,
VkSwapchainKHR present_swap_chain /* = VK_NULL_HANDLE */,
uint32_t present_image_index /* = 0xFFFFFFFF */, bool submit_on_thread /* = false */)
{
FrameResources& resources = m_frame_resources[m_current_frame];
@ -859,7 +867,30 @@ void Context::SubmitCommandBuffer(VkSemaphore wait_semaphore, VkSemaphore signal
// This command buffer now has commands, so can't be re-used without waiting.
resources.needs_fence_wait = true;
// This may be executed on the worker thread, so don't modify any state of the manager class.
std::unique_lock<std::mutex> lock(m_present_mutex);
WaitForPresentComplete(lock);
if (!submit_on_thread || !m_present_thread.joinable())
{
DoSubmitCommandBuffer(m_current_frame, wait_semaphore, signal_semaphore);
if (present_swap_chain != VK_NULL_HANDLE)
DoPresent(signal_semaphore, present_swap_chain, present_image_index);
return;
}
m_queued_present.command_buffer_index = m_current_frame;
m_queued_present.present_swap_chain = present_swap_chain;
m_queued_present.present_image_index = present_image_index;
m_queued_present.wait_semaphore = wait_semaphore;
m_queued_present.signal_semaphore = signal_semaphore;
m_present_done.store(false);
m_present_queued_cv.notify_one();
}
void Context::DoSubmitCommandBuffer(u32 index, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore)
{
FrameResources& resources = m_frame_resources[index];
uint32_t wait_bits = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
VkSubmitInfo submit_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr, 0, nullptr, &wait_bits, 1u,
&resources.command_buffer, 0, nullptr};
@ -876,39 +907,93 @@ void Context::SubmitCommandBuffer(VkSemaphore wait_semaphore, VkSemaphore signal
submit_info.pSignalSemaphores = &signal_semaphore;
}
res = vkQueueSubmit(m_graphics_queue, 1, &submit_info, resources.fence);
VkResult res = vkQueueSubmit(m_graphics_queue, 1, &submit_info, resources.fence);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkQueueSubmit failed: ");
Panic("Failed to submit command buffer.");
}
}
// Do we have a swap chain to present?
if (present_swap_chain != VK_NULL_HANDLE)
void Context::DoPresent(VkSemaphore wait_semaphore, VkSwapchainKHR present_swap_chain, uint32_t present_image_index)
{
// Should have a signal semaphore.
Assert(wait_semaphore != VK_NULL_HANDLE);
VkPresentInfoKHR present_info = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
nullptr,
1,
&wait_semaphore,
1,
&present_swap_chain,
&present_image_index,
nullptr};
VkResult res = vkQueuePresentKHR(m_present_queue, &present_info);
if (res != VK_SUCCESS)
{
// Should have a signal semaphore.
Assert(signal_semaphore != VK_NULL_HANDLE);
VkPresentInfoKHR present_info = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
nullptr,
1,
&signal_semaphore,
1,
&present_swap_chain,
&present_image_index,
nullptr};
// VK_ERROR_OUT_OF_DATE_KHR is not fatal, just means we need to recreate our swap chain.
if (res != VK_ERROR_OUT_OF_DATE_KHR && res != VK_SUBOPTIMAL_KHR)
LOG_VULKAN_ERROR(res, "vkQueuePresentKHR failed: ");
res = vkQueuePresentKHR(m_present_queue, &present_info);
if (res != VK_SUCCESS)
{
// VK_ERROR_OUT_OF_DATE_KHR is not fatal, just means we need to recreate our swap chain.
if (res != VK_ERROR_OUT_OF_DATE_KHR && res != VK_SUBOPTIMAL_KHR)
LOG_VULKAN_ERROR(res, "vkQueuePresentKHR failed: ");
m_last_present_failed = true;
}
m_last_present_failed.store(true);
}
}
void Context::WaitForPresentComplete()
{
std::unique_lock<std::mutex> lock(m_present_mutex);
WaitForPresentComplete(lock);
}
void Context::WaitForPresentComplete(std::unique_lock<std::mutex>& lock)
{
if (m_present_done.load())
return;
m_present_done_cv.wait(lock, [this]() { return m_present_done.load(); });
}
void Context::PresentThread()
{
std::unique_lock<std::mutex> lock(m_present_mutex);
while (!m_present_thread_done.load())
{
m_present_queued_cv.wait(lock, [this]() { return !m_present_done.load() || m_present_thread_done.load(); });
if (m_present_done.load())
continue;
DoSubmitCommandBuffer(m_queued_present.command_buffer_index, m_queued_present.wait_semaphore,
m_queued_present.signal_semaphore);
DoPresent(m_queued_present.signal_semaphore, m_queued_present.present_swap_chain,
m_queued_present.present_image_index);
m_present_done.store(true);
m_present_done_cv.notify_one();
}
}
void Context::StartPresentThread()
{
Assert(!m_present_thread.joinable());
m_present_thread_done.store(false);
m_present_thread = std::thread(&Context::PresentThread, this);
}
void Context::StopPresentThread()
{
if (!m_present_thread.joinable())
return;
{
std::unique_lock<std::mutex> lock(m_present_mutex);
WaitForPresentComplete(lock);
m_present_thread_done.store(true);
m_present_queued_cv.notify_one();
}
m_present_thread.join();
}
void Context::MoveToNextCommandBuffer()
{
ActivateCommandBuffer((m_current_frame + 1) % NUM_COMMAND_BUFFERS);
@ -918,6 +1003,9 @@ void Context::ActivateCommandBuffer(u32 index)
{
FrameResources& resources = m_frame_resources[index];
if (!m_present_done.load() && m_queued_present.command_buffer_index == index)
WaitForPresentComplete();
// Wait for the GPU to finish with all resources for this command buffer.
if (resources.fence_counter > m_completed_fence_counter)
WaitForCommandBufferCompletion(index);

View File

@ -8,10 +8,14 @@
#include "../types.h"
#include "vulkan_loader.h"
#include <array>
#include <atomic>
#include <condition_variable>
#include <functional>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <thread>
#include <vector>
struct WindowInfo;
@ -44,7 +48,7 @@ public:
// Creates a new context and sets it up as global.
static bool Create(std::string_view gpu_name, const WindowInfo* wi, std::unique_ptr<SwapChain>* out_swap_chain,
bool enable_debug_reports, bool enable_validation_layer);
bool threaded_presentation, bool enable_debug_reports, bool enable_validation_layer);
// Creates a new context from a pre-existing instance.
static bool CreateFromExistingInstance(VkInstance instance, VkPhysicalDevice gpu, VkSurfaceKHR surface,
@ -147,10 +151,11 @@ public:
void SubmitCommandBuffer(VkSemaphore wait_semaphore = VK_NULL_HANDLE, VkSemaphore signal_semaphore = VK_NULL_HANDLE,
VkSwapchainKHR present_swap_chain = VK_NULL_HANDLE,
uint32_t present_image_index = 0xFFFFFFFF);
uint32_t present_image_index = 0xFFFFFFFF, bool submit_on_thread = false);
void MoveToNextCommandBuffer();
void ExecuteCommandBuffer(bool wait_for_completion);
void WaitForPresentComplete();
// Was the last present submitted to the queue a failure? If so, we must recreate our swapchain.
bool CheckLastPresentFail();
@ -191,6 +196,13 @@ private:
void ActivateCommandBuffer(u32 index);
void WaitForCommandBufferCompletion(u32 index);
void DoSubmitCommandBuffer(u32 index, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore);
void DoPresent(VkSemaphore wait_semaphore, VkSwapchainKHR present_swap_chain, uint32_t present_image_index);
void WaitForPresentComplete(std::unique_lock<std::mutex>& lock);
void PresentThread();
void StartPresentThread();
void StopPresentThread();
struct FrameResources
{
// [0] - Init (upload) command buffer, [1] - draw command buffer
@ -223,7 +235,25 @@ private:
u32 m_current_frame;
bool m_owns_device = false;
bool m_last_present_failed = false;
std::atomic_bool m_last_present_failed{false};
std::atomic_bool m_present_done{true};
std::mutex m_present_mutex;
std::condition_variable m_present_queued_cv;
std::condition_variable m_present_done_cv;
std::thread m_present_thread;
std::atomic_bool m_present_thread_done{false};
struct QueuedPresent
{
VkSemaphore wait_semaphore;
VkSemaphore signal_semaphore;
VkSwapchainKHR present_swap_chain;
u32 command_buffer_index;
u32 present_image_index;
};
QueuedPresent m_queued_present = {};
// Render pass cache
using RenderPassCacheKey = std::tuple<VkFormat, VkFormat, VkSampleCountFlagBits, VkAttachmentLoadOp>;