GPU/Software: Reduce number of copies by one, enable 16-bit scanout

This commit is contained in:
Connor McLaughlin
2020-10-22 01:25:33 +10:00
parent beffbaee39
commit d3d881aa6b
20 changed files with 875 additions and 187 deletions

View File

@ -171,6 +171,58 @@ bool D3D11HostDisplay::DownloadTexture(const void* texture_handle, u32 x, u32 y,
static_cast<u32*>(out_data));
}
static constexpr std::array<DXGI_FORMAT, static_cast<u32>(HostDisplayPixelFormat::Count)>
s_display_pixel_format_mapping = {{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_B8G8R8A8_UNORM,
DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_B5G5R5A1_UNORM}};
bool D3D11HostDisplay::SupportsDisplayPixelFormat(HostDisplayPixelFormat format) const
{
const DXGI_FORMAT dfmt = s_display_pixel_format_mapping[static_cast<u32>(format)];
if (dfmt == DXGI_FORMAT_UNKNOWN)
return false;
UINT support = 0;
const UINT required = D3D11_FORMAT_SUPPORT_TEXTURE2D | D3D11_FORMAT_SUPPORT_SHADER_SAMPLE;
return (SUCCEEDED(m_device->CheckFormatSupport(dfmt, &support) && ((support & required) == required)));
}
bool D3D11HostDisplay::BeginSetDisplayPixels(HostDisplayPixelFormat format, u32 width, u32 height, void** out_buffer,
u32* out_pitch)
{
ClearDisplayTexture();
const DXGI_FORMAT dxgi_format = s_display_pixel_format_mapping[static_cast<u32>(format)];
if (m_display_pixels_texture.GetWidth() < width || m_display_pixels_texture.GetHeight() < height ||
m_display_pixels_texture.GetFormat() != dxgi_format)
{
if (!m_display_pixels_texture.Create(m_device.Get(), width, height, 1, dxgi_format, D3D11_BIND_SHADER_RESOURCE,
nullptr, 0, true))
{
return false;
}
}
D3D11_MAPPED_SUBRESOURCE sr;
HRESULT hr = m_context->Map(m_display_pixels_texture.GetD3DTexture(), 0, D3D11_MAP_WRITE_DISCARD, 0, &sr);
if (FAILED(hr))
{
Log_ErrorPrintf("Map pixels texture failed: %08X", hr);
return false;
}
*out_buffer = sr.pData;
*out_pitch = sr.RowPitch;
SetDisplayTexture(m_display_pixels_texture.GetD3DSRV(), format, m_display_pixels_texture.GetWidth(),
m_display_pixels_texture.GetHeight(), 0, 0, static_cast<u32>(width), static_cast<u32>(height));
return true;
}
void D3D11HostDisplay::EndSetDisplayPixels()
{
m_context->Unmap(m_display_pixels_texture.GetD3DTexture(), 0);
}
void D3D11HostDisplay::SetVSync(bool enabled)
{
#ifndef LIBRETRO

View File

@ -57,6 +57,10 @@ public:
u32 texture_data_stride) override;
bool DownloadTexture(const void* texture_handle, u32 x, u32 y, u32 width, u32 height, void* out_data,
u32 out_data_stride) override;
bool SupportsDisplayPixelFormat(HostDisplayPixelFormat format) const override;
bool BeginSetDisplayPixels(HostDisplayPixelFormat format, u32 width, u32 height, void** out_buffer,
u32* out_pitch) override;
void EndSetDisplayPixels() override;
virtual void SetVSync(bool enabled) override;

View File

@ -1,4 +1,5 @@
#include "opengl_host_display.h"
#include "common/align.h"
#include "common/assert.h"
#include "common/log.h"
#include <array>
@ -122,6 +123,108 @@ bool OpenGLHostDisplay::DownloadTexture(const void* texture_handle, u32 x, u32 y
return true;
}
static constexpr std::array<std::tuple<GLenum, GLenum, GLenum>, static_cast<u32>(HostDisplayPixelFormat::Count)>
s_display_pixel_format_mapping = {{
{}, // Unknown
{GL_RGBA, GL_RGBA, GL_UNSIGNED_BYTE}, // RGBA8
{GL_BGRA, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8
{GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565
{GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV} // RGBA5551
}};
bool OpenGLHostDisplay::SupportsDisplayPixelFormat(HostDisplayPixelFormat format) const
{
return (std::get<0>(s_display_pixel_format_mapping[static_cast<u32>(format)]) != static_cast<GLenum>(0));
}
bool OpenGLHostDisplay::BeginSetDisplayPixels(HostDisplayPixelFormat format, u32 width, u32 height, void** out_buffer,
u32* out_pitch)
{
const u32 pixel_size = GetDisplayPixelFormatSize(format);
const u32 stride = Common::AlignUpPow2(width * pixel_size, 4);
const u32 size_required = stride * height * pixel_size;
const u32 buffer_size = Common::AlignUpPow2(size_required * 2, 4 * 1024 * 1024);
if (!m_display_pixels_texture_pbo || m_display_pixels_texture_pbo->GetSize() < buffer_size)
{
m_display_pixels_texture_pbo.reset();
m_display_pixels_texture_pbo = GL::StreamBuffer::Create(GL_PIXEL_UNPACK_BUFFER, buffer_size);
if (!m_display_pixels_texture_pbo)
return false;
}
const auto map = m_display_pixels_texture_pbo->Map(GetDisplayPixelFormatSize(format), size_required);
m_display_texture_format = format;
m_display_pixels_texture_pbo_map_offset = map.buffer_offset;
m_display_pixels_texture_pbo_map_size = size_required;
*out_buffer = map.pointer;
*out_pitch = stride;
if (m_display_pixels_texture_id == 0)
{
glGenTextures(1, &m_display_pixels_texture_id);
glBindTexture(GL_TEXTURE_2D, m_display_pixels_texture_id);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1);
}
SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_display_pixels_texture_id)), format, width, height,
0, 0, width, height);
return true;
}
void OpenGLHostDisplay::EndSetDisplayPixels()
{
const u32 width = static_cast<u32>(m_display_texture_view_width);
const u32 height = static_cast<u32>(m_display_texture_view_height);
const auto [gl_internal_format, gl_format, gl_type] =
s_display_pixel_format_mapping[static_cast<u32>(m_display_texture_format)];
// glTexImage2D should be quicker on Mali...
m_display_pixels_texture_pbo->Unmap(m_display_pixels_texture_pbo_map_size);
m_display_pixels_texture_pbo->Bind();
glBindTexture(GL_TEXTURE_2D, m_display_pixels_texture_id);
glTexImage2D(GL_TEXTURE_2D, 0, gl_internal_format, width, height, 0, gl_format, gl_type,
reinterpret_cast<void*>(static_cast<uintptr_t>(m_display_pixels_texture_pbo_map_offset)));
glBindTexture(GL_TEXTURE_2D, 0);
m_display_pixels_texture_pbo->Unbind();
m_display_pixels_texture_pbo_map_offset = 0;
m_display_pixels_texture_pbo_map_size = 0;
}
bool OpenGLHostDisplay::SetDisplayPixels(HostDisplayPixelFormat format, u32 width, u32 height, const void* buffer,
u32 pitch)
{
if (m_display_pixels_texture_id == 0)
{
glGenTextures(1, &m_display_pixels_texture_id);
glBindTexture(GL_TEXTURE_2D, m_display_pixels_texture_id);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1);
}
else
{
glBindTexture(GL_TEXTURE_2D, m_display_pixels_texture_id);
}
const auto [gl_internal_format, gl_format, gl_type] = s_display_pixel_format_mapping[static_cast<u32>(format)];
glTexImage2D(GL_TEXTURE_2D, 0, gl_internal_format, width, height, 0, gl_format, gl_type, buffer);
glBindTexture(GL_TEXTURE_2D, 0);
SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_display_pixels_texture_id)), format, width, height,
0, 0, width, height);
return true;
}
void OpenGLHostDisplay::SetVSync(bool enabled)
{
if (m_gl_context->GetWindowInfo().type == WindowInfo::Type::Surfaceless)
@ -451,6 +554,12 @@ void OpenGLHostDisplay::DestroyResources()
m_post_processing_stages.clear();
#endif
if (m_display_pixels_texture_id != 0)
{
glDeleteTextures(1, &m_display_pixels_texture_id);
m_display_pixels_texture_id = 0;
}
if (m_display_vao != 0)
glDeleteVertexArrays(1, &m_display_vao);
if (m_display_linear_sampler != 0)

View File

@ -57,6 +57,11 @@ public:
u32 texture_data_stride) override;
bool DownloadTexture(const void* texture_handle, u32 x, u32 y, u32 width, u32 height, void* out_data,
u32 out_data_stride) override;
bool SupportsDisplayPixelFormat(HostDisplayPixelFormat format) const override;
bool BeginSetDisplayPixels(HostDisplayPixelFormat format, u32 width, u32 height, void** out_buffer,
u32* out_pitch) override;
void EndSetDisplayPixels() override;
bool SetDisplayPixels(HostDisplayPixelFormat format, u32 width, u32 height, const void* buffer, u32 pitch) override;
virtual void SetVSync(bool enabled) override;
@ -104,6 +109,11 @@ protected:
GLuint m_display_linear_sampler = 0;
GLuint m_uniform_buffer_alignment = 1;
GLuint m_display_pixels_texture_id = 0;
std::unique_ptr<GL::StreamBuffer> m_display_pixels_texture_pbo;
u32 m_display_pixels_texture_pbo_map_offset = 0;
u32 m_display_pixels_texture_pbo_map_size = 0;
#ifndef LIBRETRO
PostProcessingChain m_post_processing_chain;
GL::Texture m_post_processing_input_texture;

View File

@ -247,6 +247,60 @@ bool VulkanHostDisplay::DownloadTexture(const void* texture_handle, u32 x, u32 y
return true;
}
static constexpr std::array<VkFormat, static_cast<u32>(HostDisplayPixelFormat::Count)> s_display_pixel_format_mapping =
{{VK_FORMAT_UNDEFINED, VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B5G6R5_UNORM_PACK16,
VK_FORMAT_A1R5G5B5_UNORM_PACK16}};
bool VulkanHostDisplay::SupportsDisplayPixelFormat(HostDisplayPixelFormat format) const
{
const VkFormat vk_format = s_display_pixel_format_mapping[static_cast<u32>(format)];
if (vk_format == VK_FORMAT_UNDEFINED)
return false;
VkFormatProperties fp = {};
vkGetPhysicalDeviceFormatProperties(g_vulkan_context->GetPhysicalDevice(), vk_format, &fp);
const VkFormatFeatureFlags required = (VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT);
return ((fp.optimalTilingFeatures & required) == required);
}
bool VulkanHostDisplay::BeginSetDisplayPixels(HostDisplayPixelFormat format, u32 width, u32 height, void** out_buffer,
u32* out_pitch)
{
const VkFormat vk_format = s_display_pixel_format_mapping[static_cast<u32>(format)];
if (m_display_pixels_texture.GetWidth() < width || m_display_pixels_texture.GetHeight() < height ||
m_display_pixels_texture.GetFormat() != vk_format)
{
if (!m_display_pixels_texture.Create(width, height, 1, 1, vk_format, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D,
VK_IMAGE_TILING_OPTIMAL,
VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT))
{
return false;
}
}
if ((m_upload_staging_texture.GetWidth() < width || m_upload_staging_texture.GetHeight() < height) &&
!m_upload_staging_texture.Create(Vulkan::StagingBuffer::Type::Upload, vk_format, width, height))
{
return false;
}
SetDisplayTexture(&m_display_pixels_texture, format, m_display_pixels_texture.GetWidth(),
m_display_pixels_texture.GetHeight(), 0, 0, width, height);
*out_buffer = m_upload_staging_texture.GetMappedPointer();
*out_pitch = m_upload_staging_texture.GetMappedStride();
return true;
}
void VulkanHostDisplay::EndSetDisplayPixels()
{
m_upload_staging_texture.CopyToTexture(0, 0, m_display_pixels_texture, 0, 0, 0, 0,
static_cast<u32>(m_display_texture_view_width),
static_cast<u32>(m_display_texture_view_height));
}
void VulkanHostDisplay::SetVSync(bool enabled)
{
if (!m_swap_chain)
@ -461,6 +515,7 @@ void VulkanHostDisplay::DestroyResources()
m_post_processing_chain.ClearStages();
#endif
m_display_pixels_texture.Destroy(false);
m_readback_staging_texture.Destroy(false);
m_upload_staging_texture.Destroy(false);

View File

@ -55,6 +55,11 @@ public:
bool DownloadTexture(const void* texture_handle, u32 x, u32 y, u32 width, u32 height, void* out_data,
u32 out_data_stride) override;
bool SupportsDisplayPixelFormat(HostDisplayPixelFormat format) const override;
bool BeginSetDisplayPixels(HostDisplayPixelFormat format, u32 width, u32 height, void** out_buffer,
u32* out_pitch) override;
void EndSetDisplayPixels() override;
virtual void SetVSync(bool enabled) override;
virtual bool Render() override;
@ -117,6 +122,7 @@ protected:
VkSampler m_point_sampler = VK_NULL_HANDLE;
VkSampler m_linear_sampler = VK_NULL_HANDLE;
Vulkan::Texture m_display_pixels_texture;
Vulkan::StagingTexture m_upload_staging_texture;
Vulkan::StagingTexture m_readback_staging_texture;