From 2eaebd89218578aafb2b73bb8931bcf9d30811ef Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Sat, 31 Oct 2020 00:38:06 +1000 Subject: [PATCH] GPU: Implement support for multisample antialiasing --- src/common/d3d11/texture.cpp | 30 +- src/common/d3d11/texture.h | 5 +- src/common/gl/texture.cpp | 47 +++- src/common/gl/texture.h | 8 +- src/common/vulkan/builders.cpp | 7 + src/common/vulkan/builders.h | 1 + src/common/vulkan/context.cpp | 1 + src/core/gpu_hw.cpp | 44 ++- src/core/gpu_hw.h | 8 +- src/core/gpu_hw_d3d11.cpp | 131 +++++---- src/core/gpu_hw_d3d11.h | 6 +- src/core/gpu_hw_opengl.cpp | 117 ++++---- src/core/gpu_hw_shadergen.cpp | 92 +++++-- src/core/gpu_hw_shadergen.h | 10 +- src/core/gpu_hw_vulkan.cpp | 147 ++++++---- src/core/host_interface.cpp | 4 + src/core/settings.cpp | 4 + src/core/settings.h | 2 + src/core/shadergen.cpp | 156 +++++++---- src/core/shadergen.h | 8 +- .../enhancementsettingswidget.cpp | 26 ++ .../enhancementsettingswidget.h | 1 + .../enhancementsettingswidget.ui | 256 +++++++++--------- src/duckstation-qt/qtutils.cpp | 34 ++- src/duckstation-qt/qtutils.h | 6 + src/duckstation-sdl/sdl_host_interface.cpp | 46 +++- src/frontend-common/d3d11_host_display.cpp | 4 +- src/frontend-common/opengl_host_display.cpp | 10 +- 28 files changed, 804 insertions(+), 407 deletions(-) diff --git a/src/common/d3d11/texture.cpp b/src/common/d3d11/texture.cpp index 1b03d49a6..c4be21de3 100644 --- a/src/common/d3d11/texture.cpp +++ b/src/common/d3d11/texture.cpp @@ -4,7 +4,7 @@ Log_SetChannel(D3D11); namespace D3D11 { -Texture::Texture() : m_width(0), m_height(0) {} +Texture::Texture() : m_width(0), m_height(0), m_samples(0) {} Texture::Texture(ComPtr texture, ComPtr srv, ComPtr rtv) @@ -13,6 +13,7 @@ Texture::Texture(ComPtr texture, ComPtr srv; if (bind_flags & D3D11_BIND_SHADER_RESOURCE) { - const CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(D3D11_SRV_DIMENSION_TEXTURE2D, desc.Format, 0, desc.MipLevels, 0, - desc.ArraySize); + const D3D11_SRV_DIMENSION 
srv_dimension = + (desc.SampleDesc.Count > 1) ? D3D11_SRV_DIMENSION_TEXTURE2DMS : D3D11_SRV_DIMENSION_TEXTURE2D; + const CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(srv_dimension, desc.Format, 0, desc.MipLevels, 0, desc.ArraySize); const HRESULT hr = device->CreateShaderResourceView(texture.Get(), &srv_desc, srv.GetAddressOf()); if (FAILED(hr)) { @@ -61,7 +63,9 @@ bool Texture::Create(ID3D11Device* device, u32 width, u32 height, DXGI_FORMAT fo ComPtr rtv; if (bind_flags & D3D11_BIND_RENDER_TARGET) { - const CD3D11_RENDER_TARGET_VIEW_DESC rtv_desc(D3D11_RTV_DIMENSION_TEXTURE2D, desc.Format, 0, 0, desc.ArraySize); + const D3D11_RTV_DIMENSION rtv_dimension = + (desc.SampleDesc.Count > 1) ? D3D11_RTV_DIMENSION_TEXTURE2DMS : D3D11_RTV_DIMENSION_TEXTURE2D; + const CD3D11_RENDER_TARGET_VIEW_DESC rtv_desc(rtv_dimension, desc.Format, 0, 0, desc.ArraySize); const HRESULT hr = device->CreateRenderTargetView(texture.Get(), &rtv_desc, rtv.GetAddressOf()); if (FAILED(hr)) { @@ -75,6 +79,7 @@ bool Texture::Create(ID3D11Device* device, u32 width, u32 height, DXGI_FORMAT fo m_rtv = std::move(rtv); m_width = desc.Width; m_height = desc.Height; + m_samples = desc.SampleDesc.Count; return true; } @@ -86,8 +91,9 @@ bool Texture::Adopt(ID3D11Device* device, ComPtr texture) ComPtr srv; if (desc.BindFlags & D3D11_BIND_SHADER_RESOURCE) { - const CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(D3D11_SRV_DIMENSION_TEXTURE2D, desc.Format, 0, desc.MipLevels, 0, - desc.ArraySize); + const D3D11_SRV_DIMENSION srv_dimension = + (desc.SampleDesc.Count > 1) ? 
D3D11_SRV_DIMENSION_TEXTURE2DMS : D3D11_SRV_DIMENSION_TEXTURE2D; + const CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(srv_dimension, desc.Format, 0, desc.MipLevels, 0, desc.ArraySize); const HRESULT hr = device->CreateShaderResourceView(texture.Get(), &srv_desc, srv.ReleaseAndGetAddressOf()); if (FAILED(hr)) { @@ -99,7 +105,9 @@ bool Texture::Adopt(ID3D11Device* device, ComPtr texture) ComPtr rtv; if (desc.BindFlags & D3D11_BIND_RENDER_TARGET) { - const CD3D11_RENDER_TARGET_VIEW_DESC rtv_desc(D3D11_RTV_DIMENSION_TEXTURE2D, desc.Format, 0, 0, desc.ArraySize); + const D3D11_RTV_DIMENSION rtv_dimension = + (desc.SampleDesc.Count > 1) ? D3D11_RTV_DIMENSION_TEXTURE2DMS : D3D11_RTV_DIMENSION_TEXTURE2D; + const CD3D11_RENDER_TARGET_VIEW_DESC rtv_desc(rtv_dimension, desc.Format, 0, 0, desc.ArraySize); const HRESULT hr = device->CreateRenderTargetView(texture.Get(), &rtv_desc, rtv.ReleaseAndGetAddressOf()); if (FAILED(hr)) { @@ -113,6 +121,7 @@ bool Texture::Adopt(ID3D11Device* device, ComPtr texture) m_rtv = std::move(rtv); m_width = desc.Width; m_height = desc.Height; + m_samples = desc.SampleDesc.Count; return true; } @@ -123,6 +132,7 @@ void Texture::Destroy() m_texture.Reset(); m_width = 0; m_height = 0; + m_samples = 0; } } // namespace D3D11 \ No newline at end of file diff --git a/src/common/d3d11/texture.h b/src/common/d3d11/texture.h index 357d42e2b..5549661f2 100644 --- a/src/common/d3d11/texture.h +++ b/src/common/d3d11/texture.h @@ -23,6 +23,8 @@ public: ALWAYS_INLINE u32 GetWidth() const { return m_width; } ALWAYS_INLINE u32 GetHeight() const { return m_height; } + ALWAYS_INLINE u32 GetSamples() const { return m_samples; } + ALWAYS_INLINE bool IsMultisampled() const { return m_samples > 1; } ALWAYS_INLINE DXGI_FORMAT GetFormat() const { return GetDesc().Format; } D3D11_TEXTURE2D_DESC GetDesc() const; @@ -31,7 +33,7 @@ public: ALWAYS_INLINE operator ID3D11RenderTargetView*() const { return m_rtv.Get(); } ALWAYS_INLINE operator bool() const { return 
static_cast(m_texture); } - bool Create(ID3D11Device* device, u32 width, u32 height, DXGI_FORMAT format, u32 bind_flags, + bool Create(ID3D11Device* device, u32 width, u32 height, u32 samples, DXGI_FORMAT format, u32 bind_flags, const void* initial_data = nullptr, u32 initial_data_stride = 0); bool Adopt(ID3D11Device* device, ComPtr texture); @@ -43,5 +45,6 @@ private: ComPtr m_rtv; u32 m_width; u32 m_height; + u32 m_samples; }; } // namespace D3D11 \ No newline at end of file diff --git a/src/common/gl/texture.cpp b/src/common/gl/texture.cpp index 819223c0a..152df8fbb 100644 --- a/src/common/gl/texture.cpp +++ b/src/common/gl/texture.cpp @@ -8,11 +8,13 @@ namespace GL { Texture::Texture() = default; Texture::Texture(Texture&& moved) - : m_id(moved.m_id), m_width(moved.m_width), m_height(moved.m_height), m_fbo_id(moved.m_fbo_id) + : m_id(moved.m_id), m_width(moved.m_width), m_height(moved.m_height), m_samples(moved.m_samples), + m_fbo_id(moved.m_fbo_id) { moved.m_id = 0; moved.m_width = 0; moved.m_height = 0; + moved.m_samples = 0; moved.m_fbo_id = 0; } @@ -21,20 +23,32 @@ Texture::~Texture() Destroy(); } -bool Texture::Create(u32 width, u32 height, GLenum internal_format, GLenum format, GLenum type, const void* data, - bool linear_filter, bool wrap) +bool Texture::Create(u32 width, u32 height, u32 samples, GLenum internal_format, GLenum format, GLenum type, + const void* data, bool linear_filter, bool wrap) { glGetError(); + const GLenum target = (samples > 1) ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; + GLuint id; glGenTextures(1, &id); - glBindTexture(GL_TEXTURE_2D, id); - glTexImage2D(GL_TEXTURE_2D, 0, internal_format, width, height, 0, format, type, data); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, linear_filter ? GL_LINEAR : GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, linear_filter ? GL_LINEAR : GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, wrap ? 
GL_REPEAT : GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, wrap ? GL_REPEAT : GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1); + glBindTexture(target, id); + + if (samples > 1) + { + glTexImage2DMultisample(target, samples, internal_format, width, height, GL_FALSE); + } + else + { + glTexImage2D(target, 0, internal_format, width, height, 0, format, type, data); + + glTexParameteri(target, GL_TEXTURE_MIN_FILTER, linear_filter ? GL_LINEAR : GL_NEAREST); + glTexParameteri(target, GL_TEXTURE_MAG_FILTER, linear_filter ? GL_LINEAR : GL_NEAREST); + glTexParameteri(target, GL_TEXTURE_WRAP_S, wrap ? GL_REPEAT : GL_CLAMP_TO_EDGE); + glTexParameteri(target, GL_TEXTURE_WRAP_T, wrap ? GL_REPEAT : GL_CLAMP_TO_EDGE); + } + + glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, 1); GLenum error = glGetError(); if (error != GL_NO_ERROR) @@ -50,15 +64,19 @@ bool Texture::Create(u32 width, u32 height, GLenum internal_format, GLenum forma m_id = id; m_width = width; m_height = height; + m_samples = samples; return true; } void Texture::SetLinearFilter(bool enabled) { + Assert(!IsMultisampled()); + Bind(); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, enabled ? GL_LINEAR : GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, enabled ? GL_LINEAR : GL_NEAREST); + const GLenum target = GetGLTarget(); + glTexParameteri(target, GL_TEXTURE_MIN_FILTER, enabled ? GL_LINEAR : GL_NEAREST); + glTexParameteri(target, GL_TEXTURE_MAG_FILTER, enabled ? 
GL_LINEAR : GL_NEAREST); } bool Texture::CreateFramebuffer() @@ -100,11 +118,12 @@ void Texture::Destroy() m_width = 0; m_height = 0; + m_samples = 0; } void Texture::Bind() { - glBindTexture(GL_TEXTURE_2D, m_id); + glBindTexture(GetGLTarget(), m_id); } void Texture::BindFramebuffer(GLenum target /*= GL_DRAW_FRAMEBUFFER*/) @@ -125,11 +144,13 @@ Texture& Texture::operator=(Texture&& moved) m_id = moved.m_id; m_width = moved.m_width; m_height = moved.m_height; + m_samples = moved.m_samples; m_fbo_id = moved.m_fbo_id; moved.m_id = 0; moved.m_width = 0; moved.m_height = 0; + moved.m_samples = 0; moved.m_fbo_id = 0; return *this; } diff --git a/src/common/gl/texture.h b/src/common/gl/texture.h index 531974c05..3a8102fda 100644 --- a/src/common/gl/texture.h +++ b/src/common/gl/texture.h @@ -10,8 +10,8 @@ public: Texture(Texture&& moved); ~Texture(); - bool Create(u32 width, u32 height, GLenum internal_format, GLenum format, GLenum type, const void* data = nullptr, - bool linear_filter = false, bool wrap = false); + bool Create(u32 width, u32 height, u32 samples, GLenum internal_format, GLenum format, GLenum type, + const void* data = nullptr, bool linear_filter = false, bool wrap = false); bool CreateFramebuffer(); void Destroy(); @@ -19,11 +19,14 @@ public: void SetLinearFilter(bool enabled); bool IsValid() const { return m_id != 0; } + bool IsMultisampled() const { return m_samples > 1; } GLuint GetGLId() const { return m_id; } u32 GetWidth() const { return m_width; } u32 GetHeight() const { return m_height; } + u32 GetSamples() const { return m_samples; } GLuint GetGLFramebufferID() const { return m_fbo_id; } + GLenum GetGLTarget() const { return IsMultisampled() ? 
GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; } void Bind(); void BindFramebuffer(GLenum target = GL_DRAW_FRAMEBUFFER); @@ -42,6 +45,7 @@ private: GLuint m_id = 0; u32 m_width = 0; u32 m_height = 0; + u32 m_samples = 0; GLuint m_fbo_id = 0; }; diff --git a/src/common/vulkan/builders.cpp b/src/common/vulkan/builders.cpp index 349b9263a..c0947e457 100644 --- a/src/common/vulkan/builders.cpp +++ b/src/common/vulkan/builders.cpp @@ -248,6 +248,13 @@ void GraphicsPipelineBuilder::SetLineWidth(float width) m_rasterization_state.lineWidth = width; } +void GraphicsPipelineBuilder::SetMultisamples(u32 multisamples, bool per_sample_shading) +{ + m_multisample_state.rasterizationSamples = static_cast(multisamples); + m_multisample_state.sampleShadingEnable = per_sample_shading; + m_multisample_state.minSampleShading = (multisamples > 1) ? 0.0f : 1.0f; +} + void GraphicsPipelineBuilder::SetNoCullRasterizationState() { SetRasterizationState(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE, VK_FRONT_FACE_CLOCKWISE); diff --git a/src/common/vulkan/builders.h b/src/common/vulkan/builders.h index cd5032247..217e3e7a8 100644 --- a/src/common/vulkan/builders.h +++ b/src/common/vulkan/builders.h @@ -81,6 +81,7 @@ public: void SetRasterizationState(VkPolygonMode polygon_mode, VkCullModeFlags cull_mode, VkFrontFace front_face); void SetLineWidth(float width); + void SetMultisamples(u32 multisamples, bool per_sample_shading); void SetNoCullRasterizationState(); void SetDepthState(bool depth_test, bool depth_write, VkCompareOp compare_op); diff --git a/src/common/vulkan/context.cpp b/src/common/vulkan/context.cpp index 505c7bf31..35d1cf819 100644 --- a/src/common/vulkan/context.cpp +++ b/src/common/vulkan/context.cpp @@ -472,6 +472,7 @@ bool Context::SelectDeviceFeatures(const VkPhysicalDeviceFeatures* required_feat // Enable the features we use. 
m_device_features.dualSrcBlend = available_features.dualSrcBlend; + m_device_features.sampleRateShading = available_features.sampleRateShading; return true; } diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index c2bb1c950..1fbcbb490 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -47,12 +47,27 @@ bool GPU_HW::Initialize(HostDisplay* host_display) return false; m_resolution_scale = CalculateResolutionScale(); + m_multisamples = std::min(g_settings.gpu_multisamples, m_max_multisamples); m_render_api = host_display->GetRenderAPI(); + m_per_sample_shading = g_settings.gpu_per_sample_shading && m_supports_per_sample_shading; m_true_color = g_settings.gpu_true_color; m_scaled_dithering = g_settings.gpu_scaled_dithering; m_texture_filtering = g_settings.gpu_texture_filter; m_using_uv_limits = ShouldUseUVLimits(); PrintSettingsToLog(); + + if (m_multisamples != g_settings.gpu_multisamples) + { + g_host_interface->AddFormattedOSDMessage( + 20.0f, g_host_interface->TranslateString("OSDMessage", "%ux MSAA is not supported, using %ux instead."), + g_settings.gpu_multisamples, m_multisamples); + } + if (!m_per_sample_shading && g_settings.gpu_per_sample_shading) + { + g_host_interface->AddOSDMessage( + g_host_interface->TranslateStdString("OSDMessage", "SSAA is not supported, using MSAA instead."), 20.0f); + } + return true; } @@ -91,12 +106,15 @@ bool GPU_HW::DoState(StateWrapper& sw) void GPU_HW::UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed) { const u32 resolution_scale = CalculateResolutionScale(); + const u32 multisamples = std::min(m_max_multisamples, g_settings.gpu_multisamples); const bool use_uv_limits = ShouldUseUVLimits(); - *framebuffer_changed = (m_resolution_scale != resolution_scale); - *shaders_changed = (m_resolution_scale != resolution_scale || m_true_color != g_settings.gpu_true_color || - m_scaled_dithering != g_settings.gpu_scaled_dithering || - m_texture_filtering != g_settings.gpu_texture_filter || 
m_using_uv_limits != use_uv_limits); + *framebuffer_changed = (m_resolution_scale != resolution_scale || m_multisamples != multisamples); + *shaders_changed = + (m_resolution_scale != resolution_scale || m_multisamples != multisamples || + m_true_color != g_settings.gpu_true_color || m_per_sample_shading != g_settings.gpu_per_sample_shading || + m_scaled_dithering != g_settings.gpu_scaled_dithering || m_texture_filtering != g_settings.gpu_texture_filter || + m_using_uv_limits != use_uv_limits); if (m_resolution_scale != resolution_scale) { @@ -107,7 +125,24 @@ void GPU_HW::UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed) VRAM_HEIGHT * resolution_scale); } + if (m_multisamples != multisamples || m_per_sample_shading != g_settings.gpu_per_sample_shading) + { + if (g_settings.gpu_per_sample_shading) + { + g_host_interface->AddFormattedOSDMessage( + 10.0f, g_host_interface->TranslateString("OSDMessage", "Multisample anti-aliasing set to %ux (SSAA)."), + multisamples); + } + else + { + g_host_interface->AddFormattedOSDMessage( + 10.0f, g_host_interface->TranslateString("OSDMessage", "Multisample anti-aliasing set to %ux."), multisamples); + } + } + m_resolution_scale = resolution_scale; + m_multisamples = multisamples; + m_per_sample_shading = g_settings.gpu_per_sample_shading; m_true_color = g_settings.gpu_true_color; m_scaled_dithering = g_settings.gpu_scaled_dithering; m_texture_filtering = g_settings.gpu_texture_filter; @@ -147,6 +182,7 @@ void GPU_HW::PrintSettingsToLog() { Log_InfoPrintf("Resolution Scale: %u (%ux%u), maximum %u", m_resolution_scale, VRAM_WIDTH * m_resolution_scale, VRAM_HEIGHT * m_resolution_scale, m_max_resolution_scale); + Log_InfoPrintf("Multisampling: %ux%s", m_multisamples, m_per_sample_shading ? " (per sample shading)" : ""); Log_InfoPrintf("Dithering: %s%s", m_true_color ? "Disabled" : "Enabled", (!m_true_color && m_scaled_dithering) ? 
" (Scaled)" : ""); Log_InfoPrintf("Texture Filtering: %s", Settings::GetTextureFilterDisplayName(m_texture_filtering)); diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 9f9064849..55df2374f 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -34,7 +34,7 @@ public: virtual bool Initialize(HostDisplay* host_display) override; virtual void Reset() override; virtual bool DoState(StateWrapper& sw) override; - + void UpdateResolutionScale() override final; std::tuple GetEffectiveDisplayResolution() override final; @@ -187,6 +187,8 @@ protected: u32 CalculateResolutionScale() const; + ALWAYS_INLINE bool IsUsingMultisampling() const { return m_multisamples > 1; } + void SetFullVRAMDirtyRectangle() { m_vram_dirty_rect.Set(0, 0, VRAM_WIDTH, VRAM_HEIGHT); @@ -266,11 +268,15 @@ protected: s32 m_current_depth = 0; u32 m_resolution_scale = 1; + u32 m_multisamples = 1; u32 m_max_resolution_scale = 1; + u32 m_max_multisamples = 1; HostDisplay::RenderAPI m_render_api = HostDisplay::RenderAPI::None; + bool m_per_sample_shading = false; bool m_true_color = true; bool m_scaled_dithering = false; GPUTextureFilter m_texture_filtering = GPUTextureFilter::Nearest; + bool m_supports_per_sample_shading = false; bool m_supports_dual_source_blend = false; bool m_using_uv_limits = false; diff --git a/src/core/gpu_hw_d3d11.cpp b/src/core/gpu_hw_d3d11.cpp index 685a70c59..f7d132c03 100644 --- a/src/core/gpu_hw_d3d11.cpp +++ b/src/core/gpu_hw_d3d11.cpp @@ -16,7 +16,8 @@ GPU_HW_D3D11::~GPU_HW_D3D11() if (m_host_display) m_host_display->ClearDisplayTexture(); - m_context->ClearState(); + if (m_context) + m_context->ClearState(); DestroyShaders(); DestroyStateObjects(); @@ -30,16 +31,16 @@ bool GPU_HW_D3D11::Initialize(HostDisplay* host_display) return false; } - SetCapabilities(); - - if (!GPU_HW::Initialize(host_display)) - return false; - m_device = static_cast(host_display->GetRenderDevice()); m_context = static_cast(host_display->GetRenderContext()); if (!m_device || !m_context) 
return false; + SetCapabilities(); + + if (!GPU_HW::Initialize(host_display)) + return false; + if (!CreateFramebuffer()) { Log_ErrorPrintf("Failed to create framebuffer"); @@ -123,6 +124,9 @@ void GPU_HW_D3D11::UpdateSettings() if (framebuffer_changed) { + RestoreGraphicsAPIState(); + ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + ResetGraphicsAPIState(); m_host_display->ClearDisplayTexture(); CreateFramebuffer(); } @@ -138,6 +142,7 @@ void GPU_HW_D3D11::UpdateSettings() if (framebuffer_changed) { RestoreGraphicsAPIState(); + UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr); UpdateDepthBufferFromMaskBit(); UpdateDisplay(); ResetGraphicsAPIState(); @@ -173,54 +178,54 @@ void GPU_HW_D3D11::SetCapabilities() m_max_resolution_scale = max_texture_scale; m_supports_dual_source_blend = true; + m_supports_per_sample_shading = (m_device->GetFeatureLevel() >= D3D_FEATURE_LEVEL_10_1); + + m_max_multisamples = 1; + for (u32 multisamples = 2; multisamples < D3D11_MAX_MULTISAMPLE_SAMPLE_COUNT; multisamples++) + { + UINT num_quality_levels; + if (SUCCEEDED( + m_device->CheckMultisampleQualityLevels(DXGI_FORMAT_R8G8B8A8_UNORM, multisamples, &num_quality_levels)) && + num_quality_levels > 0) + { + m_max_multisamples = multisamples; + } + } } bool GPU_HW_D3D11::CreateFramebuffer() { - // save old vram texture/fbo, in case we're changing scale - auto old_vram_texture = std::move(m_vram_texture); DestroyFramebuffer(); // scale vram size to internal resolution const u32 texture_width = VRAM_WIDTH * m_resolution_scale; const u32 texture_height = VRAM_HEIGHT * m_resolution_scale; + const u32 multisamples = m_multisamples; const DXGI_FORMAT texture_format = DXGI_FORMAT_R8G8B8A8_UNORM; const DXGI_FORMAT depth_format = DXGI_FORMAT_D16_UNORM; - if (!m_vram_texture.Create(m_device.Get(), texture_width, texture_height, texture_format, + if (!m_vram_texture.Create(m_device.Get(), texture_width, texture_height, multisamples, texture_format, D3D11_BIND_SHADER_RESOURCE | 
D3D11_BIND_RENDER_TARGET) || - !m_vram_depth_texture.Create(m_device.Get(), texture_width, texture_height, depth_format, + !m_vram_depth_texture.Create(m_device.Get(), texture_width, texture_height, multisamples, depth_format, D3D11_BIND_DEPTH_STENCIL) || - !m_vram_read_texture.Create(m_device.Get(), texture_width, texture_height, texture_format, + !m_vram_read_texture.Create(m_device.Get(), texture_width, texture_height, 1, texture_format, D3D11_BIND_SHADER_RESOURCE) || - !m_display_texture.Create(m_device.Get(), texture_width, texture_height, texture_format, + !m_display_texture.Create(m_device.Get(), texture_width, texture_height, 1, texture_format, D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET) || - !m_vram_encoding_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, texture_format, + !m_vram_encoding_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, 1, texture_format, D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET) || !m_vram_readback_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, texture_format, false)) { return false; } - const CD3D11_DEPTH_STENCIL_VIEW_DESC depth_view_desc(D3D11_DSV_DIMENSION_TEXTURE2D, depth_format); + const CD3D11_DEPTH_STENCIL_VIEW_DESC depth_view_desc( + multisamples > 1 ? D3D11_DSV_DIMENSION_TEXTURE2DMS : D3D11_DSV_DIMENSION_TEXTURE2D, depth_format); HRESULT hr = m_device->CreateDepthStencilView(m_vram_depth_texture, &depth_view_desc, m_vram_depth_view.GetAddressOf()); if (FAILED(hr)) return false; - // do we need to restore the framebuffer after a size change? - if (old_vram_texture) - { - const bool linear_filter = old_vram_texture.GetWidth() > m_vram_texture.GetWidth(); - Log_DevPrintf("Scaling %ux%u VRAM texture to %ux%u using %s filter", old_vram_texture.GetWidth(), - old_vram_texture.GetHeight(), m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), - linear_filter ? 
"linear" : "nearest"); - - BlitTexture(m_vram_texture.GetD3DRTV(), 0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), - old_vram_texture.GetD3DSRV(), 0, 0, old_vram_texture.GetWidth(), old_vram_texture.GetHeight(), - old_vram_texture.GetWidth(), old_vram_texture.GetHeight(), linear_filter); - } - m_context->OMSetRenderTargets(1, m_vram_texture.GetD3DRTVArray(), nullptr); SetFullVRAMDirtyRectangle(); return true; @@ -281,9 +286,21 @@ bool GPU_HW_D3D11::CreateStateObjects() CD3D11_RASTERIZER_DESC rs_desc = CD3D11_RASTERIZER_DESC(CD3D11_DEFAULT()); rs_desc.CullMode = D3D11_CULL_NONE; rs_desc.ScissorEnable = TRUE; + rs_desc.MultisampleEnable = IsUsingMultisampling(); hr = m_device->CreateRasterizerState(&rs_desc, m_cull_none_rasterizer_state.ReleaseAndGetAddressOf()); if (FAILED(hr)) return false; + if (IsUsingMultisampling()) + { + rs_desc.MultisampleEnable = FALSE; + hr = m_device->CreateRasterizerState(&rs_desc, m_cull_none_rasterizer_state_no_msaa.ReleaseAndGetAddressOf()); + if (FAILED(hr)) + return false; + } + else + { + m_cull_none_rasterizer_state_no_msaa = m_cull_none_rasterizer_state; + } CD3D11_DEPTH_STENCIL_DESC ds_desc = CD3D11_DEPTH_STENCIL_DESC(CD3D11_DEFAULT()); ds_desc.DepthEnable = FALSE; @@ -364,6 +381,7 @@ void GPU_HW_D3D11::DestroyStateObjects() m_depth_test_always_state.Reset(); m_depth_disabled_state.Reset(); m_cull_none_rasterizer_state.Reset(); + m_cull_none_rasterizer_state_no_msaa.Reset(); } bool GPU_HW_D3D11::CompileShaders() @@ -372,8 +390,9 @@ bool GPU_HW_D3D11::CompileShaders() shader_cache.Open(g_host_interface->GetShaderCacheBasePath(), m_device->GetFeatureLevel(), g_settings.gpu_use_debug_device); - GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color, m_scaled_dithering, - m_texture_filtering, m_using_uv_limits, m_supports_dual_source_blend); + GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading, + m_true_color, 
m_scaled_dithering, m_texture_filtering, m_using_uv_limits, + m_supports_dual_source_blend); Common::Timer compile_time; const int progress_total = 1 + 1 + 2 + (4 * 9 * 2 * 2) + 7 + (2 * 3); @@ -572,25 +591,6 @@ void GPU_HW_D3D11::SetViewportAndScissor(u32 x, u32 y, u32 width, u32 height) SetScissor(x, y, width, height); } -void GPU_HW_D3D11::BlitTexture(ID3D11RenderTargetView* dst, u32 dst_x, u32 dst_y, u32 dst_width, u32 dst_height, - ID3D11ShaderResourceView* src, u32 src_x, u32 src_y, u32 src_width, u32 src_height, - u32 src_texture_width, u32 src_texture_height, bool linear_filter) -{ - const float uniforms[4] = {static_cast(src_x) / static_cast(src_texture_width), - static_cast(src_y) / static_cast(src_texture_height), - static_cast(src_width) / static_cast(src_texture_width), - static_cast(src_height) / static_cast(src_texture_height)}; - - m_context->OMSetRenderTargets(1, &dst, nullptr); - m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0); - m_context->PSSetShaderResources(0, 1, &src); - m_context->PSSetSamplers( - 0, 1, linear_filter ? 
m_linear_sampler_state.GetAddressOf() : m_point_sampler_state.GetAddressOf()); - SetViewport(dst_x, dst_y, dst_width, dst_height); - SetScissor(dst_x, dst_y, dst_width, dst_height); - DrawUtilityShader(m_copy_pixel_shader.Get(), uniforms, sizeof(uniforms)); -} - void GPU_HW_D3D11::DrawUtilityShader(ID3D11PixelShader* shader, const void* uniforms, u32 uniforms_size) { if (uniforms) @@ -650,8 +650,20 @@ void GPU_HW_D3D11::UpdateDisplay() if (g_settings.debugging.show_vram) { - m_host_display->SetDisplayTexture(m_vram_texture.GetD3DSRV(), m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), - 0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); + if (IsUsingMultisampling()) + { + UpdateVRAMReadTexture(); + m_host_display->SetDisplayTexture(m_vram_read_texture.GetD3DSRV(), m_vram_read_texture.GetWidth(), + m_vram_read_texture.GetHeight(), 0, 0, m_vram_read_texture.GetWidth(), + m_vram_read_texture.GetHeight()); + } + else + { + m_host_display->SetDisplayTexture(m_vram_texture.GetD3DSRV(), m_vram_texture.GetWidth(), + m_vram_texture.GetHeight(), 0, 0, m_vram_texture.GetWidth(), + m_vram_texture.GetHeight()); + } + m_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, static_cast(VRAM_WIDTH) / static_cast(VRAM_HEIGHT)); } @@ -672,7 +684,7 @@ void GPU_HW_D3D11::UpdateDisplay() m_host_display->ClearDisplayTexture(); } else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == InterlacedRenderMode::None && - (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() && + !IsUsingMultisampling() && (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() && (scaled_vram_offset_y + scaled_display_height) <= m_vram_texture.GetHeight()) { m_host_display->SetDisplayTexture(m_vram_texture.GetD3DSRV(), m_vram_texture.GetWidth(), @@ -681,6 +693,7 @@ void GPU_HW_D3D11::UpdateDisplay() } else { + m_context->RSSetState(m_cull_none_rasterizer_state_no_msaa.Get()); m_context->OMSetRenderTargets(1, 
m_display_texture.GetD3DRTVArray(), nullptr); m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0); m_context->PSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray()); @@ -719,6 +732,7 @@ void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height) // Encode the 24-bit texture as 16-bit. const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()}; + m_context->RSSetState(m_cull_none_rasterizer_state_no_msaa.Get()); m_context->OMSetRenderTargets(1, m_vram_encoding_texture.GetD3DRTVArray(), nullptr); m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0); m_context->PSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray()); @@ -796,7 +810,7 @@ void GPU_HW_D3D11::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* d void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) { - if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height)) + if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height) || IsUsingMultisampling()) { const Common::Rectangle src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height); const Common::Rectangle dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height); @@ -844,8 +858,17 @@ void GPU_HW_D3D11::UpdateVRAMReadTexture() { const auto scaled_rect = m_vram_dirty_rect * m_resolution_scale; const CD3D11_BOX src_box(scaled_rect.left, scaled_rect.top, 0, scaled_rect.right, scaled_rect.bottom, 1); - m_context->CopySubresourceRegion(m_vram_read_texture, 0, scaled_rect.left, scaled_rect.top, 0, m_vram_texture, 0, - &src_box); + + if (m_vram_texture.IsMultisampled()) + { + m_context->ResolveSubresource(m_vram_read_texture.GetD3DTexture(), 0, m_vram_texture.GetD3DTexture(), 0, + m_vram_texture.GetFormat()); + } + else + { + m_context->CopySubresourceRegion(m_vram_read_texture, 0, scaled_rect.left, scaled_rect.top, 0, m_vram_texture, 0, + &src_box); + } GPU_HW::UpdateVRAMReadTexture(); } diff --git 
a/src/core/gpu_hw_d3d11.h b/src/core/gpu_hw_d3d11.h index 1589777b6..3c0fb5989 100644 --- a/src/core/gpu_hw_d3d11.h +++ b/src/core/gpu_hw_d3d11.h @@ -65,11 +65,6 @@ private: void SetScissor(u32 x, u32 y, u32 width, u32 height); void SetViewportAndScissor(u32 x, u32 y, u32 width, u32 height); - /// Blits from src to dst, downscaling or upscaling in the process. - void BlitTexture(ID3D11RenderTargetView* dst, u32 dst_x, u32 dst_y, u32 dst_width, u32 dst_height, - ID3D11ShaderResourceView* src, u32 src_x, u32 src_y, u32 src_width, u32 src_height, - u32 src_texture_width, u32 src_texture_height, bool linear_filter); - void DrawUtilityShader(ID3D11PixelShader* shader, const void* uniforms, u32 uniforms_size); ComPtr m_device; @@ -94,6 +89,7 @@ private: ComPtr m_texture_stream_buffer_srv_r16ui; ComPtr m_cull_none_rasterizer_state; + ComPtr m_cull_none_rasterizer_state_no_msaa; ComPtr m_depth_disabled_state; ComPtr m_depth_test_always_state; diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp index c0797cafb..d43fa448a 100644 --- a/src/core/gpu_hw_opengl.cpp +++ b/src/core/gpu_hw_opengl.cpp @@ -128,6 +128,9 @@ void GPU_HW_OpenGL::UpdateSettings() if (framebuffer_changed) { + RestoreGraphicsAPIState(); + ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + ResetGraphicsAPIState(); m_host_display->ClearDisplayTexture(); CreateFramebuffer(); } @@ -137,6 +140,7 @@ void GPU_HW_OpenGL::UpdateSettings() if (framebuffer_changed) { RestoreGraphicsAPIState(); + UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr); UpdateDepthBufferFromMaskBit(); UpdateDisplay(); ResetGraphicsAPIState(); @@ -179,6 +183,15 @@ void GPU_HW_OpenGL::SetCapabilities(HostDisplay* host_display) Log_InfoPrintf("Max texture size: %dx%d", max_texture_size, max_texture_size); m_max_resolution_scale = static_cast(max_texture_size / VRAM_WIDTH); + m_max_multisamples = 1; + glGetIntegerv(GL_MAX_SAMPLES, reinterpret_cast(&m_max_multisamples)); + if (m_max_multisamples == 0) + m_max_multisamples = 1; + + 
m_supports_per_sample_shading = GLAD_GL_ARB_sample_shading; + Log_InfoPrintf("Per-sample shading: %s", m_supports_per_sample_shading ? "supported" : "not supported"); + Log_InfoPrintf("Max multisamples: %u", m_max_multisamples); + glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, reinterpret_cast(&m_uniform_buffer_alignment)); Log_InfoPrintf("Uniform buffer offset alignment: %u", m_uniform_buffer_alignment); @@ -212,9 +225,16 @@ void GPU_HW_OpenGL::SetCapabilities(HostDisplay* host_display) Log_InfoPrintf("Max shader storage buffer size: %u", max_ssbo_size); m_use_ssbo_for_vram_writes = (max_ssbo_size >= (VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16))); if (m_use_ssbo_for_vram_writes) + { Log_InfoPrintf("Using shader storage buffers for VRAM writes."); + } else - Log_WarningPrintf("Texture buffers are not supported, VRAM writes will be slower."); + { + Log_WarningPrintf( + "Texture buffers are not supported, VRAM writes will be slower and multisampling will be unavailable."); + m_max_multisamples = 1; + m_supports_per_sample_shading = false; + } } int max_dual_source_draw_buffers = 0; @@ -240,24 +260,23 @@ void GPU_HW_OpenGL::SetCapabilities(HostDisplay* host_display) bool GPU_HW_OpenGL::CreateFramebuffer() { - // save old vram texture/fbo, in case we're changing scale - GL::Texture old_vram_texture = std::move(m_vram_texture); - GLuint old_vram_fbo = m_vram_fbo_id; - // scale vram size to internal resolution const u32 texture_width = VRAM_WIDTH * m_resolution_scale; const u32 texture_height = VRAM_HEIGHT * m_resolution_scale; + const u32 multisamples = m_multisamples; - if (!m_vram_texture.Create(texture_width, texture_height, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false, - true) || - !m_vram_depth_texture.Create(texture_width, texture_height, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, - GL_UNSIGNED_SHORT, nullptr, false) || - !m_vram_read_texture.Create(texture_width, texture_height, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false, + if 
(!m_vram_texture.Create(texture_width, texture_height, multisamples, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, + false, true) || + !m_vram_depth_texture.Create(texture_width, texture_height, multisamples, GL_DEPTH_COMPONENT16, + GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, nullptr, false) || + !m_vram_read_texture.Create(texture_width, texture_height, 1, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false, true) || !m_vram_read_texture.CreateFramebuffer() || - !m_vram_encoding_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false) || + !m_vram_encoding_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, + false) || !m_vram_encoding_texture.CreateFramebuffer() || - !m_display_texture.Create(texture_width, texture_height, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false) || + !m_display_texture.Create(texture_width, texture_height, 1, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, + false) || !m_display_texture.CreateFramebuffer()) { return false; @@ -265,28 +284,12 @@ bool GPU_HW_OpenGL::CreateFramebuffer() glGenFramebuffers(1, &m_vram_fbo_id); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_vram_fbo_id); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_vram_texture.GetGLId(), 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, m_vram_depth_texture.GetGLId(), 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, m_vram_texture.GetGLTarget(), + m_vram_texture.GetGLId(), 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, m_vram_depth_texture.GetGLTarget(), + m_vram_depth_texture.GetGLId(), 0); Assert(glCheckFramebufferStatus(GL_DRAW_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); - // do we need to restore the framebuffer after a size change? 
- if (old_vram_fbo != 0) - { - const bool linear_filter = old_vram_texture.GetWidth() > m_vram_texture.GetWidth(); - Log_DevPrintf("Scaling %ux%u VRAM texture to %ux%u using %s filter", old_vram_texture.GetWidth(), - old_vram_texture.GetHeight(), m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), - linear_filter ? "linear" : "nearest"); - glDisable(GL_SCISSOR_TEST); - - glBindFramebuffer(GL_READ_FRAMEBUFFER, old_vram_fbo); - glBlitFramebuffer(0, 0, old_vram_texture.GetWidth(), old_vram_texture.GetHeight(), 0, 0, m_vram_texture.GetWidth(), - m_vram_texture.GetHeight(), GL_COLOR_BUFFER_BIT, linear_filter ? GL_LINEAR : GL_NEAREST); - - glEnable(GL_SCISSOR_TEST); - old_vram_texture.Destroy(); - glDeleteFramebuffers(1, &old_vram_fbo); - } - SetFullVRAMDirtyRectangle(); return true; } @@ -366,8 +369,9 @@ bool GPU_HW_OpenGL::CompilePrograms() shader_cache.Open(IsGLES(), g_host_interface->GetShaderCacheBasePath()); const bool use_binding_layout = GPU_HW_ShaderGen::UseGLSLBindingLayout(); - GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color, m_scaled_dithering, - m_texture_filtering, m_using_uv_limits, m_supports_dual_source_blend); + GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading, + m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits, + m_supports_dual_source_blend); Common::Timer compile_time; const int progress_total = (4 * 9 * 2 * 2) + (2 * 3) + 5; @@ -630,10 +634,22 @@ void GPU_HW_OpenGL::UpdateDisplay() if (g_settings.debugging.show_vram) { - m_host_display->SetDisplayTexture(reinterpret_cast(static_cast(m_vram_texture.GetGLId())), - m_vram_texture.GetWidth(), static_cast(m_vram_texture.GetHeight()), 0, - m_vram_texture.GetHeight(), m_vram_texture.GetWidth(), - -static_cast(m_vram_texture.GetHeight())); + if (IsUsingMultisampling()) + { + UpdateVRAMReadTexture(); + + m_host_display->SetDisplayTexture( + 
reinterpret_cast(static_cast(m_vram_read_texture.GetGLId())), m_vram_read_texture.GetWidth(), + static_cast(m_vram_read_texture.GetHeight()), 0, m_vram_read_texture.GetHeight(), + m_vram_read_texture.GetWidth(), -static_cast(m_vram_read_texture.GetHeight())); + } + else + { + m_host_display->SetDisplayTexture(reinterpret_cast(static_cast(m_vram_texture.GetGLId())), + m_vram_texture.GetWidth(), static_cast(m_vram_texture.GetHeight()), 0, + m_vram_texture.GetHeight(), m_vram_texture.GetWidth(), + -static_cast(m_vram_texture.GetHeight())); + } m_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, static_cast(VRAM_WIDTH) / static_cast(VRAM_HEIGHT)); } @@ -654,7 +670,7 @@ void GPU_HW_OpenGL::UpdateDisplay() m_host_display->ClearDisplayTexture(); } else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == GPU_HW::InterlacedRenderMode::None && - (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() && + !IsUsingMultisampling() && (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() && (scaled_vram_offset_y + scaled_display_height) <= m_vram_texture.GetHeight()) { m_host_display->SetDisplayTexture(reinterpret_cast(static_cast(m_vram_texture.GetGLId())), @@ -868,7 +884,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* const u32 flipped_y = VRAM_HEIGHT - y - height; // update texture data - glTexSubImage2D(GL_TEXTURE_2D, 0, x, flipped_y, width, height, GL_RGBA, GL_UNSIGNED_BYTE, + glTexSubImage2D(m_vram_texture.GetGLTarget(), 0, x, flipped_y, width, height, GL_RGBA, GL_UNSIGNED_BYTE, reinterpret_cast(static_cast(map_result.buffer_offset))); m_texture_stream_buffer->Unbind(); @@ -939,13 +955,13 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid if (GLAD_GL_VERSION_4_3) { - glCopyImageSubData(m_vram_texture.GetGLId(), GL_TEXTURE_2D, 0, src_x, src_y, 0, m_vram_texture.GetGLId(), - GL_TEXTURE_2D, 0, dst_x, dst_y, 0, width, 
height, 1); + glCopyImageSubData(m_vram_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, src_x, src_y, 0, + m_vram_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, dst_x, dst_y, 0, width, height, 1); } else if (GLAD_GL_EXT_copy_image) { - glCopyImageSubDataEXT(m_vram_texture.GetGLId(), GL_TEXTURE_2D, 0, src_x, src_y, 0, m_vram_texture.GetGLId(), - GL_TEXTURE_2D, 0, dst_x, dst_y, 0, width, height, 1); + glCopyImageSubDataEXT(m_vram_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, src_x, src_y, 0, + m_vram_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, dst_x, dst_y, 0, width, height, 1); } else { @@ -964,16 +980,17 @@ void GPU_HW_OpenGL::UpdateVRAMReadTexture() const u32 height = scaled_rect.GetHeight(); const u32 x = scaled_rect.left; const u32 y = m_vram_texture.GetHeight() - scaled_rect.top - height; + const bool multisampled = m_vram_texture.IsMultisampled(); - if (GLAD_GL_VERSION_4_3) + if (!multisampled && GLAD_GL_VERSION_4_3) { - glCopyImageSubData(m_vram_texture.GetGLId(), GL_TEXTURE_2D, 0, x, y, 0, m_vram_read_texture.GetGLId(), - GL_TEXTURE_2D, 0, x, y, 0, width, height, 1); + glCopyImageSubData(m_vram_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, x, y, 0, + m_vram_read_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, x, y, 0, width, height, 1); } - else if (GLAD_GL_EXT_copy_image) + else if (!multisampled && GLAD_GL_EXT_copy_image) { - glCopyImageSubDataEXT(m_vram_texture.GetGLId(), GL_TEXTURE_2D, 0, x, y, 0, m_vram_read_texture.GetGLId(), - GL_TEXTURE_2D, 0, x, y, 0, width, height, 1); + glCopyImageSubDataEXT(m_vram_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, x, y, 0, + m_vram_read_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, x, y, 0, width, height, 1); } else { diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index b7b2041fb..47788bb01 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -3,9 +3,11 @@ #include #include 
-GPU_HW_ShaderGen::GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolution_scale, bool true_color, bool scaled_dithering, GPUTextureFilter texture_filtering, bool uv_limits, bool supports_dual_source_blend) : - ShaderGen(render_api, supports_dual_source_blend), - m_resolution_scale(resolution_scale), m_true_color(true_color), +GPU_HW_ShaderGen::GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolution_scale, u32 multisamples, + bool per_sample_shading, bool true_color, bool scaled_dithering, + GPUTextureFilter texture_filtering, bool uv_limits, bool supports_dual_source_blend) + : ShaderGen(render_api, supports_dual_source_blend), m_resolution_scale(resolution_scale), + m_multisamples(multisamples), m_true_color(true_color), m_per_sample_shading(per_sample_shading), m_scaled_dithering(scaled_dithering), m_texture_filter(texture_filtering), m_uv_limits(uv_limits) { } @@ -14,9 +16,13 @@ GPU_HW_ShaderGen::~GPU_HW_ShaderGen() = default; void GPU_HW_ShaderGen::WriteCommonFunctions(std::stringstream& ss) { + DefineMacro(ss, "MULTISAMPLING", UsingMSAA()); + ss << "CONSTANT uint RESOLUTION_SCALE = " << m_resolution_scale << "u;\n"; ss << "CONSTANT uint2 VRAM_SIZE = uint2(" << GPU::VRAM_WIDTH << ", " << GPU::VRAM_HEIGHT << ") * RESOLUTION_SCALE;\n"; ss << "CONSTANT float2 RCP_VRAM_SIZE = float2(1.0, 1.0) / float2(VRAM_SIZE);\n"; + ss << "CONSTANT uint MULTISAMPLES = " << m_multisamples << "u;\n"; + ss << "CONSTANT bool PER_SAMPLE_SHADING = " << (m_per_sample_shading ? 
"true" : "false") << ";\n"; ss << R"( float fixYCoord(float y) @@ -90,17 +96,20 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured) { DeclareVertexEntryPoint( ss, {"float4 a_pos", "float4 a_col0", "uint a_texcoord", "uint a_texpage", "float4 a_uv_limits"}, 1, 1, - {{"nointerpolation", "uint4 v_texpage"}, {"nointerpolation", "float4 v_uv_limits"}}, false); + {{"nointerpolation", "uint4 v_texpage"}, {"nointerpolation", "float4 v_uv_limits"}}, false, "", UsingMSAA(), + UsingPerSampleShading()); } else { DeclareVertexEntryPoint(ss, {"float4 a_pos", "float4 a_col0", "uint a_texcoord", "uint a_texpage"}, 1, 1, - {{"nointerpolation", "uint4 v_texpage"}}, false); + {{"nointerpolation", "uint4 v_texpage"}}, false, "", UsingMSAA(), + UsingPerSampleShading()); } } else { - DeclareVertexEntryPoint(ss, {"float4 a_pos", "float4 a_col0"}, 1, 0, {}, false); + DeclareVertexEntryPoint(ss, {"float4 a_pos", "float4 a_col0"}, 1, 0, {}, false, "", UsingMSAA(), + UsingPerSampleShading()); } ss << R"( @@ -767,17 +776,17 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords) { DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}, {"nointerpolation", "float4 v_uv_limits"}}, - true, use_dual_source ? 2 : 1, true); + true, use_dual_source ? 2 : 1, true, UsingMSAA(), UsingPerSampleShading()); } else { - DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}}, true, use_dual_source ? 2 : 1, - true); + DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}}, true, use_dual_source ? 2 : 1, true, + UsingMSAA(), UsingPerSampleShading()); } } else { - DeclareFragmentEntryPoint(ss, 1, 0, {}, true, use_dual_source ? 2 : 1, true); + DeclareFragmentEntryPoint(ss, 1, 0, {}, true, use_dual_source ? 
2 : 1, true, UsingMSAA(), UsingPerSampleShading()); } ss << R"( @@ -976,7 +985,22 @@ std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, WriteCommonFunctions(ss); DeclareUniformBuffer(ss, {"uint2 u_vram_offset", "uint u_crop_left", "uint u_field_offset"}, true); - DeclareTexture(ss, "samp0", 0); + DeclareTexture(ss, "samp0", 0, UsingMSAA()); + + ss << R"( +float4 LoadVRAM(int2 coords) +{ +#if MULTISAMPLING + float4 value = LOAD_TEXTURE_MS(samp0, coords, 0u); + for (uint sample_index = 1u; sample_index < MULTISAMPLES; sample_index++) + value += LOAD_TEXTURE_MS(samp0, coords, sample_index); + value /= float(MULTISAMPLES); + return value; +#else + return LOAD_TEXTURE(samp0, coords, 0); +#endif +} +)"; DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1); ss << R"( @@ -1000,8 +1024,8 @@ std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, uint2 vram_coords = u_vram_offset + uint2(((relative_x * 3u) / 2u) * RESOLUTION_SCALE, icoords.y); // load adjacent 16-bit texels - uint s0 = RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, int2(vram_coords % VRAM_SIZE), 0)); - uint s1 = RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, int2((vram_coords + uint2(RESOLUTION_SCALE, 0)) % VRAM_SIZE), 0)); + uint s0 = RGBA8ToRGBA5551(LoadVRAM(int2(vram_coords % VRAM_SIZE))); + uint s1 = RGBA8ToRGBA5551(LoadVRAM(int2((vram_coords + uint2(RESOLUTION_SCALE, 0)) % VRAM_SIZE))); // select which part of the combined 16-bit texels we are currently shading uint s1s0 = ((s1 << 16) | s0) >> ((relative_x & 1u) * 8u); @@ -1012,7 +1036,7 @@ std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, #else // load and return uint2 vram_coords = u_vram_offset + uint2(icoords.x + u_crop_left, icoords.y); - o_col0 = LOAD_TEXTURE(samp0, int2(vram_coords % VRAM_SIZE), 0); + o_col0 = LoadVRAM(int2(vram_coords % VRAM_SIZE)); #endif } )"; @@ -1027,13 +1051,26 @@ std::string GPU_HW_ShaderGen::GenerateVRAMReadFragmentShader() WriteCommonFunctions(ss); 
DeclareUniformBuffer(ss, {"uint2 u_base_coords", "uint2 u_size"}, true); - DeclareTexture(ss, "samp0", 0); + DeclareTexture(ss, "samp0", 0, UsingMSAA()); ss << R"( +float4 LoadVRAM(int2 coords) +{ +#if MULTISAMPLING + float4 value = LOAD_TEXTURE_MS(samp0, coords, 0u); + for (uint sample_index = 1u; sample_index < MULTISAMPLES; sample_index++) + value += LOAD_TEXTURE_MS(samp0, coords, sample_index); + value /= float(MULTISAMPLES); + return value; +#else + return LOAD_TEXTURE(samp0, coords, 0); +#endif +} + uint SampleVRAM(uint2 coords) { if (RESOLUTION_SCALE == 1u) - return RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, int2(coords), 0)); + return RGBA8ToRGBA5551(LoadVRAM(int2(coords))); // Box filter for downsampling. float4 value = float4(0.0, 0.0, 0.0, 0.0); @@ -1041,7 +1078,7 @@ uint SampleVRAM(uint2 coords) for (uint offset_x = 0u; offset_x < RESOLUTION_SCALE; offset_x++) { for (uint offset_y = 0u; offset_y < RESOLUTION_SCALE; offset_y++) - value += LOAD_TEXTURE(samp0, int2(base_coords + uint2(offset_x, offset_y)), 0); + value += LoadVRAM(int2(base_coords + uint2(offset_x, offset_y))); } value /= float(RESOLUTION_SCALE * RESOLUTION_SCALE); return RGBA8ToRGBA5551(value); @@ -1133,6 +1170,9 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_ssbo) std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader() { + // TODO: This won't currently work because we can't bind the texture to both the shader and framebuffer. 
+ const bool msaa = false; + std::stringstream ss; WriteHeader(ss); WriteCommonFunctions(ss); @@ -1141,8 +1181,9 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader() "bool u_set_mask_bit", "float u_depth_value"}, true); - DeclareTexture(ss, "samp0", 0); - DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true); + DeclareTexture(ss, "samp0", 0, msaa); + DefineMacro(ss, "MSAA_COPY", msaa); + DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true, false, false, msaa); ss << R"( { uint2 dst_coords = uint2(v_pos.xy); @@ -1163,7 +1204,11 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader() uint2 src_coords = (u_src_coords + offset) % VRAM_SIZE; // sample and apply mask bit +#if MSAA_COPY + float4 color = LOAD_TEXTURE_MS(samp0, int2(src_coords), f_sample_index); +#else float4 color = LOAD_TEXTURE(samp0, int2(src_coords), 0); +#endif o_col0 = float4(color.xyz, u_set_mask_bit ? 1.0 : color.a); o_depth = (u_set_mask_bit ? 1.0f : ((o_col0.a == 1.0) ? u_depth_value : 0.0)); })"; @@ -1175,12 +1220,17 @@ std::string GPU_HW_ShaderGen::GenerateVRAMUpdateDepthFragmentShader() { std::stringstream ss; WriteHeader(ss); - DeclareTexture(ss, "samp0", 0); - DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 0, true); + WriteCommonFunctions(ss); + DeclareTexture(ss, "samp0", 0, UsingMSAA()); + DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 0, true, false, false, UsingMSAA()); ss << R"( { +#if MULTISAMPLING + o_depth = LOAD_TEXTURE_MS(samp0, int2(v_pos.xy), f_sample_index).a; +#else o_depth = LOAD_TEXTURE(samp0, int2(v_pos.xy), 0).a; +#endif } )"; diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h index a881d4b41..7aea8a908 100644 --- a/src/core/gpu_hw_shadergen.h +++ b/src/core/gpu_hw_shadergen.h @@ -5,8 +5,9 @@ class GPU_HW_ShaderGen : public ShaderGen { public: - GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolution_scale, bool true_color, bool scaled_dithering, - GPUTextureFilter texture_filtering, bool uv_limits, bool 
supports_dual_source_blend); + GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolution_scale, u32 multisamples, bool per_sample_shading, + bool true_color, bool scaled_dithering, GPUTextureFilter texture_filtering, bool uv_limits, + bool supports_dual_source_blend); ~GPU_HW_ShaderGen(); std::string GenerateBatchVertexShader(bool textured); @@ -20,11 +21,16 @@ public: std::string GenerateVRAMUpdateDepthFragmentShader(); private: + ALWAYS_INLINE bool UsingMSAA() const { return m_multisamples > 1; } + ALWAYS_INLINE bool UsingPerSampleShading() const { return m_multisamples > 1 && m_per_sample_shading; } + void WriteCommonFunctions(std::stringstream& ss); void WriteBatchUniformBuffer(std::stringstream& ss); void WriteBatchTextureFilter(std::stringstream& ss, GPUTextureFilter texture_filter); u32 m_resolution_scale; + u32 m_multisamples; + bool m_per_sample_shading; bool m_true_color; bool m_scaled_dithering; GPUTextureFilter m_texture_filter; diff --git a/src/core/gpu_hw_vulkan.cpp b/src/core/gpu_hw_vulkan.cpp index 140738033..bdda18983 100644 --- a/src/core/gpu_hw_vulkan.cpp +++ b/src/core/gpu_hw_vulkan.cpp @@ -119,19 +119,26 @@ void GPU_HW_Vulkan::UpdateSettings() { GPU_HW::UpdateSettings(); - // Everything should be finished executing before recreating resources. - g_vulkan_context->ExecuteCommandBuffer(true); - bool framebuffer_changed, shaders_changed; UpdateHWSettings(&framebuffer_changed, &shaders_changed); + if (framebuffer_changed) + { + RestoreGraphicsAPIState(); + ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + ResetGraphicsAPIState(); + } + if (framebuffer_changed) CreateFramebuffer(); + // Everything should be finished executing before recreating resources. 
+ m_host_display->ClearDisplayTexture(); + g_vulkan_context->ExecuteCommandBuffer(true); + if (shaders_changed) { // clear it since we draw a loading screen and it's not in the correct state - m_host_display->ClearDisplayTexture(); DestroyPipelines(); CompilePipelines(); } @@ -140,6 +147,7 @@ void GPU_HW_Vulkan::UpdateSettings() if (framebuffer_changed) { RestoreGraphicsAPIState(); + UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr); UpdateDepthBufferFromMaskBit(); UpdateDisplay(); ResetGraphicsAPIState(); @@ -203,11 +211,41 @@ void GPU_HW_Vulkan::SetCapabilities() { const u32 max_texture_size = g_vulkan_context->GetDeviceLimits().maxImageDimension2D; const u32 max_texture_scale = max_texture_size / VRAM_WIDTH; - Log_InfoPrintf("Max texture size: %ux%u", max_texture_size, max_texture_size); - m_max_resolution_scale = max_texture_scale; + + VkImageFormatProperties color_properties = {}; + vkGetPhysicalDeviceImageFormatProperties(g_vulkan_context->GetPhysicalDevice(), VK_FORMAT_R8G8B8A8_UNORM, + VK_IMAGE_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, 0, &color_properties); + VkImageFormatProperties depth_properties = {}; + vkGetPhysicalDeviceImageFormatProperties(g_vulkan_context->GetPhysicalDevice(), VK_FORMAT_D32_SFLOAT, + VK_IMAGE_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, 0, &depth_properties); + const VkSampleCountFlags combined_properties = + g_vulkan_context->GetDeviceProperties().limits.framebufferColorSampleCounts & + g_vulkan_context->GetDeviceProperties().limits.framebufferDepthSampleCounts & color_properties.sampleCounts & + depth_properties.sampleCounts; + if (combined_properties & VK_SAMPLE_COUNT_64_BIT) + m_max_multisamples = 64; + else if (combined_properties & VK_SAMPLE_COUNT_32_BIT) + m_max_multisamples = 32; + else if (combined_properties & VK_SAMPLE_COUNT_16_BIT) + m_max_multisamples = 16; + else if (combined_properties & VK_SAMPLE_COUNT_8_BIT) + m_max_multisamples = 8; + else if 
(combined_properties & VK_SAMPLE_COUNT_4_BIT) + m_max_multisamples = 4; + else if (combined_properties & VK_SAMPLE_COUNT_2_BIT) + m_max_multisamples = 2; + else + m_max_multisamples = 1; + m_supports_dual_source_blend = g_vulkan_context->GetDeviceFeatures().dualSrcBlend; + m_supports_per_sample_shading = g_vulkan_context->GetDeviceFeatures().sampleRateShading; + Log_InfoPrintf("Dual-source blend: %s", m_supports_dual_source_blend ? "supported" : "not supported"); + Log_InfoPrintf("Per-sample shading: %s", m_supports_per_sample_shading ? "supported" : "not supported"); + Log_InfoPrintf("Max multisamples: %u", m_max_multisamples); #ifdef __APPLE__ // Partial texture buffer uploads appear to be broken in macOS/MoltenVK. @@ -360,8 +398,6 @@ bool GPU_HW_Vulkan::CreateSamplers() bool GPU_HW_Vulkan::CreateFramebuffer() { - // save old vram texture/fbo, in case we're changing scale - auto old_vram_texture = std::move(m_vram_texture); DestroyFramebuffer(); // scale vram size to internal resolution @@ -369,7 +405,7 @@ bool GPU_HW_Vulkan::CreateFramebuffer() const u32 texture_height = VRAM_HEIGHT * m_resolution_scale; const VkFormat texture_format = VK_FORMAT_R8G8B8A8_UNORM; const VkFormat depth_format = VK_FORMAT_D16_UNORM; - const VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; + const VkSampleCountFlagBits samples = static_cast(m_multisamples); if (!m_vram_texture.Create(texture_width, texture_height, 1, 1, texture_format, samples, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, @@ -378,15 +414,15 @@ bool GPU_HW_Vulkan::CreateFramebuffer() !m_vram_depth_texture.Create(texture_width, texture_height, 1, 1, depth_format, samples, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT) || - !m_vram_read_texture.Create(texture_width, texture_height, 1, 1, texture_format, samples, VK_IMAGE_VIEW_TYPE_2D, - VK_IMAGE_TILING_OPTIMAL, + !m_vram_read_texture.Create(texture_width, texture_height, 1, 1, 
texture_format, VK_SAMPLE_COUNT_1_BIT, + VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT) || - !m_display_texture.Create(texture_width, texture_height, 1, 1, texture_format, samples, VK_IMAGE_VIEW_TYPE_2D, - VK_IMAGE_TILING_OPTIMAL, + !m_display_texture.Create(texture_width, texture_height, 1, 1, texture_format, VK_SAMPLE_COUNT_1_BIT, + VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT) || - !m_vram_readback_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, texture_format, samples, VK_IMAGE_VIEW_TYPE_2D, - VK_IMAGE_TILING_OPTIMAL, + !m_vram_readback_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, texture_format, VK_SAMPLE_COUNT_1_BIT, + VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT) || !m_vram_readback_staging_texture.Create(Vulkan::StagingBuffer::Type::Readback, texture_format, VRAM_WIDTH / 2, VRAM_HEIGHT)) @@ -457,30 +493,6 @@ bool GPU_HW_Vulkan::CreateFramebuffer() m_point_sampler, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); dsubuilder.Update(g_vulkan_context->GetDevice()); - if (old_vram_texture.IsValid()) - { - const bool linear_filter = old_vram_texture.GetWidth() > m_vram_texture.GetWidth(); - Log_DevPrintf("Scaling %ux%u VRAM texture to %ux%u using %s filter", old_vram_texture.GetWidth(), - old_vram_texture.GetHeight(), m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), - linear_filter ? 
"linear" : "nearest"); - - m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - old_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - - const VkImageBlit blit{ - {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, - {{0, 0, 0}, {static_cast(old_vram_texture.GetWidth()), static_cast(old_vram_texture.GetHeight()), 1}}, - {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, - {{0, 0, 0}, {static_cast(m_vram_texture.GetWidth()), static_cast(m_vram_texture.GetHeight()), 1}}}; - vkCmdBlitImage(cmdbuf, old_vram_texture.GetImage(), old_vram_texture.GetLayout(), m_vram_texture.GetImage(), - m_vram_texture.GetLayout(), 1, &blit, linear_filter ? VK_FILTER_LINEAR : VK_FILTER_NEAREST); - - m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - // Can't immediately destroy because we're blitting in the current command buffer. - old_vram_texture.Destroy(true); - } - ClearDisplay(); SetFullVRAMDirtyRectangle(); return true; @@ -583,8 +595,9 @@ bool GPU_HW_Vulkan::CompilePipelines() VkDevice device = g_vulkan_context->GetDevice(); VkPipelineCache pipeline_cache = g_vulkan_shader_cache->GetPipelineCache(); - GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color, m_scaled_dithering, - m_texture_filtering, m_using_uv_limits, m_supports_dual_source_blend); + GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading, + m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits, + m_supports_dual_source_blend); Common::Timer compile_time; const int progress_total = 2 + (4 * 9 * 2 * 2) + (2 * 4 * 5 * 9 * 2 * 2) + 1 + 2 + 2 + 2 + 2 + (2 * 3); @@ -682,6 +695,7 @@ bool GPU_HW_Vulkan::CompilePipelines() gpbuilder.SetDepthState(true, true, (depth_test != 0) ? 
VK_COMPARE_OP_GREATER_OR_EQUAL : VK_COMPARE_OP_ALWAYS); gpbuilder.SetNoBlendingState(); + gpbuilder.SetMultisamples(m_multisamples, m_per_sample_shading); if ((static_cast(transparency_mode) != TransparencyMode::Disabled && (static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && @@ -736,6 +750,7 @@ bool GPU_HW_Vulkan::CompilePipelines() gpbuilder.SetNoBlendingState(); gpbuilder.SetDynamicViewportAndScissorState(); gpbuilder.SetVertexShader(fullscreen_quad_vertex_shader); + gpbuilder.SetMultisamples(m_multisamples, false); // VRAM fill { @@ -957,10 +972,20 @@ void GPU_HW_Vulkan::UpdateDisplay() if (g_settings.debugging.show_vram) { - m_vram_texture.TransitionToLayout(g_vulkan_context->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - m_host_display->SetDisplayTexture(&m_vram_texture, m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), 0, 0, - m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); + if (IsUsingMultisampling()) + { + UpdateVRAMReadTexture(); + m_host_display->SetDisplayTexture(&m_vram_read_texture, m_vram_read_texture.GetWidth(), + m_vram_read_texture.GetHeight(), 0, 0, m_vram_read_texture.GetWidth(), + m_vram_read_texture.GetHeight()); + } + else + { + m_vram_texture.TransitionToLayout(g_vulkan_context->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + m_host_display->SetDisplayTexture(&m_vram_texture, m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), 0, 0, + m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); + } m_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, static_cast(VRAM_WIDTH) / static_cast(VRAM_HEIGHT)); } @@ -981,7 +1006,7 @@ void GPU_HW_Vulkan::UpdateDisplay() m_host_display->ClearDisplayTexture(); } else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == InterlacedRenderMode::None && - (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() && + !IsUsingMultisampling() && (scaled_vram_offset_x 
+ scaled_display_width) <= m_vram_texture.GetWidth() && (scaled_vram_offset_y + scaled_display_height) <= m_vram_texture.GetHeight()) { m_vram_texture.TransitionToLayout(g_vulkan_context->GetCurrentCommandBuffer(), @@ -1158,7 +1183,7 @@ void GPU_HW_Vulkan::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* void GPU_HW_Vulkan::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) { - if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height)) + if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height) || IsUsingMultisampling()) { const Common::Rectangle src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height); const Common::Rectangle dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height); @@ -1224,14 +1249,28 @@ void GPU_HW_Vulkan::UpdateVRAMReadTexture() m_vram_read_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); const auto scaled_rect = m_vram_dirty_rect * m_resolution_scale; - const VkImageCopy copy{{VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, - {static_cast(scaled_rect.left), static_cast(scaled_rect.top), 0}, - {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, - {static_cast(scaled_rect.left), static_cast(scaled_rect.top), 0}, - {scaled_rect.GetWidth(), scaled_rect.GetHeight(), 1u}}; - vkCmdCopyImage(cmdbuf, m_vram_texture.GetImage(), m_vram_texture.GetLayout(), m_vram_read_texture.GetImage(), - m_vram_read_texture.GetLayout(), 1u, &copy); + if (m_vram_texture.GetSamples() > VK_SAMPLE_COUNT_1_BIT) + { + const VkImageResolve resolve{{VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, + {static_cast(scaled_rect.left), static_cast(scaled_rect.top), 0}, + {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, + {static_cast(scaled_rect.left), static_cast(scaled_rect.top), 0}, + {scaled_rect.GetWidth(), scaled_rect.GetHeight(), 1u}}; + vkCmdResolveImage(cmdbuf, m_vram_texture.GetImage(), m_vram_texture.GetLayout(), m_vram_read_texture.GetImage(), + m_vram_read_texture.GetLayout(), 1, &resolve); + } + else + { + const
VkImageCopy copy{{VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, + {static_cast(scaled_rect.left), static_cast(scaled_rect.top), 0}, + {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, + {static_cast(scaled_rect.left), static_cast(scaled_rect.top), 0}, + {scaled_rect.GetWidth(), scaled_rect.GetHeight(), 1u}}; + + vkCmdCopyImage(cmdbuf, m_vram_texture.GetImage(), m_vram_texture.GetLayout(), m_vram_read_texture.GetImage(), + m_vram_read_texture.GetLayout(), 1u, &copy); + } m_vram_read_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); diff --git a/src/core/host_interface.cpp b/src/core/host_interface.cpp index c7ad724bb..9f04498f1 100644 --- a/src/core/host_interface.cpp +++ b/src/core/host_interface.cpp @@ -429,7 +429,9 @@ void HostInterface::SetDefaultSettings(SettingsInterface& si) si.SetStringValue("GPU", "Renderer", Settings::GetRendererName(Settings::DEFAULT_GPU_RENDERER)); si.SetIntValue("GPU", "ResolutionScale", 1); + si.SetIntValue("GPU", "Multisamples", 1); si.SetBoolValue("GPU", "UseDebugDevice", false); + si.SetBoolValue("GPU", "PerSampleShading", false); si.SetBoolValue("GPU", "TrueColor", false); si.SetBoolValue("GPU", "ScaledDithering", true); si.SetStringValue("GPU", "TextureFilter", Settings::GetTextureFilterName(Settings::DEFAULT_GPU_TEXTURE_FILTER)); @@ -623,6 +625,8 @@ void HostInterface::CheckForSettingsChanges(const Settings& old_settings) m_audio_stream->SetOutputVolume(g_settings.audio_output_muted ?
0 : g_settings.audio_output_volume); if (g_settings.gpu_resolution_scale != old_settings.gpu_resolution_scale || + g_settings.gpu_multisamples != old_settings.gpu_multisamples || + g_settings.gpu_per_sample_shading != old_settings.gpu_per_sample_shading || g_settings.gpu_fifo_size != old_settings.gpu_fifo_size || g_settings.gpu_max_run_ahead != old_settings.gpu_max_run_ahead || g_settings.gpu_true_color != old_settings.gpu_true_color || diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 064976efa..945308742 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -135,7 +135,9 @@ void Settings::Load(SettingsInterface& si) .value_or(DEFAULT_GPU_RENDERER); gpu_adapter = si.GetStringValue("GPU", "Adapter", ""); gpu_resolution_scale = static_cast(si.GetIntValue("GPU", "ResolutionScale", 1)); + gpu_multisamples = static_cast(si.GetIntValue("GPU", "Multisamples", 1)); gpu_use_debug_device = si.GetBoolValue("GPU", "UseDebugDevice", false); + gpu_per_sample_shading = si.GetBoolValue("GPU", "PerSampleShading", false); gpu_true_color = si.GetBoolValue("GPU", "TrueColor", true); gpu_scaled_dithering = si.GetBoolValue("GPU", "ScaledDithering", false); gpu_texture_filter = @@ -265,7 +267,9 @@ void Settings::Save(SettingsInterface& si) const si.SetStringValue("GPU", "Renderer", GetRendererName(gpu_renderer)); si.SetStringValue("GPU", "Adapter", gpu_adapter.c_str()); si.SetIntValue("GPU", "ResolutionScale", static_cast(gpu_resolution_scale)); + si.SetIntValue("GPU", "Multisamples", static_cast(gpu_multisamples)); si.SetBoolValue("GPU", "UseDebugDevice", gpu_use_debug_device); + si.SetBoolValue("GPU", "PerSampleShading", gpu_per_sample_shading); si.SetBoolValue("GPU", "TrueColor", gpu_true_color); si.SetBoolValue("GPU", "ScaledDithering", gpu_scaled_dithering); si.SetStringValue("GPU", "TextureFilter", GetTextureFilterName(gpu_texture_filter)); diff --git a/src/core/settings.h b/src/core/settings.h index 53e41517c..3ad76714e 100644 --- a/src/core/settings.h 
+++ b/src/core/settings.h @@ -93,7 +93,9 @@ struct Settings std::string gpu_adapter; std::string display_post_process_chain; u32 gpu_resolution_scale = 1; + u32 gpu_multisamples = 1; bool gpu_use_debug_device = false; + bool gpu_per_sample_shading = false; bool gpu_true_color = true; bool gpu_scaled_dithering = false; GPUTextureFilter gpu_texture_filter = GPUTextureFilter::Nearest; diff --git a/src/core/shadergen.cpp b/src/core/shadergen.cpp index 09beade19..aa14df36c 100644 --- a/src/core/shadergen.cpp +++ b/src/core/shadergen.cpp @@ -153,6 +153,7 @@ void ShaderGen::WriteHeader(std::stringstream& ss) ss << "#define VECTOR_COMP_NEQ(a, b) notEqual((a), (b))\n"; ss << "#define SAMPLE_TEXTURE(name, coords) texture(name, coords)\n"; ss << "#define LOAD_TEXTURE(name, coords, mip) texelFetch(name, coords, mip)\n"; + ss << "#define LOAD_TEXTURE_MS(name, coords, sample) texelFetch(name, coords, int(sample))\n"; ss << "#define LOAD_TEXTURE_OFFSET(name, coords, mip, offset) texelFetchOffset(name, coords, mip, offset)\n"; ss << "#define LOAD_TEXTURE_BUFFER(name, index) texelFetch(name, index)\n"; ss << "#define BEGIN_ARRAY(type, size) type[size](\n"; @@ -161,7 +162,8 @@ void ShaderGen::WriteHeader(std::stringstream& ss) ss << "float saturate(float value) { return clamp(value, 0.0, 1.0); }\n"; ss << "float2 saturate(float2 value) { return clamp(value, float2(0.0, 0.0), float2(1.0, 1.0)); }\n"; ss << "float3 saturate(float3 value) { return clamp(value, float3(0.0, 0.0, 0.0), float3(1.0, 1.0, 1.0)); }\n"; - ss << "float4 saturate(float4 value) { return clamp(value, float4(0.0, 0.0, 0.0, 0.0), float4(1.0, 1.0, 1.0, 1.0)); }\n"; + ss << "float4 saturate(float4 value) { return clamp(value, float4(0.0, 0.0, 0.0, 0.0), float4(1.0, 1.0, 1.0, " + "1.0)); }\n"; } else { @@ -189,6 +191,7 @@ void ShaderGen::WriteHeader(std::stringstream& ss) ss << "#define VECTOR_COMP_NEQ(a, b) ((a) != (b))\n"; ss << "#define SAMPLE_TEXTURE(name, coords) name.Sample(name##_ss, coords)\n"; ss << "#define 
LOAD_TEXTURE(name, coords, mip) name.Load(int3(coords, mip))\n"; + ss << "#define LOAD_TEXTURE_MS(name, coords, sample) name.Load(coords, sample)\n"; ss << "#define LOAD_TEXTURE_OFFSET(name, coords, mip, offset) name.Load(int3(coords, mip), offset)\n"; ss << "#define LOAD_TEXTURE_BUFFER(name, index) name.Load(index)\n"; ss << "#define BEGIN_ARRAY(type, size) {\n"; @@ -231,7 +234,7 @@ void ShaderGen::DeclareUniformBuffer(std::stringstream& ss, const std::initializ ss << "};\n\n"; } -void ShaderGen::DeclareTexture(std::stringstream& ss, const char* name, u32 index) +void ShaderGen::DeclareTexture(std::stringstream& ss, const char* name, u32 index, bool multisampled /* = false */) { if (m_glsl) { @@ -240,11 +243,11 @@ void ShaderGen::DeclareTexture(std::stringstream& ss, const char* name, u32 inde else if (m_use_glsl_binding_layout) ss << "layout(binding = " << index << ") "; - ss << "uniform sampler2D " << name << ";\n"; + ss << "uniform " << (multisampled ? "sampler2DMS " : "sampler2D ") << name << ";\n"; } else { - ss << "Texture2D " << name << " : register(t" << index << ");\n"; + ss << (multisampled ? "Texture2DMS " : "Texture2D ") << name << " : register(t" << index << ");\n"; ss << "SamplerState " << name << "_ss : register(s" << index << ");\n"; } } @@ -267,10 +270,20 @@ void ShaderGen::DeclareTextureBuffer(std::stringstream& ss, const char* name, u3 } } +static const char* GetInterpolationQualifier(bool glsl, bool vulkan, bool interface_block, bool centroid_interpolation, + bool sample_interpolation) +{ + if (glsl && interface_block && (!vulkan && !GLAD_GL_ARB_shading_language_420pack)) + return (sample_interpolation ? "sample out " : (centroid_interpolation ? "centroid out " : "")); + else + return (sample_interpolation ? "sample " : (centroid_interpolation ? 
"centroid " : "")); +} + void ShaderGen::DeclareVertexEntryPoint( std::stringstream& ss, const std::initializer_list& attributes, u32 num_color_outputs, u32 num_texcoord_outputs, const std::initializer_list>& additional_outputs, - bool declare_vertex_id, const char* output_block_suffix) + bool declare_vertex_id /* = false */, const char* output_block_suffix /* = "" */, + bool centroid_interpolation /* = false */, bool sample_interpolation /* = false */) { if (m_glsl) { @@ -291,30 +304,42 @@ void ShaderGen::DeclareVertexEntryPoint( if (m_use_glsl_interface_blocks) { + const char* qualifier = + GetInterpolationQualifier(m_glsl, IsVulkan(), true, centroid_interpolation, sample_interpolation); + if (IsVulkan()) ss << "layout(location = 0) "; ss << "out VertexData" << output_block_suffix << " {\n"; for (u32 i = 0; i < num_color_outputs; i++) - ss << " float4 v_col" << i << ";\n"; + ss << " " << qualifier << "float4 v_col" << i << ";\n"; for (u32 i = 0; i < num_texcoord_outputs; i++) - ss << " float2 v_tex" << i << ";\n"; + ss << " " << qualifier << "float2 v_tex" << i << ";\n"; for (const auto [qualifiers, name] : additional_outputs) - ss << " " << qualifiers << " " << name << ";\n"; + { + const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier; + ss << " " << qualifier_to_use << " " << name << ";\n"; + } ss << "};\n"; } else { + const char* qualifier = + GetInterpolationQualifier(m_glsl, IsVulkan(), false, centroid_interpolation, sample_interpolation); + for (u32 i = 0; i < num_color_outputs; i++) - ss << "out float4 v_col" << i << ";\n"; + ss << qualifier << "out float4 v_col" << i << ";\n"; for (u32 i = 0; i < num_texcoord_outputs; i++) - ss << "out float2 v_tex" << i << ";\n"; + ss << qualifier << "out float2 v_tex" << i << ";\n"; for (const auto [qualifiers, name] : additional_outputs) - ss << qualifiers << " out " << name << ";\n"; + { + const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? 
qualifiers : qualifier; + ss << qualifier_to_use << " out " << name << ";\n"; + } } ss << "#define v_pos gl_Position\n\n"; @@ -331,6 +356,9 @@ void ShaderGen::DeclareVertexEntryPoint( } else { + const char* qualifier = + GetInterpolationQualifier(false, false, false, centroid_interpolation, sample_interpolation); + ss << "void main(\n"; if (declare_vertex_id) @@ -344,15 +372,16 @@ void ShaderGen::DeclareVertexEntryPoint( } for (u32 i = 0; i < num_color_outputs; i++) - ss << " out float4 v_col" << i << " : COLOR" << i << ",\n"; + ss << " " << qualifier << "out float4 v_col" << i << " : COLOR" << i << ",\n"; for (u32 i = 0; i < num_texcoord_outputs; i++) - ss << " out float2 v_tex" << i << " : TEXCOORD" << i << ",\n"; + ss << " " << qualifier << "out float2 v_tex" << i << " : TEXCOORD" << i << ",\n"; u32 additional_counter = num_texcoord_outputs; for (const auto [qualifiers, name] : additional_outputs) { - ss << " " << qualifiers << " out " << name << " : TEXCOORD" << additional_counter << ",\n"; + const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? 
qualifiers : qualifier; + ss << " " << qualifier_to_use << " out " << name << " : TEXCOORD" << additional_counter << ",\n"; additional_counter++; } @@ -363,41 +392,58 @@ void ShaderGen::DeclareVertexEntryPoint( void ShaderGen::DeclareFragmentEntryPoint( std::stringstream& ss, u32 num_color_inputs, u32 num_texcoord_inputs, const std::initializer_list>& additional_inputs, - bool declare_fragcoord /* = false */, u32 num_color_outputs /* = 1 */, bool depth_output /* = false */) + bool declare_fragcoord /* = false */, u32 num_color_outputs /* = 1 */, bool depth_output /* = false */, + bool centroid_interpolation /* = false */, bool sample_interpolation /* = false */, + bool declare_sample_id /* = false */) { if (m_glsl) { if (m_use_glsl_interface_blocks) { + const char* qualifier = + GetInterpolationQualifier(m_glsl, IsVulkan(), true, centroid_interpolation, sample_interpolation); + if (IsVulkan()) ss << "layout(location = 0) "; ss << "in VertexData {\n"; for (u32 i = 0; i < num_color_inputs; i++) - ss << " float4 v_col" << i << ";\n"; + ss << " " << qualifier << "float4 v_col" << i << ";\n"; for (u32 i = 0; i < num_texcoord_inputs; i++) - ss << " float2 v_tex" << i << ";\n"; + ss << " " << qualifier << "float2 v_tex" << i << ";\n"; for (const auto [qualifiers, name] : additional_inputs) - ss << " " << qualifiers << " " << name << ";\n"; + { + const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? 
qualifiers : qualifier; + ss << " " << qualifier_to_use << " " << name << ";\n"; + } ss << "};\n"; } else { + const char* qualifier = + GetInterpolationQualifier(m_glsl, IsVulkan(), false, centroid_interpolation, sample_interpolation); + for (u32 i = 0; i < num_color_inputs; i++) - ss << "in float4 v_col" << i << ";\n"; + ss << qualifier << "in float4 v_col" << i << ";\n"; for (u32 i = 0; i < num_texcoord_inputs; i++) - ss << "in float2 v_tex" << i << ";\n"; + ss << qualifier << "in float2 v_tex" << i << ";\n"; for (const auto [qualifiers, name] : additional_inputs) - ss << qualifiers << " in " << name << ";\n"; + { + const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier; + ss << qualifier_to_use << " in " << name << ";\n"; + } } if (declare_fragcoord) ss << "#define v_pos gl_FragCoord\n"; + if (declare_sample_id) + ss << "#define f_sample_index uint(gl_SampleID)\n"; + if (depth_output) ss << "#define o_depth gl_FragDepth\n"; @@ -427,43 +473,47 @@ void ShaderGen::DeclareFragmentEntryPoint( } else { + const char* qualifier = + GetInterpolationQualifier(false, false, false, centroid_interpolation, sample_interpolation); + + ss << "void main(\n"; + + for (u32 i = 0; i < num_color_inputs; i++) + ss << " " << qualifier << "in float4 v_col" << i << " : COLOR" << i << ",\n"; + + for (u32 i = 0; i < num_texcoord_inputs; i++) + ss << " " << qualifier << "in float2 v_tex" << i << " : TEXCOORD" << i << ",\n"; + + u32 additional_counter = num_texcoord_inputs; + for (const auto [qualifiers, name] : additional_inputs) { - ss << "void main(\n"; + const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? 
qualifiers : qualifier; + ss << " " << qualifier_to_use << " in " << name << " : TEXCOORD" << additional_counter << ",\n"; + additional_counter++; + } - for (u32 i = 0; i < num_color_inputs; i++) - ss << " in float4 v_col" << i << " : COLOR" << i << ",\n"; + if (declare_fragcoord) + ss << " in float4 v_pos : SV_Position,\n"; + if (declare_sample_id) + ss << " in uint f_sample_index : SV_SampleIndex,\n"; - for (u32 i = 0; i < num_texcoord_inputs; i++) - ss << " in float2 v_tex" << i << " : TEXCOORD" << i << ",\n"; + if (depth_output) + { + ss << " out float o_depth : SV_Depth"; + if (num_color_outputs > 0) + ss << ",\n"; + else + ss << ")\n"; + } - u32 additional_counter = num_texcoord_inputs; - for (const auto [qualifiers, name] : additional_inputs) - { - ss << " " << qualifiers << " in " << name << " : TEXCOORD" << additional_counter << ",\n"; - additional_counter++; - } + for (u32 i = 0; i < num_color_outputs; i++) + { + ss << " out float4 o_col" << i << " : SV_Target" << i; - if (declare_fragcoord) - ss << " in float4 v_pos : SV_Position,\n"; - - if (depth_output) - { - ss << " out float o_depth : SV_Depth"; - if (num_color_outputs > 0) - ss << ",\n"; - else - ss << ")\n"; - } - - for (u32 i = 0; i < num_color_outputs; i++) - { - ss << " out float4 o_col" << i << " : SV_Target" << i; - - if (i == (num_color_outputs - 1)) - ss << ")\n"; - else - ss << ",\n"; - } + if (i == (num_color_outputs - 1)) + ss << ")\n"; + else + ss << ",\n"; } } } diff --git a/src/core/shadergen.h b/src/core/shadergen.h index 65d6bdc40..a00e2e5c9 100644 --- a/src/core/shadergen.h +++ b/src/core/shadergen.h @@ -25,15 +25,17 @@ protected: void WriteUniformBufferDeclaration(std::stringstream& ss, bool push_constant_on_vulkan); void DeclareUniformBuffer(std::stringstream& ss, const std::initializer_list& members, bool push_constant_on_vulkan); - void DeclareTexture(std::stringstream& ss, const char* name, u32 index); + void DeclareTexture(std::stringstream& ss, const char* name, u32 index, 
bool multisampled = false); void DeclareTextureBuffer(std::stringstream& ss, const char* name, u32 index, bool is_int, bool is_unsigned); void DeclareVertexEntryPoint(std::stringstream& ss, const std::initializer_list& attributes, u32 num_color_outputs, u32 num_texcoord_outputs, const std::initializer_list>& additional_outputs, - bool declare_vertex_id = false, const char* output_block_suffix = ""); + bool declare_vertex_id = false, const char* output_block_suffix = "", bool msaa = false, + bool ssaa = false); void DeclareFragmentEntryPoint(std::stringstream& ss, u32 num_color_inputs, u32 num_texcoord_inputs, const std::initializer_list>& additional_inputs, - bool declare_fragcoord = false, u32 num_color_outputs = 1, bool depth_output = false); + bool declare_fragcoord = false, u32 num_color_outputs = 1, bool depth_output = false, + bool msaa = false, bool ssaa = false, bool declare_sample_id = false); HostDisplay::RenderAPI m_render_api; bool m_glsl; diff --git a/src/duckstation-qt/enhancementsettingswidget.cpp b/src/duckstation-qt/enhancementsettingswidget.cpp index 185765be7..444203f88 100644 --- a/src/duckstation-qt/enhancementsettingswidget.cpp +++ b/src/duckstation-qt/enhancementsettingswidget.cpp @@ -36,6 +36,9 @@ EnhancementSettingsWidget::EnhancementSettingsWidget(QtHostInterface* host_inter connect(m_ui.pgxpEnable, &QCheckBox::stateChanged, this, &EnhancementSettingsWidget::updatePGXPSettingsEnabled); updatePGXPSettingsEnabled(); + connect(m_ui.msaaMode, QOverload::of(&QComboBox::currentIndexChanged), this, + &EnhancementSettingsWidget::msaaModeChanged); + dialog->registerWidgetHelp( m_ui.disableInterlacing, tr("Disable Interlacing (force progressive render/scan)"), tr("Unchecked"), tr( @@ -47,6 +50,11 @@ EnhancementSettingsWidget::EnhancementSettingsWidget(QtHostInterface* host_inter tr("Setting this beyond 1x will enhance the resolution of rendered 3D polygons and lines. Only applies " "to the hardware backends.
This option is usually safe, with most games looking fine at " "higher resolutions. Higher resolutions require a more powerful GPU.")); + dialog->registerWidgetHelp( + m_ui.msaaMode, tr("Multisample Antialiasing"), tr("Disabled"), + tr("Uses multisample antialiasing for rendering 3D objects. Can smooth out jagged edges on polygons at a lower " + "cost to performance compared to increasing the resolution scale, but may be more likely to cause rendering " + "errors in some games. Only applies to the hardware backends.")); dialog->registerWidgetHelp( m_ui.trueColor, tr("True Color Rendering (24-bit, disables dithering)"), tr("Unchecked"), tr("Forces the precision of colours output to the console's framebuffer to use the full 8 bits of precision per " @@ -103,6 +111,14 @@ void EnhancementSettingsWidget::updateScaledDitheringEnabled() void EnhancementSettingsWidget::setupAdditionalUi() { QtUtils::FillComboBoxWithResolutionScales(m_ui.resolutionScale); + QtUtils::FillComboBoxWithMSAAModes(m_ui.msaaMode); + + const QVariant current_msaa_mode( + QtUtils::GetMSAAModeValue(static_cast(m_host_interface->GetIntSettingValue("GPU", "Multisamples", 1)), + m_host_interface->GetBoolSettingValue("GPU", "PerSampleShading", false))); + const int current_msaa_index = m_ui.msaaMode->findData(current_msaa_mode); + if (current_msaa_index >= 0) + m_ui.msaaMode->setCurrentIndex(current_msaa_index); for (u32 i = 0; i < static_cast(GPUTextureFilter::Count); i++) { @@ -117,3 +133,13 @@ void EnhancementSettingsWidget::updatePGXPSettingsEnabled() m_ui.pgxpCulling->setEnabled(enabled); m_ui.pgxpTextureCorrection->setEnabled(enabled); } + +void EnhancementSettingsWidget::msaaModeChanged(int index) +{ + uint multisamples; + bool ssaa; + QtUtils::DecodeMSAAModeValue(m_ui.msaaMode->itemData(index), &multisamples, &ssaa); + m_host_interface->SetIntSettingValue("GPU", "Multisamples", static_cast(multisamples)); + m_host_interface->SetBoolSettingValue("GPU", "PerSampleShading", ssaa); + 
m_host_interface->applySettings(false); +} diff --git a/src/duckstation-qt/enhancementsettingswidget.h b/src/duckstation-qt/enhancementsettingswidget.h index 2d9b7e193..2abc0e6b2 100644 --- a/src/duckstation-qt/enhancementsettingswidget.h +++ b/src/duckstation-qt/enhancementsettingswidget.h @@ -18,6 +18,7 @@ public: private Q_SLOTS: void updateScaledDitheringEnabled(); void updatePGXPSettingsEnabled(); + void msaaModeChanged(int index); private: void setupAdditionalUi(); diff --git a/src/duckstation-qt/enhancementsettingswidget.ui b/src/duckstation-qt/enhancementsettingswidget.ui index 4baec1a8e..51d3b7132 100644 --- a/src/duckstation-qt/enhancementsettingswidget.ui +++ b/src/duckstation-qt/enhancementsettingswidget.ui @@ -26,129 +26,139 @@ 0 - - - - Rendering Enhancements - - - - - - Internal Resolution Scale: - - - - - - - - - - Texture Filtering: - - - - - - - - - - True Color Rendering (24-bit, disables dithering) - - - - - - - Scaled Dithering (scale dither pattern to resolution) - - - - - - - Widescreen Hack (render 3D in 16:9) - - - - - - - - - - Display Enhancements - - - - - - Disable Interlacing (force progressive render/scan) - - - - - - - Force NTSC Timings (60hz-on-PAL) - - - - - - - Force 4:3 For 24-Bit Display (disable widescreen for FMVs) - - - - - - - - - - PGXP (Precision Geometry Transform Pipeline) - - - - - - Geometry Correction - - - - - - - Culling Correction - - - - - - - Texture Correction - - - - - - - - - - Qt::Vertical - - - - 20 - 40 - - - - + + + + Rendering Enhancements + + + + + + Internal Resolution Scale: + + + + + + + + + + Texture Filtering: + + + + + + + + + + True Color Rendering (24-bit, disables dithering) + + + + + + + Scaled Dithering (scale dither pattern to resolution) + + + + + + + Widescreen Hack (render 3D in 16:9) + + + + + + + Multisample Antialiasing: + + + + + + + + + + + + + Display Enhancements + + + + + + Disable Interlacing (force progressive render/scan) + + + + + + + Force NTSC Timings (60hz-on-PAL) + + + + + 
+ + Force 4:3 For 24-Bit Display (disable widescreen for FMVs) + + + + + + + + + + PGXP (Precision Geometry Transform Pipeline) + + + + + + Geometry Correction + + + + + + + Culling Correction + + + + + + + Texture Correction + + + + + + + + + + Qt::Vertical + + + + 20 + 40 + + + + diff --git a/src/duckstation-qt/qtutils.cpp b/src/duckstation-qt/qtutils.cpp index 9d7791586..a16a49e6b 100644 --- a/src/duckstation-qt/qtutils.cpp +++ b/src/duckstation-qt/qtutils.cpp @@ -677,7 +677,7 @@ void FillComboBoxWithResolutionScales(QComboBox* cb) cb->addItem(qApp->translate("GPUSettingsWidget", "6x (for 1440p)")); cb->addItem(qApp->translate("GPUSettingsWidget", "7x")); cb->addItem(qApp->translate("GPUSettingsWidget", "8x")); - cb->addItem(qApp->translate("GPUSettingsWidget", "9x")); + cb->addItem(qApp->translate("GPUSettingsWidget", "9x (for 4K)")); cb->addItem(qApp->translate("GPUSettingsWidget", "10x")); cb->addItem(qApp->translate("GPUSettingsWidget", "11x")); cb->addItem(qApp->translate("GPUSettingsWidget", "12x")); @@ -687,4 +687,36 @@ void FillComboBoxWithResolutionScales(QComboBox* cb) cb->addItem(qApp->translate("GPUSettingsWidget", "16x")); } +QVariant GetMSAAModeValue(uint multisamples, bool ssaa) +{ + const uint userdata = (multisamples & 0x7FFFFFFFu) | (static_cast(ssaa) << 31); + return QVariant(userdata); +} + +void DecodeMSAAModeValue(const QVariant& userdata, uint* multisamples, bool* ssaa) +{ + bool ok; + const uint value = userdata.toUInt(&ok); + if (!ok || value == 0) + { + *multisamples = 1; + *ssaa = false; + return; + } + + *multisamples = value & 0x7FFFFFFFu; + *ssaa = (value & (1u << 31)) != 0u; +} + +void FillComboBoxWithMSAAModes(QComboBox* cb) +{ + cb->addItem(qApp->translate("GPUSettingsWidget", "Disabled"), GetMSAAModeValue(1, false)); + + for (uint i = 2; i <= 32; i *= 2) + cb->addItem(qApp->translate("GPUSettingsWidget", "%1x MSAA").arg(i), GetMSAAModeValue(i, false)); + + for (uint i = 2; i <= 32; i *= 2) + 
cb->addItem(qApp->translate("GPUSettingsWidget", "%1x SSAA").arg(i), GetMSAAModeValue(i, true)); +} + } // namespace QtUtils \ No newline at end of file diff --git a/src/duckstation-qt/qtutils.h b/src/duckstation-qt/qtutils.h index 32025738e..4e883dd60 100644 --- a/src/duckstation-qt/qtutils.h +++ b/src/duckstation-qt/qtutils.h @@ -16,6 +16,7 @@ class QFrame; class QKeyEvent; class QTableView; class QTreeView; +class QVariant; class QWidget; class QUrl; @@ -62,4 +63,9 @@ void OpenURL(QWidget* parent, const char* url); /// Fills a combo box with resolution scale options. void FillComboBoxWithResolutionScales(QComboBox* cb); +/// Fills a combo box with multisampling options. +QVariant GetMSAAModeValue(uint multisamples, bool ssaa); +void DecodeMSAAModeValue(const QVariant& userdata, uint* multisamples, bool* ssaa); +void FillComboBoxWithMSAAModes(QComboBox* cb); + } // namespace QtUtils \ No newline at end of file diff --git a/src/duckstation-sdl/sdl_host_interface.cpp b/src/duckstation-sdl/sdl_host_interface.cpp index d32688d1a..cea74d012 100644 --- a/src/duckstation-sdl/sdl_host_interface.cpp +++ b/src/duckstation-sdl/sdl_host_interface.cpp @@ -936,6 +936,47 @@ void SDLHostInterface::DrawQuickSettingsMenu() ImGui::EndMenu(); } + if (ImGui::BeginMenu("Multisampling")) + { + const u32 current_multisamples = m_settings_copy.gpu_multisamples; + const bool current_ssaa = m_settings_copy.gpu_per_sample_shading; + + if (ImGui::MenuItem("None", nullptr, (current_multisamples == 1))) + { + m_settings_copy.gpu_multisamples = 1; + m_settings_copy.gpu_per_sample_shading = false; + settings_changed = true; + } + + for (u32 i = 2; i <= 32; i *= 2) + { + char buf[32]; + std::snprintf(buf, sizeof(buf), "%ux MSAA", i); + + if (ImGui::MenuItem(buf, nullptr, (current_multisamples == i && !current_ssaa))) + { + m_settings_copy.gpu_multisamples = i; + m_settings_copy.gpu_per_sample_shading = false; + settings_changed = true; + } + } + + for (u32 i = 2; i <= 32; i *= 2) + { + char 
buf[32]; + std::snprintf(buf, sizeof(buf), "%ux SSAA", i); + + if (ImGui::MenuItem(buf, nullptr, (current_multisamples == i && current_ssaa))) + { + m_settings_copy.gpu_multisamples = i; + m_settings_copy.gpu_per_sample_shading = true; + settings_changed = true; + } + } + + ImGui::EndMenu(); + } + if (ImGui::BeginMenu("PGXP")) { settings_changed |= ImGui::MenuItem("PGXP Enabled", nullptr, &m_settings_copy.gpu_pgxp_enable); @@ -947,9 +988,8 @@ void SDLHostInterface::DrawQuickSettingsMenu() m_settings_copy.gpu_pgxp_enable); settings_changed |= ImGui::MenuItem("PGXP CPU Instructions", nullptr, &m_settings_copy.gpu_pgxp_cpu, m_settings_copy.gpu_pgxp_enable); - settings_changed |= - ImGui::MenuItem("PGXP Preserve Projection Precision", nullptr, &m_settings_copy.gpu_pgxp_preserve_proj_fp, - m_settings_copy.gpu_pgxp_enable); + settings_changed |= ImGui::MenuItem("PGXP Preserve Projection Precision", nullptr, + &m_settings_copy.gpu_pgxp_preserve_proj_fp, m_settings_copy.gpu_pgxp_enable); ImGui::EndMenu(); } diff --git a/src/frontend-common/d3d11_host_display.cpp b/src/frontend-common/d3d11_host_display.cpp index b178b8380..a9be30010 100644 --- a/src/frontend-common/d3d11_host_display.cpp +++ b/src/frontend-common/d3d11_host_display.cpp @@ -809,7 +809,7 @@ bool D3D11HostDisplay::CheckPostProcessingRenderTargets(u32 target_width, u32 ta if (m_post_processing_input_texture.GetWidth() != target_width || m_post_processing_input_texture.GetHeight() != target_height) { - if (!m_post_processing_input_texture.Create(m_device.Get(), target_width, target_height, format, bind_flags)) + if (!m_post_processing_input_texture.Create(m_device.Get(), target_width, target_height, 1, format, bind_flags)) return false; } @@ -819,7 +819,7 @@ bool D3D11HostDisplay::CheckPostProcessingRenderTargets(u32 target_width, u32 ta PostProcessingStage& pps = m_post_processing_stages[i]; if (pps.output_texture.GetWidth() != target_width || pps.output_texture.GetHeight() != target_height) { - if 
(!pps.output_texture.Create(m_device.Get(), target_width, target_height, format, bind_flags)) + if (!pps.output_texture.Create(m_device.Get(), target_width, target_height, 1, format, bind_flags)) return false; } } diff --git a/src/frontend-common/opengl_host_display.cpp b/src/frontend-common/opengl_host_display.cpp index 199d7a765..435375233 100644 --- a/src/frontend-common/opengl_host_display.cpp +++ b/src/frontend-common/opengl_host_display.cpp @@ -496,9 +496,9 @@ void OpenGLHostDisplay::RenderDisplay() } #endif - RenderDisplay(left, GetWindowHeight() - top - height, width, height, m_display_texture_handle, m_display_texture_width, m_display_texture_height, - m_display_texture_view_x, m_display_texture_view_y, m_display_texture_view_width, - m_display_texture_view_height, m_display_linear_filtering); + RenderDisplay(left, GetWindowHeight() - top - height, width, height, m_display_texture_handle, + m_display_texture_width, m_display_texture_height, m_display_texture_view_x, m_display_texture_view_y, + m_display_texture_view_width, m_display_texture_view_height, m_display_linear_filtering); } void OpenGLHostDisplay::RenderDisplay(s32 left, s32 bottom, s32 width, s32 height, void* texture_handle, @@ -629,7 +629,7 @@ bool OpenGLHostDisplay::CheckPostProcessingRenderTargets(u32 target_width, u32 t if (m_post_processing_input_texture.GetWidth() != target_width || m_post_processing_input_texture.GetHeight() != target_height) { - if (!m_post_processing_input_texture.Create(target_width, target_height, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE) || + if (!m_post_processing_input_texture.Create(target_width, target_height, 1, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE) || !m_post_processing_input_texture.CreateFramebuffer()) { return false; @@ -642,7 +642,7 @@ bool OpenGLHostDisplay::CheckPostProcessingRenderTargets(u32 target_width, u32 t PostProcessingStage& pps = m_post_processing_stages[i]; if (pps.output_texture.GetWidth() != target_width || pps.output_texture.GetHeight() != 
target_height) { - if (!pps.output_texture.Create(target_width, target_height, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE) || + if (!pps.output_texture.Create(target_width, target_height, 1, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE) || !pps.output_texture.CreateFramebuffer()) { return false;