diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index b4861aaee..d639f7e49 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -153,6 +153,7 @@ protected: u32 m_resolution_scale = 1; u32 m_max_resolution_scale = 1; bool m_true_color = false; + bool m_supports_dual_source_blend = false; BatchConfig m_batch = {}; BatchUBOData m_batch_ubo_data = {}; diff --git a/src/core/gpu_hw_d3d11.cpp b/src/core/gpu_hw_d3d11.cpp index 0e11057bd..6c829846c 100644 --- a/src/core/gpu_hw_d3d11.cpp +++ b/src/core/gpu_hw_d3d11.cpp @@ -144,6 +144,8 @@ void GPU_HW_D3D11::SetCapabilities() m_max_resolution_scale = max_texture_scale; Log_InfoPrintf("Maximum resolution scale is %u", m_max_resolution_scale); + + m_supports_dual_source_blend = true; } bool GPU_HW_D3D11::CreateFramebuffer() @@ -237,7 +239,8 @@ bool GPU_HW_D3D11::CreateBatchInputLayout() {"ATTR", 3, DXGI_FORMAT_R32_SINT, 0, offsetof(BatchVertex, texpage), D3D11_INPUT_PER_VERTEX_DATA, 0}}}; // we need a vertex shader... - GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color); + GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color, + m_supports_dual_source_blend); ComPtr vs_bytecode = D3D11::ShaderCompiler::CompileShader( D3D11::ShaderCompiler::Type::Vertex, m_device->GetFeatureLevel(), shadergen.GenerateBatchVertexShader(true), false); if (!vs_bytecode) @@ -295,7 +298,7 @@ bool GPU_HW_D3D11::CreateStateObjects() { bl_desc.RenderTarget[0].BlendEnable = TRUE; bl_desc.RenderTarget[0].SrcBlend = D3D11_BLEND_ONE; - bl_desc.RenderTarget[0].DestBlend = D3D11_BLEND_SRC_ALPHA; + bl_desc.RenderTarget[0].DestBlend = D3D11_BLEND_SRC1_ALPHA; bl_desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE; bl_desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO; bl_desc.RenderTarget[0].BlendOp = @@ -315,7 +318,8 @@ bool GPU_HW_D3D11::CreateStateObjects() bool GPU_HW_D3D11::CompileShaders() { const bool debug = true; - GPU_HW_ShaderGen 
shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color); + GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color, + m_supports_dual_source_blend); m_screen_quad_vertex_shader = D3D11::ShaderCompiler::CompileAndCreateVertexShader( m_device.Get(), shadergen.GenerateScreenQuadVertexShader(), debug); diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp index e95600124..eb73d9016 100644 --- a/src/core/gpu_hw_opengl.cpp +++ b/src/core/gpu_hw_opengl.cpp @@ -144,6 +144,12 @@ void GPU_HW_OpenGL::SetCapabilities(HostDisplay* host_display) { Log_WarningPrintf("Texture buffers are not supported, VRAM writes will be slower."); } + + int max_dual_source_draw_buffers = 0; + glGetIntegerv(GL_MAX_DUAL_SOURCE_DRAW_BUFFERS, &max_dual_source_draw_buffers); + m_supports_dual_source_blend = (max_dual_source_draw_buffers > 0); + if (!m_supports_dual_source_blend) + Log_WarningPrintf("Dual-source blending is not supported, this may break some mask effects."); } void GPU_HW_OpenGL::CreateFramebuffer() @@ -256,7 +262,8 @@ void GPU_HW_OpenGL::CreateTextureBuffer() bool GPU_HW_OpenGL::CompilePrograms() { - GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color); + GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color, + m_supports_dual_source_blend); for (u32 render_mode = 0; render_mode < 4; render_mode++) { @@ -310,7 +317,17 @@ bool GPU_HW_OpenGL::CompilePrograms() return false; if (!m_is_gles) - prog.BindFragData(0, "o_col0"); + { + if (m_supports_dual_source_blend) + { + prog.BindFragDataIndexed(0, "o_col0"); + prog.BindFragDataIndexed(1, "o_col1"); + } + else + { + prog.BindFragData(0, "o_col0"); + } + } if (!prog.Link()) return false; @@ -381,7 +398,7 @@ void GPU_HW_OpenGL::SetDrawState(BatchRenderMode render_mode) glBlendEquationSeparate( m_batch.transparency_mode == TransparencyMode::BackgroundMinusForeground ? 
GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD, GL_FUNC_ADD); - glBlendFuncSeparate(GL_ONE, GL_SRC_ALPHA, GL_ONE, GL_ZERO); + glBlendFuncSeparate(GL_ONE, m_supports_dual_source_blend ? GL_SRC1_ALPHA : GL_SRC_ALPHA, GL_ONE, GL_ZERO); } if (m_drawing_area_changed) diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index 256e94ab8..eae80eda4 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -1,9 +1,10 @@ #include "gpu_hw_shadergen.h" #include -GPU_HW_ShaderGen::GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolution_scale, bool true_color) +GPU_HW_ShaderGen::GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolution_scale, bool true_color, + bool supports_dual_source_blend) : m_render_api(render_api), m_resolution_scale(resolution_scale), m_true_color(true_color), - m_glsl(render_api != HostDisplay::RenderAPI::D3D11) + m_glsl(render_api != HostDisplay::RenderAPI::D3D11), m_supports_dual_source_blend(supports_dual_source_blend) { } @@ -342,6 +343,8 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod const GPU::TextureMode actual_texture_mode = texture_mode & ~GPU::TextureMode::RawTextureBit; const bool raw_texture = (texture_mode & GPU::TextureMode::RawTextureBit) == GPU::TextureMode::RawTextureBit; const bool textured = (texture_mode != GPU::TextureMode::Disabled); + const bool use_dual_source = + m_supports_dual_source_blend && transparency != GPU_HW::BatchRenderMode::TransparencyDisabled; std::stringstream ss; WriteHeader(ss); @@ -357,6 +360,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod DefineMacro(ss, "RAW_TEXTURE", raw_texture); DefineMacro(ss, "DITHERING", dithering); DefineMacro(ss, "TRUE_COLOR", m_true_color); + DefineMacro(ss, "USE_DUAL_SOURCE", use_dual_source); WriteCommonFunctions(ss); WriteBatchUniformBuffer(ss); @@ -451,11 +455,11 @@ int4 SampleFromVRAM(int4 texpage, float2 coord) if (textured) { - 
DeclareFragmentEntryPoint(ss, 1, 1, {"nointerpolation in int4 v_texpage"}, true, false); + DeclareFragmentEntryPoint(ss, 1, 1, {"nointerpolation in int4 v_texpage"}, true, use_dual_source); } else { - DeclareFragmentEntryPoint(ss, 1, 0, {}, true, false); + DeclareFragmentEntryPoint(ss, 1, 0, {}, true, use_dual_source); } ss << R"( @@ -500,6 +504,9 @@ int4 SampleFromVRAM(int4 texpage, float2 coord) icolor = TruncateTo15Bit(icolor); #endif + // Compute output alpha (mask bit) + float output_alpha = float(semitransparent); + // Normalize float3 color = float3(icolor) / float3(255.0, 255.0, 255.0); @@ -510,17 +517,34 @@ int4 SampleFromVRAM(int4 texpage, float2 coord) #if TRANSPARENCY_ONLY_OPAQUE discard; #endif - o_col0 = float4(color * u_src_alpha_factor, u_dst_alpha_factor); + + #if USE_DUAL_SOURCE + o_col0 = float4(color * u_src_alpha_factor, output_alpha); + o_col1 = float4(0.0, 0.0, 0.0, u_dst_alpha_factor); + #else + o_col0 = float4(color * u_src_alpha_factor, u_dst_alpha_factor); + #endif } else { #if TRANSPARENCY_ONLY_TRANSPARENCY discard; #endif - o_col0 = float4(color, 0.0); + + #if USE_DUAL_SOURCE + o_col0 = float4(color, output_alpha); + o_col1 = float4(0.0, 0.0, 0.0, 0.0); + #else + o_col0 = float4(color, 0.0); + #endif } #else - o_col0 = float4(color, 0.0); + // Non-transparency won't enable blending so we can write the mask here regardless. 
+ o_col0 = float4(color, output_alpha); + + #if USE_DUAL_SOURCE + o_col1 = float4(0.0, 0.0, 0.0, 0.0); + #endif #endif } )"; diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h index 769a987ea..933bf01f9 100644 --- a/src/core/gpu_hw_shadergen.h +++ b/src/core/gpu_hw_shadergen.h @@ -7,7 +7,8 @@ class GPU_HW_ShaderGen { public: - GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolution_scale, bool true_color); + GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolution_scale, bool true_color, + bool supports_dual_source_blend); ~GPU_HW_ShaderGen(); std::string GenerateBatchVertexShader(bool textured); @@ -26,6 +27,7 @@ public: bool m_true_color; bool m_glsl; bool m_glsl_es; + bool m_supports_dual_source_blend; private: void WriteHeader(std::stringstream& ss);