From 197b193ca369847e2c25ee8bbbfd81fb83b6b946 Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Thu, 16 Apr 2020 21:29:11 +1000 Subject: [PATCH] GPU/HW/OpenGL: Use geometry shaders for upscaled line rendering --- src/core/gpu_hw_opengl.cpp | 55 ++++++++++---- src/core/gpu_hw_opengl.h | 5 +- src/core/gpu_hw_shadergen.cpp | 137 +++++++++++++++++++++++++++------- src/core/gpu_hw_shadergen.h | 7 +- 4 files changed, 157 insertions(+), 47 deletions(-) diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp index 34fe2f3f9..9da7ad434 100644 --- a/src/core/gpu_hw_opengl.cpp +++ b/src/core/gpu_hw_opengl.cpp @@ -91,7 +91,8 @@ void GPU_HW_OpenGL::ResetGraphicsAPIState() glDisable(GL_SCISSOR_TEST); glDisable(GL_BLEND); glDepthMask(GL_TRUE); - glLineWidth(1.0f); + if (m_resolution_scale > 1 && !m_supports_geometry_shaders) + glLineWidth(1.0f); glBindVertexArray(0); } @@ -104,7 +105,8 @@ void GPU_HW_OpenGL::RestoreGraphicsAPIState() glDisable(GL_DEPTH_TEST); glEnable(GL_SCISSOR_TEST); glDepthMask(GL_FALSE); - glLineWidth(static_cast(m_resolution_scale)); + if (m_resolution_scale > 1 && !m_supports_geometry_shaders) + glLineWidth(static_cast(m_resolution_scale)); glBindVertexArray(m_vao_id); SetScissorFromDrawingArea(); @@ -148,13 +150,7 @@ void GPU_HW_OpenGL::SetCapabilities(HostDisplay* host_display) GLint max_texture_size = VRAM_WIDTH; glGetIntegerv(GL_MAX_TEXTURE_SIZE, &max_texture_size); Log_InfoPrintf("Max texture size: %dx%d", max_texture_size, max_texture_size); - const int max_texture_scale = max_texture_size / VRAM_WIDTH; - - std::array line_width_range = {{1, 1}}; - glGetIntegerv(GL_ALIASED_LINE_WIDTH_RANGE, line_width_range.data()); - Log_InfoPrintf("Max line width: %d", line_width_range[1]); - - m_max_resolution_scale = std::min(max_texture_scale, line_width_range[1]); + m_max_resolution_scale = static_cast(max_texture_size / VRAM_WIDTH); glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, reinterpret_cast(&m_uniform_buffer_alignment)); Log_InfoPrintf("Uniform buffer offset alignment: %u", m_uniform_buffer_alignment); @@ -180,6 +176,19 @@ void GPU_HW_OpenGL::SetCapabilities(HostDisplay* host_display) m_supports_dual_source_blend = (max_dual_source_draw_buffers > 0); if (!m_supports_dual_source_blend) Log_WarningPrintf("Dual-source blending is not supported, this may break some mask effects."); + + m_supports_geometry_shaders = GLAD_GL_VERSION_3_2 || GLAD_GL_ARB_geometry_shader4 || GLAD_GL_ES_VERSION_3_2; + if (!m_supports_geometry_shaders) + { + Log_WarningPrintf("Geometry shaders are not supported, line rendering at higher resolutions may be incorrect. We " + "will try to use glLineWidth() to emulate this, but the accuracy depends on your driver."); + + std::array line_width_range = {{1, 1}}; + glGetIntegerv(GL_ALIASED_LINE_WIDTH_RANGE, line_width_range.data()); + Log_InfoPrintf("Max line width: %d", line_width_range[1]); + + m_max_resolution_scale = std::min(m_max_resolution_scale, line_width_range[1]); + } } bool GPU_HW_OpenGL::CreateFramebuffer() @@ -305,12 +314,12 @@ bool GPU_HW_OpenGL::CompilePrograms() for (u8 interlacing = 0; interlacing < 2; interlacing++) { const bool textured = (static_cast(texture_mode) != TextureMode::Disabled); - const std::string vs = shadergen.GenerateBatchVertexShader(textured); + const std::string batch_vs = shadergen.GenerateBatchVertexShader(textured); const std::string fs = shadergen.GenerateBatchFragmentShader( static_cast(render_mode), static_cast(texture_mode), ConvertToBoolUnchecked(dithering), ConvertToBoolUnchecked(interlacing)); - std::optional prog = m_shader_cache.GetProgram(vs, {}, fs, [this, textured](GL::Program& prog) { + const auto link_callback = [this, textured](GL::Program& prog) { prog.BindAttribute(0, "a_pos"); prog.BindAttribute(1, "a_col0"); if (textured) @@ -321,7 +330,9 @@ bool GPU_HW_OpenGL::CompilePrograms() if (!m_is_gles) prog.BindFragData(0, "o_col0"); - }); + }; + + std::optional prog = m_shader_cache.GetProgram(batch_vs, {}, fs, link_callback); if (!prog) return false; @@ -333,6 +344,18 @@ bool GPU_HW_OpenGL::CompilePrograms() } m_render_programs[render_mode][texture_mode][dithering][interlacing] = std::move(*prog); + + if (!textured && m_supports_geometry_shaders) + { + const std::string line_expand_gs = shadergen.GenerateBatchLineExpandGeometryShader(); + + prog = m_shader_cache.GetProgram(batch_vs, line_expand_gs, fs, link_callback); + if (!prog) + return false; + + prog->BindUniformBlock("UBOBlock", 1); + m_line_render_programs[render_mode][dithering][interlacing] = std::move(*prog); + } } } } @@ -418,8 +441,12 @@ bool GPU_HW_OpenGL::CompilePrograms() void GPU_HW_OpenGL::SetDrawState(BatchRenderMode render_mode) { - const GL::Program& prog = m_render_programs[static_cast(render_mode)][static_cast(m_batch.texture_mode)] - [BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)]; + const GL::Program& prog = + ((m_batch.primitive < BatchPrimitive::Triangles && m_supports_geometry_shaders && m_resolution_scale > 1) ? + m_line_render_programs[static_cast(render_mode)][BoolToUInt8(m_batch.dithering)] + [BoolToUInt8(m_batch.interlacing)] : + m_render_programs[static_cast(render_mode)][static_cast(m_batch.texture_mode)] + [BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)]); prog.Bind(); if (m_batch.texture_mode != TextureMode::Disabled) diff --git a/src/core/gpu_hw_opengl.h b/src/core/gpu_hw_opengl.h index 4827c3fbe..0538f0ebb 100644 --- a/src/core/gpu_hw_opengl.h +++ b/src/core/gpu_hw_opengl.h @@ -77,7 +77,9 @@ private: GLuint m_texture_buffer_r16ui_texture = 0; std::array, 2>, 9>, 4> - m_render_programs; // [render_mode][texture_mode][dithering][interlacing] + m_render_programs; // [render_mode][texture_mode][dithering][interlacing] + std::array, 2>, 4> + m_line_render_programs; // [render_mode][dithering][interlacing] std::array, 2> m_display_programs; // [depth_24][interlaced] GL::Program m_vram_interlaced_fill_program; GL::Program m_vram_read_program; @@ -88,4 +90,5 @@ private: bool m_is_gles = false; bool m_supports_texture_buffer = false; + bool m_supports_geometry_shaders = false; }; diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index 8bbf023fa..0b3706c25 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -9,10 +9,15 @@ GPU_HW_ShaderGen::GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolu bool scaled_dithering, bool texture_filtering, bool supports_dual_source_blend) : m_render_api(render_api), m_resolution_scale(resolution_scale), m_true_color(true_color), m_scaled_dithering(scaled_dithering), m_texture_filering(texture_filtering), - m_glsl(render_api != HostDisplay::RenderAPI::D3D11), m_supports_dual_source_blend(supports_dual_source_blend) + m_glsl(render_api != HostDisplay::RenderAPI::D3D11), m_supports_dual_source_blend(supports_dual_source_blend), + m_use_glsl_interface_blocks(false) { if (m_glsl) + { SetGLSLVersionString(); + + m_use_glsl_interface_blocks = (GLAD_GL_ES_VERSION_3_2 || GLAD_GL_VERSION_3_2); + } } GPU_HW_ShaderGen::~GPU_HW_ShaderGen() = default; @@ -216,25 +221,40 @@ void GPU_HW_ShaderGen::DeclareTextureBuffer(std::stringstream& ss, const char* n } } -void GPU_HW_ShaderGen::DeclareVertexEntryPoint(std::stringstream& ss, - const std::initializer_list& attributes, - u32 num_color_outputs, u32 num_texcoord_outputs, - const std::initializer_list& additional_outputs, - bool declare_vertex_id) +void GPU_HW_ShaderGen::DeclareVertexEntryPoint( + std::stringstream& ss, const std::initializer_list& attributes, u32 num_color_outputs, + u32 num_texcoord_outputs, const std::initializer_list>& additional_outputs, + bool declare_vertex_id) { if (m_glsl) { for (const char* attribute : attributes) ss << "in " << attribute << ";\n"; - for (u32 i = 0; i < num_color_outputs; i++) - ss << "out float4 v_col" << i << ";\n"; + if (m_use_glsl_interface_blocks) + { + ss << "out VertexData {\n"; + for (u32 i = 0; i < num_color_outputs; i++) + ss << " float4 v_col" << i << ";\n"; - for (u32 i = 0; i < num_texcoord_outputs; i++) - ss << "out float2 v_tex" << i << ";\n"; + for (u32 i = 0; i < num_texcoord_outputs; i++) + ss << " float2 v_tex" << i << ";\n"; - for (const char* output : additional_outputs) - ss << output << ";\n"; + for (const auto [qualifiers, name] : additional_outputs) + ss << " " << qualifiers << " " << name << ";\n"; + ss << "};\n"; + } + else + { + for (u32 i = 0; i < num_color_outputs; i++) + ss << "out float4 v_col" << i << ";\n"; + + for (u32 i = 0; i < num_texcoord_outputs; i++) + ss << "out float2 v_tex" << i << ";\n"; + + for (const auto [qualifiers, name] : additional_outputs) + ss << qualifiers << " out " << name << ";\n"; + } ss << "#define v_pos gl_Position\n\n"; if (declare_vertex_id) @@ -264,9 +284,9 @@ void GPU_HW_ShaderGen::DeclareVertexEntryPoint(std::stringstream& ss, ss << " out float2 v_tex" << i << " : TEXCOORD" << i << ",\n"; u32 additional_counter = num_texcoord_outputs; - for (const char* output : additional_outputs) + for (const auto [qualifiers, name] : additional_outputs) { - ss << " " << output << " : TEXCOORD" << additional_counter << ",\n"; + ss << " " << qualifiers << " out " << name << " : TEXCOORD" << additional_counter << ",\n"; additional_counter++; } @@ -274,20 +294,37 @@ void GPU_HW_ShaderGen::DeclareVertexEntryPoint(std::stringstream& ss, } } -void GPU_HW_ShaderGen::DeclareFragmentEntryPoint(std::stringstream& ss, u32 num_color_inputs, u32 num_texcoord_inputs, - const std::initializer_list& additional_inputs, - bool declare_fragcoord, bool dual_color_output) +void GPU_HW_ShaderGen::DeclareFragmentEntryPoint( + std::stringstream& ss, u32 num_color_inputs, u32 num_texcoord_inputs, + const std::initializer_list>& additional_inputs, + bool declare_fragcoord /* = false */, bool dual_color_output /* = false */) { if (m_glsl) { - for (u32 i = 0; i < num_color_inputs; i++) - ss << "in float4 v_col" << i << ";\n"; + if (m_use_glsl_interface_blocks) + { + ss << "in VertexData {\n"; + for (u32 i = 0; i < num_color_inputs; i++) + ss << " float4 v_col" << i << ";\n"; - for (u32 i = 0; i < num_texcoord_inputs; i++) - ss << "in float2 v_tex" << i << ";\n"; + for (u32 i = 0; i < num_texcoord_inputs; i++) + ss << " float2 v_tex" << i << ";\n"; - for (const char* input : additional_inputs) - ss << input << ";\n"; + for (const auto [qualifiers, name] : additional_inputs) + ss << " " << qualifiers << " " << name << ";\n"; + ss << "};\n"; + } + else + { + for (u32 i = 0; i < num_color_inputs; i++) + ss << "in float4 v_col" << i << ";\n"; + + for (u32 i = 0; i < num_texcoord_inputs; i++) + ss << "in float2 v_tex" << i << ";\n"; + + for (const auto [qualifiers, name] : additional_inputs) + ss << qualifiers << " in " << name << ";\n"; + } if (declare_fragcoord) ss << "#define v_pos gl_FragCoord\n"; @@ -312,9 +349,9 @@ void GPU_HW_ShaderGen::DeclareFragmentEntryPoint(std::stringstream& ss, u32 num_ ss << " in float2 v_tex" << i << " : TEXCOORD" << i << ",\n"; u32 additional_counter = num_texcoord_inputs; - for (const char* output : additional_inputs) + for (const auto [qualifiers, name] : additional_inputs) { - ss << " " << output << " : TEXCOORD" << additional_counter << ",\n"; + ss << " " << qualifiers << " in " << name << " : TEXCOORD" << additional_counter << ",\n"; additional_counter++; } @@ -353,7 +390,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured) if (textured) { DeclareVertexEntryPoint(ss, {"int2 a_pos", "float4 a_col0", "int a_texcoord", "int a_texpage"}, 1, 1, - {"nointerpolation out int4 v_texpage"}); + {{"nointerpolation", "int4 v_texpage"}}); } else { @@ -498,7 +535,7 @@ float4 SampleFromVRAM(int4 texpage, int2 icoord) if (textured) { - DeclareFragmentEntryPoint(ss, 1, 1, {"nointerpolation in int4 v_texpage"}, true, use_dual_source); + DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "int4 v_texpage"}}, true, use_dual_source); } else { @@ -638,15 +675,57 @@ std::string GPU_HW_ShaderGen::GenerateBatchLineExpandGeometryShader() WriteHeader(ss); WriteCommonFunctions(ss); + ss << R"( +CONSTANT float2 WIDTH = (1.0 / float2(VRAM_SIZE)) * float2(RESOLUTION_SCALE, RESOLUTION_SCALE); +)"; + // GS is a pain, too different between HLSL and GLSL... if (m_glsl) { + ss << R"( +in VertexData { + float4 v_col0; +} in_data[]; + +out VertexData { + float4 v_col0; +} out_data; + +layout(lines) in; +layout(triangle_strip, max_vertices = 4) out; + +void main() { + float2 dir = normalize(gl_in[1].gl_Position.xy - gl_in[0].gl_Position.xy); + float2 normal = cross(float3(dir, 0.0), float3(0.0, 0.0, 1.0)).xy * WIDTH; + float4 offset = float4(normal, 0.0, 0.0); + + // top-left + out_data.v_col0 = in_data[0].v_col0; + gl_Position = gl_in[0].gl_Position - offset; + EmitVertex(); + + // top-right + out_data.v_col0 = in_data[0].v_col0; + gl_Position = gl_in[0].gl_Position + offset; + EmitVertex(); + + // bottom-left + out_data.v_col0 = in_data[1].v_col0; + gl_Position = gl_in[1].gl_Position - offset; + EmitVertex(); + + // bottom-right + out_data.v_col0 = in_data[1].v_col0; + gl_Position = gl_in[1].gl_Position + offset; + EmitVertex(); + + EndPrimitive(); +} +)"; } else { ss << R"( -CONSTANT float2 WIDTH = (1.0 / float2(VRAM_SIZE)) * float2(RESOLUTION_SCALE, RESOLUTION_SCALE); - struct Vertex { float4 col0 : COLOR0; diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h index 60b19c3ad..f9e49af76 100644 --- a/src/core/gpu_hw_shadergen.h +++ b/src/core/gpu_hw_shadergen.h @@ -8,7 +8,7 @@ class GPU_HW_ShaderGen { public: GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolution_scale, bool true_color, bool scaled_dithering, - bool texture_filtering, bool supports_dual_source_belnd); + bool texture_filtering, bool supports_dual_source_blend); ~GPU_HW_ShaderGen(); std::string GenerateBatchVertexShader(bool textured); @@ -30,6 +30,7 @@ public: bool m_texture_filering; bool m_glsl; bool m_supports_dual_source_blend; + bool m_use_glsl_interface_blocks; std::string m_glsl_version_string; @@ -41,10 +42,10 @@ private: void DeclareTextureBuffer(std::stringstream& ss, const char* name, u32 index, bool is_int, bool is_unsigned); void DeclareVertexEntryPoint(std::stringstream& ss, const std::initializer_list& attributes, u32 num_color_outputs, u32 num_texcoord_outputs, - const std::initializer_list& additional_outputs, + const std::initializer_list>& additional_outputs, bool declare_vertex_id = false); void DeclareFragmentEntryPoint(std::stringstream& ss, u32 num_color_inputs, u32 num_texcoord_inputs, - const std::initializer_list& additional_inputs, + const std::initializer_list>& additional_inputs, bool declare_fragcoord = false, bool dual_color_output = false); void WriteCommonFunctions(std::stringstream& ss);