diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index b01c955c5..98277d5a6 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -75,6 +75,7 @@ bool GPU_HW::Initialize(HostDisplay* host_display) m_texture_filtering = GPUTextureFilter::Nearest; } + m_pgxp_depth_buffer = g_settings.UsingPGXPDepthBuffer(); PrintSettingsToLog(); return true; } @@ -123,7 +124,8 @@ void GPU_HW::UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed) (m_resolution_scale != resolution_scale || m_multisamples != multisamples || m_true_color != g_settings.gpu_true_color || m_per_sample_shading != per_sample_shading || m_scaled_dithering != g_settings.gpu_scaled_dithering || m_texture_filtering != g_settings.gpu_texture_filter || - m_using_uv_limits != use_uv_limits || m_chroma_smoothing != g_settings.gpu_24bit_chroma_smoothing); + m_using_uv_limits != use_uv_limits || m_chroma_smoothing != g_settings.gpu_24bit_chroma_smoothing || + m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer()); if (m_resolution_scale != resolution_scale) { @@ -161,6 +163,14 @@ void GPU_HW::UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed) if (!m_supports_dual_source_blend && TextureFilterRequiresDualSourceBlend(m_texture_filtering)) m_texture_filtering = GPUTextureFilter::Nearest; + if (m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer()) + { + m_pgxp_depth_buffer = g_settings.UsingPGXPDepthBuffer(); + m_batch.use_depth_buffer = false; + if (m_pgxp_depth_buffer) + ClearDepthBuffer(); + } + PrintSettingsToLog(); } @@ -202,6 +212,7 @@ void GPU_HW::PrintSettingsToLog() Log_InfoPrintf("Texture Filtering: %s", Settings::GetTextureFilterDisplayName(m_texture_filtering)); Log_InfoPrintf("Dual-source blending: %s", m_supports_dual_source_blend ? "Supported" : "Not supported"); Log_InfoPrintf("Using UV limits: %s", m_using_uv_limits ? "YES" : "NO"); + Log_InfoPrintf("Depth buffer: %s", m_pgxp_depth_buffer ? 
"YES" : "NO"); } void GPU_HW::UpdateVRAMReadTexture() @@ -320,6 +331,44 @@ void GPU_HW::ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices) vertices[i].SetUVLimits(min_u, max_u, min_v, max_v); } +void GPU_HW::SetBatchDepthBuffer(bool enabled) +{ + if (m_batch.use_depth_buffer == enabled) + return; + + if (GetBatchVertexCount() > 0) + { + FlushRender(); + EnsureVertexBufferSpaceForCurrentCommand(); + } + + m_batch.use_depth_buffer = enabled; + m_last_depth_z = 1.0f; +} + +void GPU_HW::CheckForDepthClear(const BatchVertex* vertices, u32 num_vertices) +{ + DebugAssert(num_vertices == 3 || num_vertices == 4); + float average_z; + if (num_vertices == 3) + average_z = std::min((vertices[0].w + vertices[1].w + vertices[2].w) / 3.0f, 1.0f); + else + average_z = std::min((vertices[0].w + vertices[1].w + vertices[2].w + vertices[3].w) / 4.0f, 1.0f); + + if ((average_z - m_last_depth_z) >= g_settings.gpu_pgxp_depth_clear_threshold) + { + if (GetBatchVertexCount() > 0) + { + FlushRender(); + EnsureVertexBufferSpaceForCurrentCommand(); + } + + ClearDepthBuffer(); + } + + m_last_depth_z = average_z; +} + void GPU_HW::DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1, float depth) { const float dx = x1 - x0; @@ -453,10 +502,19 @@ void GPU_HW::LoadVertices() m_drawing_offset.y, &vertices[i].x, &vertices[i].y, &vertices[i].w); } } - if (!valid_w) + if (pgxp) { - for (BatchVertex& v : vertices) - v.w = 1.0f; + if (!valid_w) + { + SetBatchDepthBuffer(false); + for (BatchVertex& v : vertices) + v.w = 1.0f; + } + else if (m_pgxp_depth_buffer) + { + SetBatchDepthBuffer(true); + CheckForDepthClear(vertices.data(), num_vertices); + } } if (rc.quad_polygon && m_resolution_scale > 1) @@ -580,12 +638,13 @@ void GPU_HW::LoadVertices() break; } - // we can split the rectangle up into potentially 8 quads - DebugAssert(GetBatchVertexSpace() >= MAX_VERTICES_FOR_RECTANGLE); - if (!IsDrawingAreaIsValid()) return; + // we can split the rectangle up into 
potentially 8 quads + SetBatchDepthBuffer(false); + DebugAssert(GetBatchVertexSpace() >= MAX_VERTICES_FOR_RECTANGLE); + // Split the rectangle into multiple quads if it's greater than 256x256, as the texture page should repeat. u16 tex_top = orig_tex_top; for (s32 y_offset = 0; y_offset < rectangle_height;) @@ -634,6 +693,8 @@ void GPU_HW::LoadVertices() case GPUPrimitive::Line: { + SetBatchDepthBuffer(false); + if (!rc.polyline) { DebugAssert(GetBatchVertexSpace() >= 2); @@ -760,6 +821,10 @@ GPU_HW::VRAMFillUBOData GPU_HW::GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32 VRAMFillUBOData uniforms; std::tie(uniforms.u_fill_color[0], uniforms.u_fill_color[1], uniforms.u_fill_color[2], uniforms.u_fill_color[3]) = RGBA8ToFloat(color); + + if (m_pgxp_depth_buffer) + uniforms.u_fill_color[3] = 1.0f; + uniforms.u_interlaced_displayed_field = GetActiveLineLSB(); return uniforms; } @@ -879,6 +944,9 @@ void GPU_HW::EnsureVertexBufferSpaceForCurrentCommand() void GPU_HW::ResetBatchVertexDepth() { + if (m_pgxp_depth_buffer) + return; + Log_PerfPrint("Resetting batch vertex depth"); FlushRender(); UpdateDepthBufferFromMaskBit(); @@ -1023,6 +1091,9 @@ void GPU_HW::FlushRender() { m_drawing_area_changed = false; SetScissorFromDrawingArea(); + + if (m_pgxp_depth_buffer && m_last_depth_z < 1.0f) + ClearDepthBuffer(); } if (m_batch_ubo_dirty) diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 5fc0f1554..8abf1121a 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -100,6 +100,7 @@ protected: bool interlacing; bool set_mask_while_drawing; bool check_mask_before_draw; + bool use_depth_buffer; // We need two-pass rendering when using BG-FG blending and texturing, as the transparency can be enabled // on a per-pixel basis, and the opaque pixels shouldn't be blended at all. 
@@ -179,6 +180,7 @@ protected: virtual void UpdateVRAMReadTexture(); virtual void UpdateDepthBufferFromMaskBit() = 0; + virtual void ClearDepthBuffer() = 0; virtual void SetScissorFromDrawingArea() = 0; virtual void MapBatchVertexPointer(u32 required_vertices) = 0; virtual void UnmapBatchVertexPointer(u32 used_vertices) = 0; @@ -280,7 +282,10 @@ protected: /// Computes polygon U/V boundaries. static void ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices); - static bool AreUVLimitsNeeded(); + + /// Sets the depth test flag for PGXP depth buffering. + void SetBatchDepthBuffer(bool enabled); + void CheckForDepthClear(const BatchVertex* vertices, u32 num_vertices); HeapArray m_vram_shadow; @@ -289,6 +294,7 @@ protected: BatchVertex* m_batch_current_vertex_ptr = nullptr; u32 m_batch_base_vertex = 0; s32 m_current_depth = 0; + float m_last_depth_z = 1.0f; u32 m_resolution_scale = 1; u32 m_multisamples = 1; @@ -303,6 +309,7 @@ protected: bool m_supports_per_sample_shading = false; bool m_supports_dual_source_blend = false; bool m_using_uv_limits = false; + bool m_pgxp_depth_buffer = false; BatchConfig m_batch = {}; BatchUBOData m_batch_ubo_data = {}; diff --git a/src/core/gpu_hw_d3d11.cpp b/src/core/gpu_hw_d3d11.cpp index 3d81b47d6..90ccc5eb4 100644 --- a/src/core/gpu_hw_d3d11.cpp +++ b/src/core/gpu_hw_d3d11.cpp @@ -235,9 +235,10 @@ void GPU_HW_D3D11::ClearFramebuffer() { static constexpr std::array color = {}; m_context->ClearRenderTargetView(m_vram_texture.GetD3DRTV(), color.data()); - m_context->ClearDepthStencilView(m_vram_depth_view.Get(), D3D11_CLEAR_DEPTH, 0.0f, 0); + m_context->ClearDepthStencilView(m_vram_depth_view.Get(), D3D11_CLEAR_DEPTH, m_pgxp_depth_buffer ? 
1.0f : 0.0f, 0); m_context->ClearRenderTargetView(m_display_texture, color.data()); SetFullVRAMDirtyRectangle(); + m_last_depth_z = 1.0f; } void GPU_HW_D3D11::DestroyFramebuffer() @@ -287,6 +288,7 @@ bool GPU_HW_D3D11::CreateStateObjects() rs_desc.CullMode = D3D11_CULL_NONE; rs_desc.ScissorEnable = TRUE; rs_desc.MultisampleEnable = IsUsingMultisampling(); + rs_desc.DepthClipEnable = FALSE; hr = m_device->CreateRasterizerState(&rs_desc, m_cull_none_rasterizer_state.ReleaseAndGetAddressOf()); if (FAILED(hr)) return false; @@ -316,11 +318,16 @@ bool GPU_HW_D3D11::CreateStateObjects() if (FAILED(hr)) return false; - ds_desc.DepthFunc = D3D11_COMPARISON_GREATER_EQUAL; + ds_desc.DepthFunc = D3D11_COMPARISON_LESS_EQUAL; hr = m_device->CreateDepthStencilState(&ds_desc, m_depth_test_less_state.ReleaseAndGetAddressOf()); if (FAILED(hr)) return false; + ds_desc.DepthFunc = D3D11_COMPARISON_GREATER_EQUAL; + hr = m_device->CreateDepthStencilState(&ds_desc, m_depth_test_greater_state.ReleaseAndGetAddressOf()); + if (FAILED(hr)) + return false; + CD3D11_BLEND_DESC bl_desc = CD3D11_BLEND_DESC(CD3D11_DEFAULT()); hr = m_device->CreateBlendState(&bl_desc, m_blend_disabled_state.ReleaseAndGetAddressOf()); if (FAILED(hr)) @@ -377,6 +384,7 @@ void GPU_HW_D3D11::DestroyStateObjects() m_point_sampler_state.Reset(); m_blend_no_color_writes_state.Reset(); m_blend_disabled_state.Reset(); + m_depth_test_greater_state.Reset(); m_depth_test_less_state.Reset(); m_depth_test_always_state.Reset(); m_depth_disabled_state.Reset(); @@ -392,7 +400,7 @@ bool GPU_HW_D3D11::CompileShaders() GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading, m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits, - m_supports_dual_source_blend); + m_pgxp_depth_buffer, m_supports_dual_source_blend); Common::Timer compile_time; const int progress_total = 1 + 1 + 2 + (4 * 9 * 2 * 2) + 7 + (2 * 3); @@ -622,8 +630,12 @@ void 
GPU_HW_D3D11::DrawBatchVertices(BatchRenderMode render_mode, u32 base_verte const GPUTransparencyMode transparency_mode = (render_mode == BatchRenderMode::OnlyOpaque) ? GPUTransparencyMode::Disabled : m_batch.transparency_mode; m_context->OMSetBlendState(m_batch_blend_states[static_cast(transparency_mode)].Get(), nullptr, 0xFFFFFFFFu); + m_context->OMSetDepthStencilState( - m_batch.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0); + (m_batch.use_depth_buffer ? + m_depth_test_less_state.Get() : + (m_batch.check_mask_before_draw ? m_depth_test_greater_state.Get() : m_depth_test_always_state.Get())), + 0); m_context->Draw(num_vertices, base_vertex); } @@ -798,7 +810,8 @@ void GPU_HW_D3D11::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* d const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, map_result.index_aligned, set_mask, check_mask); - m_context->OMSetDepthStencilState(check_mask ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0); + m_context->OMSetDepthStencilState( + (check_mask && !m_pgxp_depth_buffer) ? m_depth_test_greater_state.Get() : m_depth_test_always_state.Get(), 0); m_context->PSSetShaderResources(0, 1, m_texture_stream_buffer_srv_r16ui.GetAddressOf()); // the viewport should already be set to the full vram, so just adjust the scissor @@ -825,13 +838,15 @@ void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 widt const Common::Rectangle dst_bounds_scaled(dst_bounds * m_resolution_scale); SetViewportAndScissor(dst_bounds_scaled.left, dst_bounds_scaled.top, dst_bounds_scaled.GetWidth(), dst_bounds_scaled.GetHeight()); - m_context->OMSetDepthStencilState( - m_GPUSTAT.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0); + m_context->OMSetDepthStencilState((m_GPUSTAT.check_mask_before_draw && !m_pgxp_depth_buffer) ? 
+ m_depth_test_greater_state.Get() : + m_depth_test_always_state.Get(), + 0); m_context->PSSetShaderResources(0, 1, m_vram_read_texture.GetD3DSRVArray()); DrawUtilityShader(m_vram_copy_pixel_shader.Get(), &uniforms, sizeof(uniforms)); RestoreGraphicsAPIState(); - if (m_GPUSTAT.check_mask_before_draw) + if (m_GPUSTAT.check_mask_before_draw && !m_pgxp_depth_buffer) m_current_depth++; return; @@ -877,6 +892,9 @@ void GPU_HW_D3D11::UpdateVRAMReadTexture() void GPU_HW_D3D11::UpdateDepthBufferFromMaskBit() { + if (m_pgxp_depth_buffer) + return; + SetViewportAndScissor(0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); m_context->OMSetRenderTargets(0, nullptr, m_vram_depth_view.Get()); @@ -890,6 +908,14 @@ void GPU_HW_D3D11::UpdateDepthBufferFromMaskBit() RestoreGraphicsAPIState(); } +void GPU_HW_D3D11::ClearDepthBuffer() +{ + DebugAssert(m_pgxp_depth_buffer); + + m_context->ClearDepthStencilView(m_vram_depth_view.Get(), D3D11_CLEAR_DEPTH, 1.0f, 0); + m_last_depth_z = 1.0f; +} + std::unique_ptr GPU::CreateHardwareD3D11Renderer() { return std::make_unique(); diff --git a/src/core/gpu_hw_d3d11.h b/src/core/gpu_hw_d3d11.h index c5634d621..4c9dd3401 100644 --- a/src/core/gpu_hw_d3d11.h +++ b/src/core/gpu_hw_d3d11.h @@ -35,6 +35,7 @@ protected: void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; void UpdateVRAMReadTexture() override; void UpdateDepthBufferFromMaskBit() override; + void ClearDepthBuffer() override; void SetScissorFromDrawingArea() override; void MapBatchVertexPointer(u32 required_vertices) override; void UnmapBatchVertexPointer(u32 used_vertices) override; @@ -94,6 +95,7 @@ private: ComPtr m_depth_disabled_state; ComPtr m_depth_test_always_state; ComPtr m_depth_test_less_state; + ComPtr m_depth_test_greater_state; ComPtr m_blend_disabled_state; ComPtr m_blend_no_color_writes_state; diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp index 4f90518a7..fb092a936 100644 --- 
a/src/core/gpu_hw_opengl.cpp +++ b/src/core/gpu_hw_opengl.cpp @@ -124,6 +124,7 @@ void GPU_HW_OpenGL::RestoreGraphicsAPIState() m_uniform_stream_buffer->Bind(); m_vram_read_texture.Bind(); SetBlendMode(); + m_current_depth_test = 0; SetDepthFunc(); SetScissorFromDrawingArea(); m_batch_ubo_dirty = true; @@ -311,11 +312,14 @@ bool GPU_HW_OpenGL::CreateFramebuffer() void GPU_HW_OpenGL::ClearFramebuffer() { + const float depth_clear_value = m_pgxp_depth_buffer ? 1.0f : 0.0f; + glDisable(GL_SCISSOR_TEST); glClearColor(0.0f, 0.0f, 0.0f, 0.0f); - IsGLES() ? glClearDepthf(0.0f) : glClearDepth(0.0f); + IsGLES() ? glClearDepthf(depth_clear_value) : glClearDepth(depth_clear_value); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glEnable(GL_SCISSOR_TEST); + m_last_depth_z = 1.0f; SetFullVRAMDirtyRectangle(); } @@ -386,7 +390,7 @@ bool GPU_HW_OpenGL::CompilePrograms() const bool use_binding_layout = GPU_HW_ShaderGen::UseGLSLBindingLayout(); GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading, m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits, - m_supports_dual_source_blend); + m_pgxp_depth_buffer, m_supports_dual_source_blend); Common::Timer compile_time; const int progress_total = (4 * 9 * 2 * 2) + (2 * 3) + 5; @@ -592,11 +596,7 @@ void GPU_HW_OpenGL::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vert SetBlendMode(); } - if (m_current_check_mask_before_draw != m_batch.check_mask_before_draw) - { - m_current_check_mask_before_draw = m_batch.check_mask_before_draw; - SetDepthFunc(); - } + SetDepthFunc(); glDrawArrays(GL_TRIANGLES, m_batch_base_vertex, num_vertices); } @@ -620,7 +620,16 @@ void GPU_HW_OpenGL::SetBlendMode() void GPU_HW_OpenGL::SetDepthFunc() { - glDepthFunc(m_current_check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS); + SetDepthFunc(m_batch.use_depth_buffer ? GL_LEQUAL : (m_batch.check_mask_before_draw ? 
GL_GEQUAL : GL_ALWAYS)); +} + +void GPU_HW_OpenGL::SetDepthFunc(GLenum func) +{ + if (m_current_depth_test == func) + return; + + glDepthFunc(func); + m_current_depth_test = func; } void GPU_HW_OpenGL::SetScissorFromDrawingArea() @@ -830,7 +839,7 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) m_vram_interlaced_fill_program.Bind(); UploadUniformBuffer(&uniforms, sizeof(uniforms)); glDisable(GL_BLEND); - glDepthFunc(GL_ALWAYS); + SetDepthFunc(GL_ALWAYS); glBindVertexArray(m_attributeless_vao_id); glDrawArrays(GL_TRIANGLES, 0, 3); @@ -852,7 +861,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* m_texture_stream_buffer->Unbind(); glDisable(GL_BLEND); - glDepthFunc(check_mask ? GL_GEQUAL : GL_ALWAYS); + SetDepthFunc((check_mask && !m_pgxp_depth_buffer) ? GL_GEQUAL : GL_ALWAYS); m_vram_write_program.Bind(); if (m_use_ssbo_for_vram_writes) @@ -961,7 +970,7 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid glDisable(GL_SCISSOR_TEST); glDisable(GL_BLEND); - glDepthFunc(m_GPUSTAT.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS); + SetDepthFunc((m_GPUSTAT.check_mask_before_draw && !m_pgxp_depth_buffer) ? GL_GEQUAL : GL_ALWAYS); const Common::Rectangle dst_bounds_scaled(dst_bounds * m_resolution_scale); glViewport(dst_bounds_scaled.left, @@ -1056,6 +1065,9 @@ void GPU_HW_OpenGL::UpdateVRAMReadTexture() void GPU_HW_OpenGL::UpdateDepthBufferFromMaskBit() { + if (m_pgxp_depth_buffer) + return; + glDisable(GL_SCISSOR_TEST); glDisable(GL_BLEND); glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); @@ -1073,6 +1085,15 @@ void GPU_HW_OpenGL::UpdateDepthBufferFromMaskBit() m_vram_read_texture.Bind(); } +void GPU_HW_OpenGL::ClearDepthBuffer() +{ + glDisable(GL_SCISSOR_TEST); + IsGLES() ? 
glClearDepthf(1.0f) : glClearDepth(1.0f); + glClear(GL_DEPTH_BUFFER_BIT); + glEnable(GL_SCISSOR_TEST); + m_last_depth_z = 1.0f; +} + std::unique_ptr GPU::CreateHardwareOpenGLRenderer() { return std::make_unique(); diff --git a/src/core/gpu_hw_opengl.h b/src/core/gpu_hw_opengl.h index 8b5d87fb6..74ecd1638 100644 --- a/src/core/gpu_hw_opengl.h +++ b/src/core/gpu_hw_opengl.h @@ -31,6 +31,7 @@ protected: void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; void UpdateVRAMReadTexture() override; void UpdateDepthBufferFromMaskBit() override; + void ClearDepthBuffer() override; void SetScissorFromDrawingArea() override; void MapBatchVertexPointer(u32 required_vertices) override; void UnmapBatchVertexPointer(u32 used_vertices) override; @@ -63,6 +64,7 @@ private: bool CompilePrograms(); void SetDepthFunc(); + void SetDepthFunc(GLenum func); void SetBlendMode(); // downsample texture - used for readbacks at >1xIR. @@ -98,7 +100,7 @@ private: bool m_supports_geometry_shaders = false; bool m_use_ssbo_for_vram_writes = false; - bool m_current_check_mask_before_draw = false; + GLenum m_current_depth_test = 0; GPUTransparencyMode m_current_transparency_mode = GPUTransparencyMode::Disabled; BatchRenderMode m_current_render_mode = BatchRenderMode::TransparencyDisabled; }; diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index a3dd7cdc3..808191e5e 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -5,10 +5,12 @@ GPU_HW_ShaderGen::GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolution_scale, u32 multisamples, bool per_sample_shading, bool true_color, bool scaled_dithering, - GPUTextureFilter texture_filtering, bool uv_limits, bool supports_dual_source_blend) + GPUTextureFilter texture_filtering, bool uv_limits, bool pgxp_depth, + bool supports_dual_source_blend) : ShaderGen(render_api, supports_dual_source_blend), m_resolution_scale(resolution_scale), 
m_multisamples(multisamples), m_true_color(true_color), m_per_sample_shading(per_sample_shading), - m_scaled_dithering(scaled_dithering), m_texture_filter(texture_filtering), m_uv_limits(uv_limits) + m_scaled_dithering(scaled_dithering), m_texture_filter(texture_filtering), m_uv_limits(uv_limits), + m_pgxp_depth(pgxp_depth) { } @@ -84,6 +86,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured) WriteHeader(ss); DefineMacro(ss, "TEXTURED", textured); DefineMacro(ss, "UV_LIMITS", m_uv_limits); + DefineMacro(ss, "PGXP_DEPTH", m_pgxp_depth); WriteCommonFunctions(ss); WriteBatchUniformBuffer(ss); @@ -135,8 +138,15 @@ CONSTANT float TEX_EPSILON = 0.00001; // 0..+1023 -> -1..1 float pos_x = ((a_pos.x + vertex_offset) / 512.0) - 1.0; float pos_y = ((a_pos.y + vertex_offset) / -256.0) + 1.0; + +#if PGXP_DEPTH + // Ignore mask Z when using PGXP depth. + float pos_z = a_pos.w; + float pos_w = a_pos.w; +#else float pos_z = a_pos.z; float pos_w = a_pos.w; +#endif #if API_OPENGL || API_OPENGL_ES pos_y += POS_EPSILON; @@ -689,6 +699,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod DefineMacro(ss, "TEXTURE_FILTERING", m_texture_filter != GPUTextureFilter::Nearest); DefineMacro(ss, "UV_LIMITS", m_uv_limits); DefineMacro(ss, "USE_DUAL_SOURCE", use_dual_source); + DefineMacro(ss, "PGXP_DEPTH", m_pgxp_depth); WriteCommonFunctions(ss); WriteBatchUniformBuffer(ss); @@ -800,17 +811,18 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords) { DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}, {"nointerpolation", "float4 v_uv_limits"}}, - true, use_dual_source ? 2 : 1, true, UsingMSAA(), UsingPerSampleShading()); + true, use_dual_source ? 2 : 1, !m_pgxp_depth, UsingMSAA(), UsingPerSampleShading()); } else { - DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}}, true, use_dual_source ? 
2 : 1, true, - UsingMSAA(), UsingPerSampleShading()); + DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}}, true, use_dual_source ? 2 : 1, + !m_pgxp_depth, UsingMSAA(), UsingPerSampleShading()); } } else { - DeclareFragmentEntryPoint(ss, 1, 0, {}, true, use_dual_source ? 2 : 1, true, UsingMSAA(), UsingPerSampleShading()); + DeclareFragmentEntryPoint(ss, 1, 0, {}, true, use_dual_source ? 2 : 1, !m_pgxp_depth, UsingMSAA(), + UsingPerSampleShading()); } ss << R"( @@ -939,7 +951,9 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords) o_col0 = float4(color, u_dst_alpha_factor / ialpha); #endif +#if !PGXP_DEPTH o_depth = oalpha * v_pos.z; +#endif } else { @@ -962,7 +976,9 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords) #endif #endif +#if !PGXP_DEPTH o_depth = oalpha * v_pos.z; +#endif } #else // Non-transparency won't enable blending so we can write the mask here regardless. @@ -972,7 +988,9 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords) o_col1 = float4(0.0, 0.0, 0.0, 1.0 - ialpha); #endif +#if !PGXP_DEPTH o_depth = oalpha * v_pos.z; +#endif #endif } )"; @@ -1196,6 +1214,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_ssbo) std::stringstream ss; WriteHeader(ss); WriteCommonFunctions(ss); + DefineMacro(ss, "PGXP_DEPTH", m_pgxp_depth); DeclareUniformBuffer(ss, {"uint2 u_base_coords", "uint2 u_end_coords", "uint2 u_size", "uint u_buffer_base_offset", "uint u_mask_or_bits", "float u_depth_value"}, @@ -1243,7 +1262,11 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_ssbo) uint value = GET_VALUE(buffer_offset) | u_mask_or_bits; o_col0 = RGBA5551ToRGBA8(value); +#if !PGXP_DEPTH o_depth = (o_col0.a == 1.0) ? 
u_depth_value : 0.0; +#else + o_depth = 1.0; +#endif })"; return ss.str(); @@ -1257,6 +1280,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader() std::stringstream ss; WriteHeader(ss); WriteCommonFunctions(ss); + DefineMacro(ss, "PGXP_DEPTH", m_pgxp_depth); DeclareUniformBuffer(ss, {"uint2 u_src_coords", "uint2 u_dst_coords", "uint2 u_end_coords", "uint2 u_size", "bool u_set_mask_bit", "float u_depth_value"}, @@ -1291,7 +1315,11 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader() float4 color = LOAD_TEXTURE(samp0, int2(src_coords), 0); #endif o_col0 = float4(color.xyz, u_set_mask_bit ? 1.0 : color.a); +#if !PGXP_DEPTH o_depth = (u_set_mask_bit ? 1.0f : ((o_col0.a == 1.0) ? u_depth_value : 0.0)); +#else + o_depth = 1.0f; +#endif })"; return ss.str(); diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h index b041908e7..842257765 100644 --- a/src/core/gpu_hw_shadergen.h +++ b/src/core/gpu_hw_shadergen.h @@ -7,7 +7,7 @@ class GPU_HW_ShaderGen : public ShaderGen public: GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolution_scale, u32 multisamples, bool per_sample_shading, bool true_color, bool scaled_dithering, GPUTextureFilter texture_filtering, bool uv_limits, - bool supports_dual_source_blend); + bool pgxp_depth, bool supports_dual_source_blend); ~GPU_HW_ShaderGen(); std::string GenerateBatchVertexShader(bool textured); @@ -36,4 +36,5 @@ private: bool m_scaled_dithering; GPUTextureFilter m_texture_filter; bool m_uv_limits; + bool m_pgxp_depth; }; diff --git a/src/core/gpu_hw_vulkan.cpp b/src/core/gpu_hw_vulkan.cpp index a3afec1c0..be760d14e 100644 --- a/src/core/gpu_hw_vulkan.cpp +++ b/src/core/gpu_hw_vulkan.cpp @@ -506,8 +506,8 @@ void GPU_HW_Vulkan::ClearFramebuffer() m_vram_depth_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); static constexpr VkClearColorValue cc = {}; + const VkClearDepthStencilValue cds = {m_pgxp_depth_buffer ? 
1.0f : 0.0f}; static constexpr VkImageSubresourceRange csrr = {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}; - static constexpr VkClearDepthStencilValue cds = {}; static constexpr VkImageSubresourceRange dsrr = {VK_IMAGE_ASPECT_DEPTH_BIT, 0u, 1u, 0u, 1u}; vkCmdClearColorImage(cmdbuf, m_vram_texture.GetImage(), m_vram_texture.GetLayout(), &cc, 1u, &csrr); vkCmdClearDepthStencilImage(cmdbuf, m_vram_depth_texture.GetImage(), m_vram_depth_texture.GetLayout(), &cds, 1u, @@ -515,6 +515,7 @@ void GPU_HW_Vulkan::ClearFramebuffer() m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); m_vram_depth_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + m_last_depth_z = 1.0f; SetFullVRAMDirtyRectangle(); } @@ -597,7 +598,7 @@ bool GPU_HW_Vulkan::CompilePipelines() GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading, m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits, - m_supports_dual_source_blend); + m_pgxp_depth_buffer, m_supports_dual_source_blend); Common::Timer compile_time; const int progress_total = 2 + (4 * 9 * 2 * 2) + (2 * 4 * 5 * 9 * 2 * 2) + 1 + 2 + 2 + 2 + 2 + (2 * 3); @@ -659,7 +660,7 @@ bool GPU_HW_Vulkan::CompilePipelines() Vulkan::GraphicsPipelineBuilder gpbuilder; // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] - for (u8 depth_test = 0; depth_test < 2; depth_test++) + for (u8 depth_test = 0; depth_test < 3; depth_test++) { for (u8 render_mode = 0; render_mode < 4; render_mode++) { @@ -671,6 +672,8 @@ bool GPU_HW_Vulkan::CompilePipelines() { for (u8 interlacing = 0; interlacing < 2; interlacing++) { + static constexpr std::array depth_test_values = { + VK_COMPARE_OP_ALWAYS, VK_COMPARE_OP_GREATER_OR_EQUAL, VK_COMPARE_OP_LESS_OR_EQUAL}; const bool textured = (static_cast(texture_mode) != GPUTextureMode::Disabled); gpbuilder.SetPipelineLayout(m_batch_pipeline_layout); @@ -692,8 
+695,7 @@ bool GPU_HW_Vulkan::CompilePipelines() gpbuilder.SetFragmentShader(batch_fragment_shaders[render_mode][texture_mode][dithering][interlacing]); gpbuilder.SetRasterizationState(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE, VK_FRONT_FACE_CLOCKWISE); - gpbuilder.SetDepthState(true, true, - (depth_test != 0) ? VK_COMPARE_OP_GREATER_OR_EQUAL : VK_COMPARE_OP_ALWAYS); + gpbuilder.SetDepthState(true, true, depth_test_values[depth_test]); gpbuilder.SetNoBlendingState(); gpbuilder.SetMultisamples(m_multisamples, m_per_sample_shading); @@ -935,11 +937,11 @@ void GPU_HW_Vulkan::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vert VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); - // [primitive][depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] + // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]; depth_test: 0 = always, 1 = mask test (>=), 2 = PGXP depth (<=), which takes priority over the mask test + const u8 depth_test = m_batch.use_depth_buffer ? static_cast<u8>(2) : BoolToUInt8(m_batch.check_mask_before_draw); VkPipeline pipeline = - m_batch_pipelines[BoolToUInt8(m_batch.check_mask_before_draw)][static_cast(render_mode)] - [static_cast(m_batch.texture_mode)][static_cast(m_batch.transparency_mode)] - [BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)]; + m_batch_pipelines[depth_test][static_cast(render_mode)][static_cast(m_batch.texture_mode)][static_cast( + m_batch.transparency_mode)][BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)]; vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); vkCmdDraw(cmdbuf, num_vertices, 1, base_vertex, 0); @@ -1171,7 +1173,8 @@ void GPU_HW_Vulkan::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, start_index, set_mask, check_mask); vkCmdPushConstants(cmdbuf, m_vram_write_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms), &uniforms); - vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, 
m_vram_write_pipelines[BoolToUInt8(check_mask)]); + vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, + m_vram_write_pipelines[BoolToUInt8(check_mask && !m_pgxp_depth_buffer)]); vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_vram_write_pipeline_layout, 0, 1, &m_vram_write_descriptor_set, 0, nullptr); @@ -1201,7 +1204,7 @@ void GPU_HW_Vulkan::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, - m_vram_copy_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw)]); + m_vram_copy_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw && !m_pgxp_depth_buffer)]); vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1, &m_vram_copy_descriptor_set, 0, nullptr); vkCmdPushConstants(cmdbuf, m_single_sampler_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms), @@ -1283,6 +1286,9 @@ void GPU_HW_Vulkan::UpdateVRAMReadTexture() void GPU_HW_Vulkan::UpdateDepthBufferFromMaskBit() { + if (m_pgxp_depth_buffer) + return; + EndRenderPass(); VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); @@ -1304,6 +1310,22 @@ void GPU_HW_Vulkan::UpdateDepthBufferFromMaskBit() RestoreGraphicsAPIState(); } +void GPU_HW_Vulkan::ClearDepthBuffer() +{ + EndRenderPass(); + + VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); + m_vram_depth_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + static constexpr VkClearDepthStencilValue cds = {1.0f}; + static constexpr VkImageSubresourceRange dsrr = {VK_IMAGE_ASPECT_DEPTH_BIT, 0u, 1u, 0u, 1u}; + vkCmdClearDepthStencilImage(cmdbuf, m_vram_depth_texture.GetImage(), m_vram_depth_texture.GetLayout(), &cds, 1u, + &dsrr); + + m_vram_depth_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + m_last_depth_z = 1.0f; +} + std::unique_ptr 
GPU::CreateHardwareVulkanRenderer() { return std::make_unique(); diff --git a/src/core/gpu_hw_vulkan.h b/src/core/gpu_hw_vulkan.h index e58ca2247..131703105 100644 --- a/src/core/gpu_hw_vulkan.h +++ b/src/core/gpu_hw_vulkan.h @@ -30,6 +30,7 @@ protected: void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; void UpdateVRAMReadTexture() override; void UpdateDepthBufferFromMaskBit() override; + void ClearDepthBuffer() override; void SetScissorFromDrawingArea() override; void MapBatchVertexPointer(u32 required_vertices) override; void UnmapBatchVertexPointer(u32 used_vertices) override; @@ -107,7 +108,7 @@ private: VkBufferView m_texture_stream_buffer_view = VK_NULL_HANDLE; // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] - DimensionalArray m_batch_pipelines{}; + DimensionalArray m_batch_pipelines{}; // [interlaced] std::array m_vram_fill_pipelines{}; diff --git a/src/core/host_interface.cpp b/src/core/host_interface.cpp index 7e07de2ed..ada7117a4 100644 --- a/src/core/host_interface.cpp +++ b/src/core/host_interface.cpp @@ -487,6 +487,8 @@ void HostInterface::SetDefaultSettings(SettingsInterface& si) si.SetBoolValue("GPU", "PGXPCPU", false); si.SetBoolValue("GPU", "PGXPPreserveProjFP", false); si.SetFloatValue("GPU", "PGXPTolerance", -1.0f); + si.SetBoolValue("GPU", "PGXPDepthBuffer", false); + si.SetFloatValue("GPU", "PGXPDepthClearThreshold", Settings::DEFAULT_GPU_PGXP_DEPTH_THRESHOLD); si.SetStringValue("Display", "CropMode", Settings::GetDisplayCropModeName(Settings::DEFAULT_DISPLAY_CROP_MODE)); si.SetIntValue("Display", "ActiveStartOffset", 0); @@ -694,6 +696,7 @@ void HostInterface::CheckForSettingsChanges(const Settings& old_settings) g_settings.display_crop_mode != old_settings.display_crop_mode || g_settings.display_aspect_ratio != old_settings.display_aspect_ratio || g_settings.gpu_pgxp_enable != old_settings.gpu_pgxp_enable || + g_settings.gpu_pgxp_depth_buffer != 
old_settings.gpu_pgxp_depth_buffer || g_settings.display_active_start_offset != old_settings.display_active_start_offset || g_settings.display_active_end_offset != old_settings.display_active_end_offset || g_settings.display_line_start_offset != old_settings.display_line_start_offset || diff --git a/src/core/settings.cpp b/src/core/settings.cpp index a73facbe5..1b0685703 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -160,6 +160,8 @@ void Settings::Load(SettingsInterface& si) gpu_pgxp_cpu = si.GetBoolValue("GPU", "PGXPCPU", false); gpu_pgxp_preserve_proj_fp = si.GetBoolValue("GPU", "PGXPPreserveProjFP", false); gpu_pgxp_tolerance = si.GetFloatValue("GPU", "PGXPTolerance", -1.0f); + gpu_pgxp_depth_buffer = si.GetBoolValue("GPU", "PGXPDepthBuffer", false); + SetPGXPDepthClearThreshold(si.GetFloatValue("GPU", "PGXPDepthClearThreshold", DEFAULT_GPU_PGXP_DEPTH_THRESHOLD)); display_crop_mode = ParseDisplayCropMode( @@ -298,6 +300,8 @@ void Settings::Save(SettingsInterface& si) const si.SetBoolValue("GPU", "PGXPCPU", gpu_pgxp_cpu); si.SetBoolValue("GPU", "PGXPPreserveProjFP", gpu_pgxp_preserve_proj_fp); si.SetFloatValue("GPU", "PGXPTolerance", gpu_pgxp_tolerance); + si.SetBoolValue("GPU", "PGXPDepthBuffer", gpu_pgxp_depth_buffer); + si.SetFloatValue("GPU", "PGXPDepthClearThreshold", GetPGXPDepthClearThreshold()); si.SetStringValue("Display", "CropMode", GetDisplayCropModeName(display_crop_mode)); si.SetIntValue("Display", "ActiveStartOffset", display_active_start_offset); diff --git a/src/core/settings.h b/src/core/settings.h index c4c775126..4f991a813 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -111,6 +111,7 @@ struct Settings bool gpu_pgxp_vertex_cache = false; bool gpu_pgxp_cpu = false; bool gpu_pgxp_preserve_proj_fp = false; + bool gpu_pgxp_depth_buffer = false; DisplayCropMode display_crop_mode = DisplayCropMode::None; DisplayAspectRatio display_aspect_ratio = DisplayAspectRatio::Auto; s16 display_active_start_offset = 0; @@ 
-130,6 +131,7 @@ struct Settings bool video_sync_enabled = true; float display_max_fps = 0.0f; float gpu_pgxp_tolerance = -1.0f; + float gpu_pgxp_depth_clear_threshold = 300.0f / 4096.0f; bool cdrom_read_thread = true; bool cdrom_region_check = true; @@ -194,6 +196,10 @@ struct Settings return gpu_pgxp_enable ? (gpu_pgxp_cpu ? PGXPMode::CPU : PGXPMode::Memory) : PGXPMode::Disabled; } + ALWAYS_INLINE bool UsingPGXPDepthBuffer() const { return gpu_pgxp_enable && gpu_pgxp_depth_buffer; } + ALWAYS_INLINE float GetPGXPDepthClearThreshold() const { return gpu_pgxp_depth_clear_threshold * 4096.0f; } + ALWAYS_INLINE void SetPGXPDepthClearThreshold(float value) { gpu_pgxp_depth_clear_threshold = value / 4096.0f; } + ALWAYS_INLINE bool IsUsingFastmem() const { return (cpu_fastmem_mode != CPUFastmemMode::Disabled && cpu_execution_mode == CPUExecutionMode::Recompiler && @@ -281,6 +287,7 @@ struct Settings #endif static constexpr GPUTextureFilter DEFAULT_GPU_TEXTURE_FILTER = GPUTextureFilter::Nearest; static constexpr ConsoleRegion DEFAULT_CONSOLE_REGION = ConsoleRegion::Auto; + static constexpr float DEFAULT_GPU_PGXP_DEPTH_THRESHOLD = 300.0f; #ifdef WITH_RECOMPILER static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::Recompiler;