diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 98a4bb255..2ee286db0 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -304,7 +304,7 @@ bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di void GPU_HW::RestoreDeviceContext() { g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler()); - g_gpu_device->SetRenderTarget(m_vram_texture.get(), m_vram_depth_texture.get()); + SetVRAMRenderTarget(); g_gpu_device->SetViewport(0, 0, m_vram_texture->GetWidth(), m_vram_texture->GetHeight()); SetScissor(); m_batch_ubo_dirty = true; @@ -328,6 +328,8 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) // TODO: Use old_settings const bool framebuffer_changed = (m_resolution_scale != resolution_scale || m_multisamples != multisamples || m_downsample_mode != downsample_mode || + (static_cast(m_vram_depth_texture) != + (g_settings.UsingPGXPDepthBuffer() || !m_supports_framebuffer_fetch)) || (m_downsample_mode == GPUDownsampleMode::Box && g_settings.gpu_downsample_scale != old_settings.gpu_downsample_scale)); const bool shaders_changed = @@ -400,7 +402,7 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) m_batch.use_depth_buffer = false; // might be null when resizing - if (m_vram_texture) + if (m_vram_depth_texture) { if (m_pgxp_depth_buffer) ClearDepthBuffer(); @@ -422,7 +424,6 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) if (framebuffer_changed) { - // TODO: weird vram loss when rapidly changing resolutions if (!CreateBuffers()) Panic("Failed to recreate buffers."); @@ -620,6 +621,12 @@ void GPU_HW::PrintSettingsToLog() Log_InfoFmt("Using software renderer for readbacks: {}", m_sw_renderer ? "YES" : "NO"); } +bool GPU_HW::NeedsDepthBuffer() const +{ + // PGXP depth, or no fbfetch, which means we need depth for the mask bit. + return (m_pgxp_depth_buffer || !m_supports_framebuffer_fetch); +} + bool GPU_HW::CreateBuffers() { DestroyBuffers(); @@ -628,6 +635,8 @@ bool GPU_HW::CreateBuffers() const u32 texture_width = VRAM_WIDTH * m_resolution_scale; const u32 texture_height = VRAM_HEIGHT * m_resolution_scale; const u8 samples = static_cast(m_multisamples); + const bool needs_depth_buffer = NeedsDepthBuffer(); + Log_DevFmt("Depth buffer is {}needed", needs_depth_buffer ? "" : "NOT "); // Needed for Metal resolve. const GPUTexture::Type read_texture_type = (g_gpu_device->GetRenderAPI() == RenderAPI::Metal && m_multisamples > 1) ? @@ -636,8 +645,9 @@ bool GPU_HW::CreateBuffers() if (!(m_vram_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples, GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)) || - !(m_vram_depth_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples, - GPUTexture::Type::DepthStencil, VRAM_DS_FORMAT)) || + (needs_depth_buffer && + !(m_vram_depth_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples, + GPUTexture::Type::DepthStencil, VRAM_DS_FORMAT))) || !(m_vram_read_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, 1, read_texture_type, VRAM_RT_FORMAT)) || !(m_vram_readback_texture = g_gpu_device->FetchTexture(VRAM_WIDTH / 2, VRAM_HEIGHT, 1, 1, 1, @@ -647,7 +657,8 @@ bool GPU_HW::CreateBuffers() } GL_OBJECT_NAME(m_vram_texture, "VRAM Texture"); - GL_OBJECT_NAME(m_vram_depth_texture, "VRAM Depth Texture"); + if (m_vram_depth_texture) + GL_OBJECT_NAME(m_vram_depth_texture, "VRAM Depth Texture"); GL_OBJECT_NAME(m_vram_read_texture, "VRAM Read Texture"); GL_OBJECT_NAME(m_vram_readback_texture, "VRAM Readback Texture"); @@ -689,7 +700,7 @@ bool GPU_HW::CreateBuffers() else if (m_downsample_mode == GPUDownsampleMode::Box) m_downsample_scale_or_levels = m_resolution_scale / GetBoxDownsampleScale(m_resolution_scale); - g_gpu_device->SetRenderTarget(m_vram_texture.get(), m_vram_depth_texture.get()); + SetVRAMRenderTarget(); SetFullVRAMDirtyRectangle(); return true; } @@ -697,11 +708,18 @@ bool GPU_HW::CreateBuffers() void GPU_HW::ClearFramebuffer() { g_gpu_device->ClearRenderTarget(m_vram_texture.get(), 0); - g_gpu_device->ClearDepth(m_vram_depth_texture.get(), m_pgxp_depth_buffer ? 1.0f : 0.0f); + if (m_vram_depth_texture) + g_gpu_device->ClearDepth(m_vram_depth_texture.get(), m_pgxp_depth_buffer ? 1.0f : 0.0f); ClearVRAMDirtyRectangle(); m_last_depth_z = 1.0f; } +void GPU_HW::SetVRAMRenderTarget() +{ + g_gpu_device->SetRenderTarget(m_vram_texture.get(), m_vram_depth_texture.get(), + m_allow_shader_blend ? GPUPipeline::ColorFeedbackLoop : GPUPipeline::NoRenderPassFlags); +} + void GPU_HW::DestroyBuffers() { ClearDisplayTexture(); @@ -723,19 +741,36 @@ void GPU_HW::DestroyBuffers() bool GPU_HW::CompilePipelines() { const GPUDevice::Features features = g_gpu_device->GetFeatures(); + const bool needs_depth_buffer = NeedsDepthBuffer(); + const bool write_mask_as_depth = (!m_pgxp_depth_buffer && needs_depth_buffer); + m_allow_shader_blend = (features.feedback_loops && (m_pgxp_depth_buffer || !needs_depth_buffer)); + GPU_HW_ShaderGen shadergen(g_gpu_device->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading, - m_true_color, m_scaled_dithering, m_texture_filtering, m_clamp_uvs, m_pgxp_depth_buffer, + m_true_color, m_scaled_dithering, m_texture_filtering, m_clamp_uvs, write_mask_as_depth, m_disable_color_perspective, m_supports_dual_source_blend, m_supports_framebuffer_fetch, m_debanding); - ShaderCompileProgressTracker progress("Compiling Pipelines", 2 + (4 * 5 * 9 * 2 * 2) + (3 * 4 * 5 * 9 * 2 * 2) + 1 + - 2 + (2 * 2) + 2 + 1 + 1 + (2 * 3) + 1); + const u32 total_pipelines = 2 + // vertex shaders + (5 * 5 * 9 * 2 * 2 * 2) + // fragment shaders + ((m_pgxp_depth_buffer ? 2 : 1) * 5 * 5 * 9 * 2 * 2 * 2) + // batch pipelines + ((m_wireframe_mode != GPUWireframeMode::Disabled) ? 1 : 0) + // wireframe + 1 + // fullscreen quad VS + (2 * 2) + // vram fill + (1 + BoolToUInt32(write_mask_as_depth)) + // vram copy + (1 + BoolToUInt32(write_mask_as_depth)) + // vram write + 1 + // vram write replacement + (needs_depth_buffer ? 1 : 0) + // mask -> depth + 1 + // vram read + 2 + // extract/display + ((m_downsample_mode != GPUDownsampleMode::Disabled) ? 1 : 0); // downsample + + ShaderCompileProgressTracker progress("Compiling Pipelines", total_pipelines); // vertex shaders - [textured] - // fragment shaders - [render_mode][texture_mode][dithering][interlacing] + // fragment shaders - [render_mode][transparency_mode][texture_mode][check_mask][dithering][interlacing] static constexpr auto destroy_shader = [](std::unique_ptr& s) { s.reset(); }; DimensionalArray, 2> batch_vertex_shaders{}; - DimensionalArray, 2, 2, 9, 5, 4> batch_fragment_shaders{}; + DimensionalArray, 2, 2, 2, 9, 5, 5> batch_fragment_shaders{}; ScopedGuard batch_shader_guard([&batch_vertex_shaders, &batch_fragment_shaders]() { batch_vertex_shaders.enumerate(destroy_shader); batch_fragment_shaders.enumerate(destroy_shader); @@ -743,55 +778,58 @@ bool GPU_HW::CompilePipelines() for (u8 textured = 0; textured < 2; textured++) { - const std::string vs = shadergen.GenerateBatchVertexShader(ConvertToBoolUnchecked(textured)); + const std::string vs = shadergen.GenerateBatchVertexShader(ConvertToBoolUnchecked(textured), m_pgxp_depth_buffer); if (!(batch_vertex_shaders[textured] = g_gpu_device->CreateShader(GPUShaderStage::Vertex, vs))) return false; progress.Increment(); } - for (u8 render_mode = 0; render_mode < 4; render_mode++) + for (u8 render_mode = 0; render_mode < 5; render_mode++) { for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++) { - if (m_supports_framebuffer_fetch) - { - // Don't need multipass shaders. - if (render_mode != static_cast(BatchRenderMode::TransparencyDisabled) && - render_mode != static_cast(BatchRenderMode::TransparentAndOpaque)) - { - progress.Increment(2 * 2 * 9); - continue; - } - } - else - { + if ( // Can't generate shader blending. - if (transparency_mode != static_cast(GPUTransparencyMode::Disabled)) - { - progress.Increment(2 * 2 * 9); - continue; - } + ((render_mode == static_cast(BatchRenderMode::ShaderBlend) && !features.feedback_loops) || + (render_mode != static_cast(BatchRenderMode::ShaderBlend) && + transparency_mode != static_cast(GPUTransparencyMode::Disabled))) || + // Don't need multipass shaders. + (m_supports_framebuffer_fetch && (render_mode == static_cast(BatchRenderMode::OnlyOpaque) || + render_mode == static_cast(BatchRenderMode::OnlyTransparent)))) + { + progress.Increment(9 * 2 * 2 * 2); + continue; } for (u8 texture_mode = 0; texture_mode < 9; texture_mode++) { - for (u8 dithering = 0; dithering < 2; dithering++) + for (u8 check_mask = 0; check_mask < 2; check_mask++) { - for (u8 interlacing = 0; interlacing < 2; interlacing++) + if (check_mask && render_mode != static_cast(BatchRenderMode::ShaderBlend)) { - const std::string fs = shadergen.GenerateBatchFragmentShader( - static_cast(render_mode), static_cast(transparency_mode), - static_cast(texture_mode), ConvertToBoolUnchecked(dithering), - ConvertToBoolUnchecked(interlacing)); + // mask bit testing is only valid with shader blending. + progress.Increment(2 * 2); + continue; + } - if (!(batch_fragment_shaders[render_mode][transparency_mode][texture_mode][dithering][interlacing] = - g_gpu_device->CreateShader(GPUShaderStage::Fragment, fs))) + for (u8 dithering = 0; dithering < 2; dithering++) + { + for (u8 interlacing = 0; interlacing < 2; interlacing++) { - return false; - } + const std::string fs = shadergen.GenerateBatchFragmentShader( + static_cast(render_mode), static_cast(transparency_mode), + static_cast(texture_mode), ConvertToBoolUnchecked(dithering), + ConvertToBoolUnchecked(interlacing), ConvertToBoolUnchecked(check_mask)); - progress.Increment(); + if (!(batch_fragment_shaders[render_mode][transparency_mode][texture_mode][check_mask][dithering] + [interlacing] = g_gpu_device->CreateShader(GPUShaderStage::Fragment, fs))) + { + return false; + } + + progress.Increment(); + } } } } @@ -820,116 +858,138 @@ bool GPU_HW::CompilePipelines() plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); plconfig.primitive = GPUPipeline::Primitive::Triangles; plconfig.geometry_shader = nullptr; - plconfig.SetTargetFormats(VRAM_RT_FORMAT, VRAM_DS_FORMAT); + plconfig.SetTargetFormats(VRAM_RT_FORMAT, needs_depth_buffer ? VRAM_DS_FORMAT : GPUTexture::Format::Unknown); plconfig.samples = m_multisamples; plconfig.per_sample_shading = m_per_sample_shading; - plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags; + plconfig.render_pass_flags = m_allow_shader_blend ? GPUPipeline::ColorFeedbackLoop : GPUPipeline::NoRenderPassFlags; + plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); - // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] - for (u8 depth_test = 0; depth_test < 3; depth_test++) + // [depth_test][transparency_mode][render_mode][texture_mode][dithering][interlacing][check_mask] + for (u8 depth_test = 0; depth_test < 2; depth_test++) { - for (u8 render_mode = 0; render_mode < 4; render_mode++) + if (depth_test && !m_pgxp_depth_buffer) { - if (m_supports_framebuffer_fetch) + // Not used. + continue; + } + + for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++) + { + for (u8 render_mode = 0; render_mode < 5; render_mode++) { - // Don't need multipass shaders. - if (render_mode != static_cast(BatchRenderMode::TransparencyDisabled) && - render_mode != static_cast(BatchRenderMode::TransparentAndOpaque)) + if ( + // Can't generate shader blending. + (render_mode == static_cast(BatchRenderMode::ShaderBlend) && !features.feedback_loops) || + // Don't need multipass shaders. + (m_supports_framebuffer_fetch && (render_mode == static_cast(BatchRenderMode::OnlyOpaque) || + render_mode == static_cast(BatchRenderMode::OnlyTransparent)))) { - progress.Increment(2 * 2 * 9 * 5); + progress.Increment(9 * 2 * 2 * 2); continue; } - } - for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++) - { for (u8 texture_mode = 0; texture_mode < 9; texture_mode++) { for (u8 dithering = 0; dithering < 2; dithering++) { for (u8 interlacing = 0; interlacing < 2; interlacing++) { - static constexpr std::array depth_test_values = { - GPUPipeline::DepthFunc::Always, GPUPipeline::DepthFunc::GreaterEqual, - GPUPipeline::DepthFunc::LessEqual}; - const bool textured = (static_cast(texture_mode) != GPUTextureMode::Disabled); - const bool use_shader_blending = - (textured && NeedsShaderBlending(static_cast(transparency_mode))); - - plconfig.input_layout.vertex_attributes = - textured ? - (m_clamp_uvs ? std::span( - vertex_attributes, NUM_BATCH_TEXTURED_LIMITS_VERTEX_ATTRIBUTES) : - std::span(vertex_attributes, - NUM_BATCH_TEXTURED_VERTEX_ATTRIBUTES)) : - std::span(vertex_attributes, NUM_BATCH_VERTEX_ATTRIBUTES); - - plconfig.vertex_shader = batch_vertex_shaders[BoolToUInt8(textured)].get(); - plconfig.fragment_shader = - batch_fragment_shaders[render_mode] - [use_shader_blending ? transparency_mode : - static_cast(GPUTransparencyMode::Disabled)] - [texture_mode][dithering][interlacing] - .get(); - - plconfig.depth.depth_test = depth_test_values[depth_test]; - plconfig.depth.depth_write = !m_pgxp_depth_buffer || depth_test != 0; - plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); - - if (!use_shader_blending && - ((static_cast(transparency_mode) != GPUTransparencyMode::Disabled && - (static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && - static_cast(render_mode) != BatchRenderMode::OnlyOpaque)) || - (textured && IsBlendedTextureFiltering(m_texture_filtering)))) + for (u8 check_mask = 0; check_mask < 2; check_mask++) { - plconfig.blend.enable = true; - plconfig.blend.src_alpha_blend = GPUPipeline::BlendFunc::One; - plconfig.blend.dst_alpha_blend = GPUPipeline::BlendFunc::Zero; - plconfig.blend.alpha_blend_op = GPUPipeline::BlendOp::Add; + const bool textured = (static_cast(texture_mode) != GPUTextureMode::Disabled); + const bool use_shader_blending = + (render_mode == static_cast(BatchRenderMode::ShaderBlend) && + ((textured && + NeedsShaderBlending(static_cast(transparency_mode), (check_mask != 0))) || + check_mask)); - if (m_supports_dual_source_blend) + plconfig.input_layout.vertex_attributes = + textured ? + (m_clamp_uvs ? std::span( + vertex_attributes, NUM_BATCH_TEXTURED_LIMITS_VERTEX_ATTRIBUTES) : + std::span( + vertex_attributes, NUM_BATCH_TEXTURED_VERTEX_ATTRIBUTES)) : + std::span(vertex_attributes, NUM_BATCH_VERTEX_ATTRIBUTES); + + plconfig.vertex_shader = batch_vertex_shaders[BoolToUInt8(textured)].get(); + plconfig.fragment_shader = + batch_fragment_shaders[render_mode] + [use_shader_blending ? transparency_mode : + static_cast(GPUTransparencyMode::Disabled)] + [texture_mode][use_shader_blending ? check_mask : 0][dithering][interlacing] + .get(); + Assert(plconfig.vertex_shader && plconfig.fragment_shader); + + if (needs_depth_buffer) { - plconfig.blend.src_blend = GPUPipeline::BlendFunc::One; - plconfig.blend.dst_blend = GPUPipeline::BlendFunc::SrcAlpha1; - plconfig.blend.blend_op = - (static_cast(transparency_mode) == - GPUTransparencyMode::BackgroundMinusForeground && - static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && - static_cast(render_mode) != BatchRenderMode::OnlyOpaque) ? - GPUPipeline::BlendOp::ReverseSubtract : - GPUPipeline::BlendOp::Add; + plconfig.depth.depth_test = + m_pgxp_depth_buffer ? + (depth_test ? GPUPipeline::DepthFunc::LessEqual : GPUPipeline::DepthFunc::Always) : + (check_mask ? GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always); + + // Don't write for transparent, but still test. + plconfig.depth.depth_write = + !m_pgxp_depth_buffer || + (depth_test && transparency_mode == static_cast(GPUTransparencyMode::Disabled)); } - else + + plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); + + if (!use_shader_blending && + ((static_cast(transparency_mode) != GPUTransparencyMode::Disabled && + (static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && + static_cast(render_mode) != BatchRenderMode::OnlyOpaque)) || + (textured && IsBlendedTextureFiltering(m_texture_filtering)))) { - // TODO: This isn't entirely accurate, 127.5 versus 128. - // But if we use fbfetch on Mali, it doesn't matter. - plconfig.blend.src_blend = GPUPipeline::BlendFunc::One; - plconfig.blend.dst_blend = GPUPipeline::BlendFunc::One; - if (static_cast(transparency_mode) == - GPUTransparencyMode::HalfBackgroundPlusHalfForeground) + plconfig.blend.enable = true; + plconfig.blend.src_alpha_blend = GPUPipeline::BlendFunc::One; + plconfig.blend.dst_alpha_blend = GPUPipeline::BlendFunc::Zero; + plconfig.blend.alpha_blend_op = GPUPipeline::BlendOp::Add; + + if (m_supports_dual_source_blend) { - plconfig.blend.dst_blend = GPUPipeline::BlendFunc::ConstantColor; - plconfig.blend.dst_alpha_blend = GPUPipeline::BlendFunc::ConstantColor; - plconfig.blend.constant = 0x00808080u; + plconfig.blend.src_blend = GPUPipeline::BlendFunc::One; + plconfig.blend.dst_blend = GPUPipeline::BlendFunc::SrcAlpha1; + plconfig.blend.blend_op = + (static_cast(transparency_mode) == + GPUTransparencyMode::BackgroundMinusForeground && + static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && + static_cast(render_mode) != BatchRenderMode::OnlyOpaque) ? + GPUPipeline::BlendOp::ReverseSubtract : + GPUPipeline::BlendOp::Add; } + else + { + // TODO: This isn't entirely accurate, 127.5 versus 128. + // But if we use fbfetch on Mali, it doesn't matter. + plconfig.blend.src_blend = GPUPipeline::BlendFunc::One; + plconfig.blend.dst_blend = GPUPipeline::BlendFunc::One; + if (static_cast(transparency_mode) == + GPUTransparencyMode::HalfBackgroundPlusHalfForeground) + { + plconfig.blend.dst_blend = GPUPipeline::BlendFunc::ConstantColor; + plconfig.blend.dst_alpha_blend = GPUPipeline::BlendFunc::ConstantColor; + plconfig.blend.constant = 0x00808080u; + } - plconfig.blend.blend_op = - (static_cast(transparency_mode) == - GPUTransparencyMode::BackgroundMinusForeground && - static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && - static_cast(render_mode) != BatchRenderMode::OnlyOpaque) ? - GPUPipeline::BlendOp::ReverseSubtract : - GPUPipeline::BlendOp::Add; + plconfig.blend.blend_op = + (static_cast(transparency_mode) == + GPUTransparencyMode::BackgroundMinusForeground && + static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && + static_cast(render_mode) != BatchRenderMode::OnlyOpaque) ? + GPUPipeline::BlendOp::ReverseSubtract : + GPUPipeline::BlendOp::Add; + } } - } - if (!(m_batch_pipelines[depth_test][render_mode][texture_mode][transparency_mode][dithering] - [interlacing] = g_gpu_device->CreatePipeline(plconfig))) - { - return false; - } + if (!(m_batch_pipelines[depth_test][transparency_mode][render_mode][texture_mode][dithering] + [interlacing][check_mask] = g_gpu_device->CreatePipeline(plconfig))) + { + return false; + } - progress.Increment(); + progress.Increment(); + } } } } @@ -968,12 +1028,15 @@ bool GPU_HW::CompilePipelines() plconfig.vertex_shader = nullptr; plconfig.geometry_shader = nullptr; plconfig.fragment_shader = nullptr; + + progress.Increment(); } batch_shader_guard.Run(); + // use a depth of 1, that way writes will reset the depth std::unique_ptr fullscreen_quad_vertex_shader = - g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GenerateScreenQuadVertexShader()); + g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GenerateScreenQuadVertexShader(1.0f)); if (!fullscreen_quad_vertex_shader) return false; @@ -1018,7 +1081,10 @@ bool GPU_HW::CompilePipelines() plconfig.fragment_shader = fs.get(); for (u8 depth_test = 0; depth_test < 2; depth_test++) { - plconfig.depth.depth_write = true; + if (depth_test && write_mask_as_depth) + continue; + + plconfig.depth.depth_write = needs_depth_buffer; plconfig.depth.depth_test = (depth_test != 0) ? GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always; @@ -1045,7 +1111,10 @@ bool GPU_HW::CompilePipelines() plconfig.fragment_shader = fs.get(); for (u8 depth_test = 0; depth_test < 2; depth_test++) { - plconfig.depth.depth_write = true; + if (depth_test && write_mask_as_depth) + continue; + + plconfig.depth.depth_write = needs_depth_buffer; plconfig.depth.depth_test = (depth_test != 0) ? GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always; @@ -1071,9 +1140,14 @@ bool GPU_HW::CompilePipelines() plconfig.depth = GPUPipeline::DepthState::GetAlwaysWriteState(); if (!(m_vram_write_replacement_pipeline = g_gpu_device->CreatePipeline(plconfig))) return false; + + progress.Increment(); } + plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags; + // VRAM update depth + if (needs_depth_buffer) { std::unique_ptr fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateVRAMUpdateDepthFragmentShader()); @@ -1191,6 +1265,7 @@ bool GPU_HW::CompilePipelines() if (!(m_downsample_composite_sampler = g_gpu_device->CreateSampler(config))) return false; GL_OBJECT_NAME(m_downsample_composite_sampler, "Downsample Trilinear Sampler"); + progress.Increment(); } else if (m_downsample_mode == GPUDownsampleMode::Box) { @@ -1207,10 +1282,9 @@ bool GPU_HW::CompilePipelines() return false; GL_OBJECT_NAME(m_downsample_first_pass_pipeline, "Downsample First Pass Pipeline"); + progress.Increment(); } - progress.Increment(); - #undef UPDATE_PROGRESS return true; @@ -1323,7 +1397,7 @@ void GPU_HW::UpdateVRAMReadTexture(bool drawn, bool written) void GPU_HW::UpdateDepthBufferFromMaskBit() { - if (m_pgxp_depth_buffer) + if (m_pgxp_depth_buffer || !m_vram_depth_texture) return; // Viewport should already be set full, only need to fudge the scissor. @@ -1336,7 +1410,7 @@ void GPU_HW::UpdateDepthBufferFromMaskBit() // Restore. g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler()); - g_gpu_device->SetRenderTarget(m_vram_texture.get(), m_vram_depth_texture.get()); + SetVRAMRenderTarget(); SetScissor(); } @@ -1389,13 +1463,18 @@ void GPU_HW::UnmapGPUBuffer(u32 used_vertices, u32 used_indices) ALWAYS_INLINE_RELEASE void GPU_HW::DrawBatchVertices(BatchRenderMode render_mode, u32 num_indices, u32 base_index, u32 base_vertex) { - // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] - const u8 depth_test = m_batch.use_depth_buffer ? static_cast(2) : BoolToUInt8(m_batch.check_mask_before_draw); - g_gpu_device->SetPipeline( - m_batch_pipelines[depth_test][static_cast(render_mode)][static_cast(m_batch.texture_mode)][static_cast( - m_batch.transparency_mode)][BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)] - .get()); - g_gpu_device->DrawIndexed(num_indices, base_index, base_vertex); + // [depth_test][transparency_mode][render_mode][texture_mode][dithering][interlacing][check_mask] + const u8 depth_test = BoolToUInt8(m_batch.use_depth_buffer); + const u8 check_mask = BoolToUInt8(m_batch.check_mask_before_draw); + g_gpu_device->SetPipeline(m_batch_pipelines[depth_test][static_cast(m_batch.transparency_mode)][static_cast( + render_mode)][static_cast(m_batch.texture_mode)][BoolToUInt8(m_batch.dithering)] + [BoolToUInt8(m_batch.interlacing)][check_mask] + .get()); + + if (render_mode != BatchRenderMode::ShaderBlend || m_supports_framebuffer_fetch) + g_gpu_device->DrawIndexed(num_indices, base_index, base_vertex); + else + g_gpu_device->DrawIndexedWithBarrier(num_indices, base_index, base_vertex, GPUDevice::DrawBarrier::Full); } ALWAYS_INLINE_RELEASE void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices) @@ -1873,10 +1952,8 @@ void GPU_HW::LoadVertices() } else if (m_pgxp_depth_buffer) { - const bool use_depth = (m_batch.transparency_mode == GPUTransparencyMode::Disabled); - SetBatchDepthBuffer(use_depth); - if (use_depth) - CheckForDepthClear(vertices.data(), num_vertices); + SetBatchDepthBuffer(true); + CheckForDepthClear(vertices.data(), num_vertices); } } @@ -2410,10 +2487,11 @@ ALWAYS_INLINE_RELEASE bool GPU_HW::NeedsTwoPassRendering() const (!m_supports_dual_source_blend && m_batch.transparency_mode != GPUTransparencyMode::Disabled))); } -ALWAYS_INLINE_RELEASE bool GPU_HW::NeedsShaderBlending(GPUTransparencyMode transparency) const +ALWAYS_INLINE_RELEASE bool GPU_HW::NeedsShaderBlending(GPUTransparencyMode transparency, bool check_mask) const { - return (m_supports_framebuffer_fetch && - (transparency == GPUTransparencyMode::BackgroundMinusForeground || + return (m_allow_shader_blend && + ((check_mask && (m_pgxp_depth_buffer || !m_vram_depth_texture)) || + transparency == GPUTransparencyMode::BackgroundMinusForeground || (!m_supports_dual_source_blend && (transparency != GPUTransparencyMode::Disabled || IsBlendedTextureFiltering(m_texture_filtering))))); } @@ -2472,7 +2550,7 @@ void GPU_HW::EnsureVertexBufferSpaceForCurrentCommand() void GPU_HW::ResetBatchVertexDepth() { - if (m_pgxp_depth_buffer) + if (m_pgxp_depth_buffer || !m_vram_depth_texture) return; Log_PerfPrint("Resetting batch vertex depth"); @@ -2736,7 +2814,8 @@ void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, b // the viewport should already be set to the full vram, so just adjust the scissor const Common::Rectangle scaled_bounds = bounds * m_resolution_scale; g_gpu_device->SetScissor(scaled_bounds.left, scaled_bounds.top, scaled_bounds.GetWidth(), scaled_bounds.GetHeight()); - g_gpu_device->SetPipeline(m_vram_write_pipelines[BoolToUInt8(check_mask && !m_pgxp_depth_buffer)].get()); + g_gpu_device->SetPipeline( + m_vram_write_pipelines[BoolToUInt8(check_mask && !m_pgxp_depth_buffer && NeedsDepthBuffer())].get()); g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); if (upload_texture) { @@ -2815,7 +2894,8 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 g_gpu_device->SetViewportAndScissor(dst_bounds_scaled.left, dst_bounds_scaled.top, dst_bounds_scaled.GetWidth(), dst_bounds_scaled.GetHeight()); g_gpu_device->SetPipeline( - m_vram_copy_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw && !m_pgxp_depth_buffer)].get()); + m_vram_copy_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw && !m_pgxp_depth_buffer && NeedsDepthBuffer())] + .get()); g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); g_gpu_device->Draw(3, 0); RestoreDeviceContext(); @@ -2945,7 +3025,7 @@ void GPU_HW::DispatchRenderCommand() rc.transparency_enable ? m_draw_mode.mode_reg.transparency_mode : GPUTransparencyMode::Disabled; const bool dithering_enable = (!m_true_color && rc.IsDitheringEnabled()) ? m_GPUSTAT.dither_enable : false; if (texture_mode != m_batch.texture_mode || transparency_mode != m_batch.transparency_mode || - (transparency_mode == GPUTransparencyMode::BackgroundMinusForeground && !m_supports_framebuffer_fetch) || + (transparency_mode == GPUTransparencyMode::BackgroundMinusForeground && !m_allow_shader_blend) || dithering_enable != m_batch.dithering) { FlushRender(); @@ -2956,8 +3036,9 @@ void GPU_HW::DispatchRenderCommand() if (m_batch_index_count == 0) { // transparency mode change + const bool check_mask_before_draw = m_GPUSTAT.check_mask_before_draw; if (transparency_mode != GPUTransparencyMode::Disabled && - (texture_mode == GPUTextureMode::Disabled || !NeedsShaderBlending(transparency_mode))) + (texture_mode == GPUTextureMode::Disabled || !NeedsShaderBlending(transparency_mode, check_mask_before_draw))) { static constexpr float transparent_alpha[4][2] = {{0.5f, 0.5f}, {1.0f, 1.0f}, {1.0f, 1.0f}, {0.25f, 1.0f}}; @@ -2969,7 +3050,6 @@ void GPU_HW::DispatchRenderCommand() m_batch_ubo_data.u_dst_alpha_factor = dst_alpha_factor; } - const bool check_mask_before_draw = m_GPUSTAT.check_mask_before_draw; const bool set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing; if (m_batch.check_mask_before_draw != check_mask_before_draw || m_batch.set_mask_while_drawing != set_mask_while_drawing) @@ -3052,7 +3132,11 @@ void GPU_HW::FlushRender() if (m_wireframe_mode != GPUWireframeMode::OnlyWireframe) { - if (NeedsTwoPassRendering()) + if (NeedsShaderBlending(m_batch.transparency_mode, m_batch.check_mask_before_draw)) + { + DrawBatchVertices(BatchRenderMode::ShaderBlend, index_count, base_index, base_vertex); + } + else if (NeedsTwoPassRendering()) { DrawBatchVertices(BatchRenderMode::OnlyOpaque, index_count, base_index, base_vertex); DrawBatchVertices(BatchRenderMode::OnlyTransparent, index_count, base_index, base_vertex); diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 35a0aa484..22b41aaeb 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -29,7 +29,8 @@ public: TransparencyDisabled, TransparentAndOpaque, OnlyOpaque, - OnlyTransparent + OnlyTransparent, + ShaderBlend }; GPU_HW(); @@ -115,6 +116,9 @@ private: u32 num_uniform_buffer_updates; }; + /// Returns true if a depth buffer should be created. + bool NeedsDepthBuffer() const; + bool CreateBuffers(); void ClearFramebuffer(); void DestroyBuffers(); @@ -131,6 +135,7 @@ private: void UpdateDepthBufferFromMaskBit(); void ClearDepthBuffer(); void SetScissor(); + void SetVRAMRenderTarget(); void MapGPUBuffer(u32 required_vertices, u32 required_indices); void UnmapGPUBuffer(u32 used_vertices, u32 used_indices); void DrawBatchVertices(BatchRenderMode render_mode, u32 num_indices, u32 base_index, u32 base_vertex); @@ -158,7 +163,7 @@ private: bool NeedsTwoPassRendering() const; /// Returns true if the draw is going to use shader blending/framebuffer fetch. - bool NeedsShaderBlending(GPUTransparencyMode transparency) const; + bool NeedsShaderBlending(GPUTransparencyMode transparency, bool check_mask) const; void FillBackendCommandParameters(GPUBackendCommand* cmd) const; void FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const; @@ -236,6 +241,8 @@ private: bool m_clamp_uvs : 1 = false; bool m_compute_uv_range : 1 = false; bool m_pgxp_depth_buffer : 1 = false; + bool m_allow_shader_blend : 1 = false; + bool m_prefer_shader_blend : 1 = false; u8 m_texpage_dirty = 0; BatchConfig m_batch; @@ -249,8 +256,8 @@ private: Common::Rectangle m_vram_dirty_write_rect; Common::Rectangle m_current_uv_range; - // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] - DimensionalArray, 2, 2, 5, 9, 4, 3> m_batch_pipelines{}; + // [depth_test][transparency_mode][render_mode][texture_mode][dithering][interlacing][check_mask] + DimensionalArray, 2, 2, 2, 9, 5, 5, 2> m_batch_pipelines{}; std::unique_ptr m_wireframe_pipeline; // [wrapped][interlaced] diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index 0d4e6ad9c..31d9501d3 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -7,13 +7,14 @@ GPU_HW_ShaderGen::GPU_HW_ShaderGen(RenderAPI render_api, u32 resolution_scale, u32 multisamples, bool per_sample_shading, bool true_color, bool scaled_dithering, - GPUTextureFilter texture_filtering, bool uv_limits, bool pgxp_depth, + GPUTextureFilter texture_filtering, bool uv_limits, bool write_mask_as_depth, bool disable_color_perspective, bool supports_dual_source_blend, bool supports_framebuffer_fetch, bool debanding) : ShaderGen(render_api, supports_dual_source_blend, supports_framebuffer_fetch), m_resolution_scale(resolution_scale), m_multisamples(multisamples), m_per_sample_shading(per_sample_shading), m_true_color(true_color), m_scaled_dithering(scaled_dithering), m_texture_filter(texture_filtering), m_uv_limits(uv_limits), - m_pgxp_depth(pgxp_depth), m_disable_color_perspective(disable_color_perspective), m_debanding(debanding) + m_write_mask_as_depth(write_mask_as_depth), m_disable_color_perspective(disable_color_perspective), + m_debanding(debanding) { } @@ -58,13 +59,13 @@ void GPU_HW_ShaderGen::WriteBatchUniformBuffer(std::stringstream& ss) false); } -std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured) +std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool pgxp_depth) { std::stringstream ss; WriteHeader(ss); DefineMacro(ss, "TEXTURED", textured); DefineMacro(ss, "UV_LIMITS", m_uv_limits); - DefineMacro(ss, "PGXP_DEPTH", m_pgxp_depth); + DefineMacro(ss, "PGXP_DEPTH", pgxp_depth); WriteCommonFunctions(ss); WriteBatchUniformBuffer(ss); @@ -632,19 +633,20 @@ void FilteredSampleFromVRAM(uint4 texpage, float2 coords, float4 uv_limits, std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMode render_mode, GPUTransparencyMode transparency, GPUTextureMode texture_mode, - bool dithering, bool interlacing) + bool dithering, bool interlacing, bool check_mask) { - // Shouldn't be using shader blending without fbfetch. - DebugAssert(m_supports_framebuffer_fetch || transparency == GPUTransparencyMode::Disabled); + // TODO: don't write depth for shader blend + DebugAssert(transparency == GPUTransparencyMode::Disabled || render_mode == GPU_HW::BatchRenderMode::ShaderBlend); const GPUTextureMode actual_texture_mode = texture_mode & ~GPUTextureMode::RawTextureBit; const bool raw_texture = (texture_mode & GPUTextureMode::RawTextureBit) == GPUTextureMode::RawTextureBit; const bool textured = (texture_mode != GPUTextureMode::Disabled); - const bool use_framebuffer_fetch = (m_supports_framebuffer_fetch && transparency != GPUTransparencyMode::Disabled); - const bool use_dual_source = !use_framebuffer_fetch && m_supports_dual_source_blend && - ((render_mode != GPU_HW::BatchRenderMode::TransparencyDisabled && - render_mode != GPU_HW::BatchRenderMode::OnlyOpaque) || - m_texture_filter != GPUTextureFilter::Nearest); + const bool shader_blending = (render_mode == GPU_HW::BatchRenderMode::ShaderBlend && + (transparency != GPUTransparencyMode::Disabled || check_mask)); + const bool use_dual_source = (!shader_blending && m_supports_dual_source_blend && + ((render_mode != GPU_HW::BatchRenderMode::TransparencyDisabled && + render_mode != GPU_HW::BatchRenderMode::OnlyOpaque) || + m_texture_filter != GPUTextureFilter::Nearest)); std::stringstream ss; WriteHeader(ss); @@ -652,7 +654,8 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod DefineMacro(ss, "TRANSPARENCY_ONLY_OPAQUE", render_mode == GPU_HW::BatchRenderMode::OnlyOpaque); DefineMacro(ss, "TRANSPARENCY_ONLY_TRANSPARENT", render_mode == GPU_HW::BatchRenderMode::OnlyTransparent); DefineMacro(ss, "TRANSPARENCY_MODE", static_cast(transparency)); - DefineMacro(ss, "SHADER_BLENDING", use_framebuffer_fetch); + DefineMacro(ss, "SHADER_BLENDING", shader_blending); + DefineMacro(ss, "CHECK_MASK_BIT", check_mask); DefineMacro(ss, "TEXTURED", textured); DefineMacro(ss, "PALETTE", actual_texture_mode == GPUTextureMode::Palette4Bit || actual_texture_mode == GPUTextureMode::Palette8Bit); @@ -668,7 +671,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod DefineMacro(ss, "TEXTURE_FILTERING", m_texture_filter != GPUTextureFilter::Nearest); DefineMacro(ss, "UV_LIMITS", m_uv_limits); DefineMacro(ss, "USE_DUAL_SOURCE", use_dual_source); - DefineMacro(ss, "PGXP_DEPTH", m_pgxp_depth); + DefineMacro(ss, "WRITE_MASK_AS_DEPTH", m_write_mask_as_depth); WriteCommonFunctions(ss); WriteBatchUniformBuffer(ss); @@ -799,20 +802,20 @@ float3 ApplyDebanding(float2 frag_coord) { DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}, {"nointerpolation", "float4 v_uv_limits"}}, - true, use_dual_source ? 2 : 1, !m_pgxp_depth, UsingMSAA(), UsingPerSampleShading(), - false, m_disable_color_perspective, use_framebuffer_fetch); + true, use_dual_source ? 2 : 1, m_write_mask_as_depth, UsingMSAA(), + UsingPerSampleShading(), false, m_disable_color_perspective, shader_blending); } else { DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}}, true, use_dual_source ? 2 : 1, - !m_pgxp_depth, UsingMSAA(), UsingPerSampleShading(), false, m_disable_color_perspective, - use_framebuffer_fetch); + m_write_mask_as_depth, UsingMSAA(), UsingPerSampleShading(), false, + m_disable_color_perspective, shader_blending); } } else { - DeclareFragmentEntryPoint(ss, 1, 0, {}, true, use_dual_source ? 2 : 1, !m_pgxp_depth, UsingMSAA(), - UsingPerSampleShading(), false, m_disable_color_perspective, use_framebuffer_fetch); + DeclareFragmentEntryPoint(ss, 1, 0, {}, true, use_dual_source ? 2 : 1, m_write_mask_as_depth, UsingMSAA(), + UsingPerSampleShading(), false, m_disable_color_perspective, shader_blending); } ss << R"( @@ -930,6 +933,11 @@ float3 ApplyDebanding(float2 frag_coord) float4 bg_col = LAST_FRAG_COLOR; float4 fg_col = float4(color, oalpha); + #if CHECK_MASK_BIT + if (bg_col.a != 0.0) + discard; + #endif + #if TEXTURE_FILTERING #if TRANSPARENCY_MODE == 0 || TRANSPARENCY_MODE == 3 bg_col.rgb /= ialpha; @@ -964,7 +972,7 @@ float3 ApplyDebanding(float2 frag_coord) o_col0 = float4(color, oalpha); #endif - #if !PGXP_DEPTH + #if WRITE_MASK_AS_DEPTH o_depth = oalpha * v_pos.z; #endif @@ -981,7 +989,7 @@ float3 ApplyDebanding(float2 frag_coord) o_col0 = float4(color, oalpha); #endif - #if !PGXP_DEPTH + #if WRITE_MASK_AS_DEPTH o_depth = oalpha * v_pos.z; #endif @@ -998,7 +1006,7 @@ float3 ApplyDebanding(float2 frag_coord) o_col0 = float4(color, oalpha); #endif - #if !PGXP_DEPTH + #if WRITE_MASK_AS_DEPTH o_depth = oalpha * v_pos.z; #endif #else @@ -1009,7 +1017,7 @@ float3 ApplyDebanding(float2 frag_coord) o_col1 = float4(0.0, 0.0, 0.0, 1.0 - ialpha); #endif - #if !PGXP_DEPTH + #if WRITE_MASK_AS_DEPTH o_depth = oalpha * v_pos.z; #endif #endif @@ -1233,7 +1241,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_buffer, b std::stringstream ss; WriteHeader(ss); WriteCommonFunctions(ss); - DefineMacro(ss, "PGXP_DEPTH", m_pgxp_depth); + DefineMacro(ss, "WRITE_MASK_AS_DEPTH", m_write_mask_as_depth); DefineMacro(ss, "USE_BUFFER", use_buffer); DeclareUniformBuffer(ss, {"uint2 u_base_coords", "uint2 u_end_coords", "uint2 u_size", "uint u_buffer_base_offset", @@ -1266,7 +1274,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_buffer, b ss << "#define GET_VALUE(buffer_offset) (LOAD_TEXTURE_BUFFER(samp0, int(buffer_offset)).r)\n\n"; } - DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true); + DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, m_write_mask_as_depth); ss << R"( { uint2 coords = uint2(v_pos.xy) / uint2(RESOLUTION_SCALE, RESOLUTION_SCALE); @@ -1291,10 +1299,8 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_buffer, b #endif o_col0 = RGBA5551ToRGBA8(value); -#if !PGXP_DEPTH +#if WRITE_MASK_AS_DEPTH o_depth = (o_col0.a == 1.0) ? u_depth_value : 0.0; -#else - o_depth = 1.0; #endif })"; @@ -1309,7 +1315,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader() std::stringstream ss; WriteHeader(ss); WriteCommonFunctions(ss); - DefineMacro(ss, "PGXP_DEPTH", m_pgxp_depth); + DefineMacro(ss, "WRITE_MASK_AS_DEPTH", m_write_mask_as_depth); DeclareUniformBuffer(ss, {"uint2 u_src_coords", "uint2 u_dst_coords", "uint2 u_end_coords", "uint2 u_size", "bool u_set_mask_bit", "float u_depth_value"}, @@ -1317,7 +1323,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader() DeclareTexture(ss, "samp0", 0, msaa); DefineMacro(ss, "MSAA_COPY", msaa); - DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true, false, false, msaa); + DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, m_write_mask_as_depth, false, false, msaa); ss << R"( { uint2 dst_coords = uint2(v_pos.xy); @@ -1344,10 +1350,8 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader() float4 color = LOAD_TEXTURE(samp0, int2(src_coords), 0); #endif o_col0 = float4(color.xyz, u_set_mask_bit ? 1.0 : color.a); -#if !PGXP_DEPTH +#if WRITE_MASK_AS_DEPTH o_depth = (u_set_mask_bit ? 1.0f : ((o_col0.a == 1.0) ? u_depth_value : 0.0)); -#else - o_depth = 1.0f; #endif })"; @@ -1359,14 +1363,14 @@ std::string GPU_HW_ShaderGen::GenerateVRAMFillFragmentShader(bool wrapped, bool std::stringstream ss; WriteHeader(ss); WriteCommonFunctions(ss); - DefineMacro(ss, "PGXP_DEPTH", m_pgxp_depth); + DefineMacro(ss, "WRITE_MASK_AS_DEPTH", m_write_mask_as_depth); DefineMacro(ss, "WRAPPED", wrapped); DefineMacro(ss, "INTERLACED", interlaced); DeclareUniformBuffer( ss, {"uint2 u_dst_coords", "uint2 u_end_coords", "float4 u_fill_color", "uint u_interlaced_displayed_field"}, true); - DeclareFragmentEntryPoint(ss, 0, 1, {}, interlaced || wrapped, 1, true, false, false, false); + DeclareFragmentEntryPoint(ss, 0, 1, {}, interlaced || wrapped, 1, m_write_mask_as_depth, false, false, false); ss << R"( { #if INTERLACED || WRAPPED @@ -1388,10 +1392,8 @@ std::string GPU_HW_ShaderGen::GenerateVRAMFillFragmentShader(bool wrapped, bool #endif o_col0 = u_fill_color; -#if !PGXP_DEPTH +#if WRITE_MASK_AS_DEPTH o_depth = u_fill_color.a; -#else - o_depth = 1.0f; #endif })"; diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h index ab89f93a8..94c938b51 100644 --- a/src/core/gpu_hw_shadergen.h +++ b/src/core/gpu_hw_shadergen.h @@ -10,13 +10,14 @@ class GPU_HW_ShaderGen : public ShaderGen public: GPU_HW_ShaderGen(RenderAPI render_api, u32 resolution_scale, u32 multisamples, bool per_sample_shading, bool true_color, bool scaled_dithering, GPUTextureFilter texture_filtering, bool uv_limits, - bool pgxp_depth, bool disable_color_perspective, bool supports_dual_source_blend, + bool write_mask_as_depth, bool disable_color_perspective, bool supports_dual_source_blend, bool supports_framebuffer_fetch, bool debanding); ~GPU_HW_ShaderGen(); - std::string GenerateBatchVertexShader(bool textured); + std::string GenerateBatchVertexShader(bool textured, bool pgxp_depth); std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode render_mode, GPUTransparencyMode transparency, - GPUTextureMode texture_mode, bool dithering, bool interlacing); + GPUTextureMode texture_mode, bool dithering, bool interlacing, + bool check_mask); std::string GenerateWireframeGeometryShader(); std::string GenerateWireframeFragmentShader(); std::string GenerateVRAMReadFragmentShader(); @@ -48,7 +49,7 @@ private: bool m_scaled_dithering; GPUTextureFilter m_texture_filter; bool m_uv_limits; - bool m_pgxp_depth; + bool m_write_mask_as_depth; bool m_disable_color_perspective; bool m_debanding; }; diff --git a/src/core/shader_cache_version.h b/src/core/shader_cache_version.h index 637d84cac..f0a132570 100644 --- a/src/core/shader_cache_version.h +++ b/src/core/shader_cache_version.h @@ -4,4 +4,4 @@ #pragma once #include "common/types.h" -static constexpr u32 SHADER_CACHE_VERSION = 13; +static constexpr u32 SHADER_CACHE_VERSION = 14; diff --git a/src/util/metal_device.h b/src/util/metal_device.h index 92c330f12..2f4893abe 100644 --- a/src/util/metal_device.h +++ b/src/util/metal_device.h @@ -249,7 +249,8 @@ public: void PushUniformBuffer(const void* data, u32 data_size) override; void* MapUniformBuffer(u32 size) override; void UnmapUniformBuffer(u32 size) override; - void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) override; + void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, + GPUPipeline::RenderPassFlag feedback_loop) override; void SetPipeline(GPUPipeline* pipeline) override; void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; @@ -257,6 +258,7 @@ public: void SetScissor(s32 x, s32 y, s32 width, s32 height) override; void Draw(u32 vertex_count, u32 base_vertex) override; void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; + void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override; bool GetHostRefreshRate(float* refresh_rate) override; diff --git a/src/util/metal_device.mm b/src/util/metal_device.mm index 9c28066e2..07c20b574 100644 --- a/src/util/metal_device.mm +++ b/src/util/metal_device.mm @@ -1721,8 +1721,10 @@ void MetalDevice::UnmapUniformBuffer(u32 size) } } -void MetalDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) +void MetalDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, + GPUPipeline::RenderPassFlag feedback_loop) { + DebugAssert(!feedback_loop); bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds); bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated()); bool needs_rt_clear = false; @@ -1843,7 +1845,7 @@ void MetalDevice::UnbindTexture(MetalTexture* tex) if (m_current_render_targets[i] == tex) { Log_WarningPrint("Unbinding current RT"); - SetRenderTargets(nullptr, 0, m_current_depth_target); + SetRenderTargets(nullptr, 0, m_current_depth_target, GPUPipeline::NoRenderPassFlags); // TODO: Wrong break; } } @@ -1853,7 +1855,7 @@ void MetalDevice::UnbindTexture(MetalTexture* tex) if (m_current_depth_target == tex) { Log_WarningPrint("Unbinding current DS"); - SetRenderTargets(nullptr, 0, nullptr); + SetRenderTargets(nullptr, 0, nullptr, GPUPipeline::NoRenderPassFlags); } } } @@ -2094,6 +2096,11 @@ void MetalDevice::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) baseInstance:0]; } +void MetalDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) +{ + Panic("Barriers are not supported"); +} + id MetalDevice::GetBlitEncoder(bool is_inline) { @autoreleasepool diff --git a/src/util/shadergen.cpp b/src/util/shadergen.cpp index 407c28911..3c4c76ca4 100644 --- a/src/util/shadergen.cpp +++ b/src/util/shadergen.cpp @@ -1,12 +1,15 @@ -// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "shadergen.h" + #include "common/assert.h" #include "common/bitutils.h" #include "common/log.h" + #include #include +#include #ifdef ENABLE_OPENGL #include "opengl_loader.h" @@ -660,20 +663,18 @@ void ShaderGen::DeclareFragmentEntryPoint( } } -std::string ShaderGen::GenerateScreenQuadVertexShader() +std::string ShaderGen::GenerateScreenQuadVertexShader(float z /* = 0.0f */) { std::stringstream ss; WriteHeader(ss); DeclareVertexEntryPoint(ss, {}, 0, 1, {}, true); - ss << R"( -{ - v_tex0 = float2(float((v_id << 1) & 2u), float(v_id & 2u)); - v_pos = float4(v_tex0 * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f); - #if API_OPENGL || API_OPENGL_ES || API_VULKAN - v_pos.y = -v_pos.y; - #endif -} -)"; + ss << "{\n"; + ss << " v_tex0 = float2(float((v_id << 1) & 2u), float(v_id & 2u));\n"; + ss << " v_pos = float4(v_tex0 * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), " << std::fixed << z << "f, 1.0f);\n"; + ss << " #if API_OPENGL || API_OPENGL_ES || API_VULKAN\n"; + ss << " v_pos.y = -v_pos.y;\n"; + ss << " #endif\n"; + ss << "}\n"; return ss.str(); } diff --git a/src/util/shadergen.h b/src/util/shadergen.h index 0108d0c26..4338e2ec0 100644 --- a/src/util/shadergen.h +++ b/src/util/shadergen.h @@ -16,7 +16,7 @@ public: static bool UseGLSLBindingLayout(); - std::string GenerateScreenQuadVertexShader(); + std::string GenerateScreenQuadVertexShader(float z = 0.0f); std::string GenerateUVQuadVertexShader(); std::string GenerateFillFragmentShader(); std::string GenerateCopyFragmentShader();