From 391114ccae5696b1385250be97bc03e84396dd7d Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Thu, 18 Mar 2021 12:44:45 +1000 Subject: [PATCH] GPU/OpenGL: Only use one upload path and clamp to max SSBO size --- src/core/gpu_hw_opengl.cpp | 48 ++++++++++++++++++++++---------------- src/core/gpu_hw_opengl.h | 4 ++-- 2 files changed, 30 insertions(+), 22 deletions(-) diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp index 0fd07b627..4994f3fe2 100644 --- a/src/core/gpu_hw_opengl.cpp +++ b/src/core/gpu_hw_opengl.cpp @@ -319,22 +319,29 @@ void GPU_HW_OpenGL::SetCapabilities(HostDisplay* host_display) #ifdef __APPLE__ // Partial texture buffer uploads appear to be broken in macOS's OpenGL driver. - m_supports_texture_buffer = false; + m_use_texture_buffer_for_vram_writes = false; #else - m_supports_texture_buffer = (GLAD_GL_VERSION_3_1 || GLAD_GL_ES_VERSION_3_2); + m_use_texture_buffer_for_vram_writes = (GLAD_GL_VERSION_3_1 || GLAD_GL_ES_VERSION_3_2); #endif - if (m_supports_texture_buffer) + m_texture_stream_buffer_size = VRAM_UPDATE_TEXTURE_BUFFER_SIZE; + if (m_use_texture_buffer_for_vram_writes) { - glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, reinterpret_cast(&m_max_texture_buffer_size)); - Log_InfoPrintf("Max texel buffer size: %u", m_max_texture_buffer_size); - if (m_max_texture_buffer_size < VRAM_WIDTH * VRAM_HEIGHT) + GLint max_texel_buffer_size; + glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, reinterpret_cast(&max_texel_buffer_size)); + Log_InfoPrintf("Max texel buffer size: %u", max_texel_buffer_size); + if (max_texel_buffer_size < VRAM_WIDTH * VRAM_HEIGHT) { Log_WarningPrintf("Maximum texture buffer size is less than VRAM size, not using texel buffers."); - m_supports_texture_buffer = false; + m_use_texture_buffer_for_vram_writes = false; + } + else + { + m_texture_stream_buffer_size = + std::min(VRAM_UPDATE_TEXTURE_BUFFER_SIZE, static_cast(max_texel_buffer_size) * sizeof(u16)); } } - if (!m_supports_texture_buffer || m_max_texture_buffer_size < VRAM_WIDTH * VRAM_HEIGHT) + if (!m_use_texture_buffer_for_vram_writes) { // Try SSBOs. GLint max_fragment_storage_blocks = 0; @@ -352,11 +359,13 @@ void GPU_HW_OpenGL::SetCapabilities(HostDisplay* host_display) if (m_use_ssbo_for_vram_writes) { Log_InfoPrintf("Using shader storage buffers for VRAM writes."); + m_texture_stream_buffer_size = + static_cast(std::min(VRAM_UPDATE_TEXTURE_BUFFER_SIZE, static_cast(max_ssbo_size))); } else { - Log_WarningPrintf( - "Texture buffers are not supported, VRAM writes will be slower and multisampling will be unavailable."); + Log_WarningPrintf("Texture buffers and SSBOs are not supported, VRAM writes will be slower and multisampling " + "will be unavailable."); m_max_multisamples = 1; m_supports_per_sample_shading = false; } @@ -475,15 +484,14 @@ bool GPU_HW_OpenGL::CreateUniformBuffer() bool GPU_HW_OpenGL::CreateTextureBuffer() { - // We use the pixel unpack buffer here because we share it with CPU-decoded VRAM writes. const GLenum target = (m_use_ssbo_for_vram_writes ? GL_SHADER_STORAGE_BUFFER : - (m_supports_texture_buffer ? GL_TEXTURE_BUFFER : GL_PIXEL_UNPACK_BUFFER)); - m_texture_stream_buffer = GL::StreamBuffer::Create(target, VRAM_UPDATE_TEXTURE_BUFFER_SIZE); + (m_use_texture_buffer_for_vram_writes ? GL_TEXTURE_BUFFER : GL_PIXEL_UNPACK_BUFFER)); + m_texture_stream_buffer = GL::StreamBuffer::Create(target, m_texture_stream_buffer_size); if (!m_texture_stream_buffer) return false; - if (m_max_texture_buffer_size > 0) + if (m_use_texture_buffer_for_vram_writes) { glGenTextures(1, &m_texture_buffer_r16ui_texture); glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer_r16ui_texture); @@ -673,7 +681,7 @@ bool GPU_HW_OpenGL::CompilePrograms() m_vram_update_depth_program = std::move(*prog); UPDATE_PROGRESS(); - if (m_supports_texture_buffer || m_use_ssbo_for_vram_writes) + if (m_use_texture_buffer_for_vram_writes || m_use_ssbo_for_vram_writes) { prog = shader_cache.GetProgram(shadergen.GenerateScreenQuadVertexShader(), {}, shadergen.GenerateVRAMWriteFragmentShader(m_use_ssbo_for_vram_writes), @@ -1060,7 +1068,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* } const u32 num_pixels = width * height; - if (num_pixels < m_max_texture_buffer_size || m_use_ssbo_for_vram_writes) + if (m_use_texture_buffer_for_vram_writes || m_use_ssbo_for_vram_writes) { const auto map_result = m_texture_stream_buffer->Map(sizeof(u16), num_pixels * sizeof(u16)); std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16)); @@ -1092,10 +1100,10 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* } else { - if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT) + if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT || check_mask) { // CPU round trip if oversized for now. - Log_WarningPrintf("Oversized VRAM update (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); + Log_WarningPrintf("Oversized/masked VRAM update (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); GPU::UpdateVRAM(x, y, width, height, data, set_mask, check_mask); UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), false, false); @@ -1109,6 +1117,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* // reverse copy the rows so it matches opengl's lower-left origin const u32 source_stride = width * sizeof(u16); const u8* source_ptr = static_cast(data) + (source_stride * (height - 1)); + const u16 mask_or = set_mask ? 0x8000 : 0x0000; u32* dest_ptr = static_cast(map_result.pointer); for (u32 row = 0; row < height; row++) { @@ -1119,8 +1128,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* u16 src_col; std::memcpy(&src_col, source_row_ptr, sizeof(src_col)); source_row_ptr += sizeof(src_col); - - *(dest_ptr++) = RGBA5551ToRGBA8888(src_col); + *(dest_ptr++) = RGBA5551ToRGBA8888(src_col | mask_or); } source_ptr -= source_stride; diff --git a/src/core/gpu_hw_opengl.h b/src/core/gpu_hw_opengl.h index 81785beaa..35725d0ac 100644 --- a/src/core/gpu_hw_opengl.h +++ b/src/core/gpu_hw_opengl.h @@ -104,9 +104,9 @@ private: GL::Program m_vram_update_depth_program; u32 m_uniform_buffer_alignment = 1; - u32 m_max_texture_buffer_size = 0; + u32 m_texture_stream_buffer_size = 0; - bool m_supports_texture_buffer = false; + bool m_use_texture_buffer_for_vram_writes = false; bool m_use_ssbo_for_vram_writes = false; GLenum m_current_depth_test = 0;