From 9ea7a8418cc8e98254d53b40ace1e0c840a4c634 Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Thu, 14 Nov 2019 17:16:59 +1000 Subject: [PATCH] GPU: Eliminate temporary buffer when reading back --- src/core/gpu.cpp | 15 ++++++++------- src/core/gpu.h | 5 ++++- src/core/gpu_commands.cpp | 20 +++++++++++++++----- src/core/gpu_hw.cpp | 21 ++++----------------- src/core/gpu_hw.h | 1 - src/core/gpu_hw_d3d11.cpp | 5 +---- src/core/gpu_hw_d3d11.h | 2 +- src/core/gpu_hw_opengl.cpp | 5 +---- src/core/gpu_hw_opengl.h | 2 +- src/core/gpu_sw.cpp | 11 +++-------- src/core/gpu_sw.h | 2 +- 11 files changed, 39 insertions(+), 50 deletions(-) diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index a5daa9f9c..3eaed5f06 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -1,5 +1,6 @@ #include "gpu.h" #include "YBaseLib/Log.h" +#include "common/heap_array.h" #include "common/state_wrapper.h" #include "dma.h" #include "host_interface.h" @@ -127,16 +128,16 @@ bool GPU::DoState(StateWrapper& sw) if (sw.IsReading()) { - std::vector vram; - sw.Do(&vram); - UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, vram.data()); + // Still need a temporary here. + HeapArray temp; + sw.DoBytes(temp.data(), VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); + UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, temp.data()); UpdateDisplay(); } else { - std::vector vram(VRAM_WIDTH * VRAM_HEIGHT); - ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, vram.data()); - sw.Do(&vram); + ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + sw.DoBytes(m_vram_ptr, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); } return !sw.HasError(); @@ -693,7 +694,7 @@ void GPU::HandleGetGPUInfoCommand(u32 value) void GPU::UpdateDisplay() {} -void GPU::ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer) {} +void GPU::ReadVRAM(u32 x, u32 y, u32 width, u32 height) {} void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) {} diff --git a/src/core/gpu.h b/src/core/gpu.h index 126c5f6a5..1b1eebb65 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -288,7 +288,7 @@ protected: void HandleGetGPUInfoCommand(u32 value); // Rendering in the backend - virtual void ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer); + virtual void ReadVRAM(u32 x, u32 y, u32 width, u32 height); virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color); virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data); virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height); @@ -303,6 +303,9 @@ protected: InterruptController* m_interrupt_controller = nullptr; Timers* m_timers = nullptr; + // Pointer to VRAM, used for reads/writes. In the hardware backends, this is the shadow buffer. + u16* m_vram_ptr = nullptr; + union GPUSTAT { u32 bits; diff --git a/src/core/gpu_commands.cpp b/src/core/gpu_commands.cpp index 5a534413d..83067fc08 100644 --- a/src/core/gpu_commands.cpp +++ b/src/core/gpu_commands.cpp @@ -330,14 +330,24 @@ bool GPU::HandleCopyRectangleVRAMToCPUCommand(const u32*& command_ptr, u32 comma // all rendering should be done first... FlushRender(); - // TODO: A better way of doing this.. - std::vector temp(num_words); - ReadVRAM(src_x, src_y, width, height, temp.data()); - for (const u32 bits : temp) - m_GPUREAD_buffer.push_back(bits); + // TODO: A better way of doing this.. get rid of the m_GPUREAD_buffer. + ReadVRAM(src_x, src_y, width, height); + for (u32 row = 0; row < height;) + { + const u32 row_offset = ((src_y + row++) % VRAM_HEIGHT) * VRAM_WIDTH; + for (u32 col = 0; col < width;) + { + // TODO: Handle unaligned reads... + const u32 col_offset1 = row_offset + ((src_x + col++) % VRAM_WIDTH); + const u32 col_offset2 = row_offset + ((src_x + col++) % VRAM_WIDTH); + m_GPUREAD_buffer.push_back(ZeroExtend32(m_vram_ptr[col_offset1]) | (ZeroExtend32(m_vram_ptr[col_offset2]) << 16)); + } + } if (m_system->GetSettings().debugging.dump_vram_to_cpu_copies) { + std::vector temp; + std::copy(m_GPUREAD_buffer.begin(), m_GPUREAD_buffer.end(), std::back_inserter(temp)); DumpVRAMToFile(SmallString::FromFormat("vram_to_cpu_copy_%u.png", s_vram_to_cpu_dump_id++), width, height, sizeof(u16) * width, temp.data(), true); } diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 4add266bb..01dfb328e 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -8,7 +8,10 @@ #include Log_SetChannel(GPU_HW); -GPU_HW::GPU_HW() = default; +GPU_HW::GPU_HW() : GPU() +{ + m_vram_ptr = m_vram_shadow.data(); +} GPU_HW::~GPU_HW() = default; @@ -209,22 +212,6 @@ GPU_HW::BatchPrimitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc) return BatchPrimitive::Triangles; } -void GPU_HW::ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer) -{ - u8* out_ptr = static_cast(buffer); - - for (u32 row = 0; row < height; row++) - { - const u32 row_offset = ((y + row) % VRAM_HEIGHT) * VRAM_WIDTH; - for (u32 col = 0; col < width; col++) - { - const u32 col_offset = row_offset + ((x + col) % VRAM_WIDTH); - std::memcpy(out_ptr, &m_vram_shadow[col_offset], sizeof(u16)); - out_ptr += sizeof(u16); - } - } -} - void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) { m_vram_dirty_rect.Include(Common::Rectangle::FromExtents(x, y, width, height)); diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 9a2c22a78..b4861aaee 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -127,7 +127,6 @@ protected: bool IsFlushed() const { return m_batch_current_vertex_ptr == m_batch_start_vertex_ptr; } - void ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer) override; void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; diff --git a/src/core/gpu_hw_d3d11.cpp b/src/core/gpu_hw_d3d11.cpp index 73cb2d5fb..e304addaf 100644 --- a/src/core/gpu_hw_d3d11.cpp +++ b/src/core/gpu_hw_d3d11.cpp @@ -584,7 +584,7 @@ void GPU_HW_D3D11::UpdateDisplay() } } -void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer) +void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height) { // Get bounds with wrap-around handled. const Common::Rectangle copy_rect = GetVRAMTransferBounds(x, y, width, height); @@ -614,9 +614,6 @@ void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer) } RestoreGraphicsAPIState(); - - // Feed the shadow buffer back to the output. - GPU_HW::ReadVRAM(x, y, width, height, buffer); } void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) diff --git a/src/core/gpu_hw_d3d11.h b/src/core/gpu_hw_d3d11.h index 80a006640..11105e5c0 100644 --- a/src/core/gpu_hw_d3d11.h +++ b/src/core/gpu_hw_d3d11.h @@ -28,7 +28,7 @@ public: protected: void UpdateDisplay() override; - void ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer) override; + void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp index cb1b3de66..1f442c7ab 100644 --- a/src/core/gpu_hw_opengl.cpp +++ b/src/core/gpu_hw_opengl.cpp @@ -526,7 +526,7 @@ void GPU_HW_OpenGL::UpdateDisplay() } } -void GPU_HW_OpenGL::ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer) +void GPU_HW_OpenGL::ReadVRAM(u32 x, u32 y, u32 width, u32 height) { // Get bounds with wrap-around handled. const Common::Rectangle copy_rect = GetVRAMTransferBounds(x, y, width, height); @@ -554,9 +554,6 @@ void GPU_HW_OpenGL::ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer) glPixelStorei(GL_PACK_ALIGNMENT, 4); glPixelStorei(GL_PACK_ROW_LENGTH, 0); RestoreGraphicsAPIState(); - - // Feed the shadow buffer back to the output. - GPU_HW::ReadVRAM(x, y, width, height, buffer); } void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) diff --git a/src/core/gpu_hw_opengl.h b/src/core/gpu_hw_opengl.h index fe4405ee0..877cc34d1 100644 --- a/src/core/gpu_hw_opengl.h +++ b/src/core/gpu_hw_opengl.h @@ -24,7 +24,7 @@ public: protected: void UpdateDisplay() override; - void ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer) override; + void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index 4df707786..d2458d038 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -10,6 +10,7 @@ Log_SetChannel(GPU_SW); GPU_SW::GPU_SW() { m_vram.fill(0); + m_vram_ptr = m_vram.data(); } GPU_SW::~GPU_SW() @@ -37,15 +38,9 @@ void GPU_SW::Reset() m_vram.fill(0); } -void GPU_SW::ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer) +void GPU_SW::ReadVRAM(u32 x, u32 y, u32 width, u32 height) { - u16* buffer_ptr = static_cast(buffer); - for (u32 yoffs = 0; yoffs < height; yoffs++) - { - u16* src_ptr = GetPixelPtr(x, y + yoffs); - std::copy_n(src_ptr, width, buffer_ptr); - buffer_ptr += width; - } + // No need to do anything - pointer is already up to date. } void GPU_SW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) diff --git a/src/core/gpu_sw.h b/src/core/gpu_sw.h index 2bb24ba82..8a93495c8 100644 --- a/src/core/gpu_sw.h +++ b/src/core/gpu_sw.h @@ -34,7 +34,7 @@ protected: u8 texcoord_x, texcoord_y; }; - void ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer) override; + void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;