From 49569d29aa4e15c1d6e1d5088f2064946e532520 Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Thu, 14 Nov 2019 22:17:09 +1000 Subject: [PATCH] GPU: Refactor command execution/VRAM->CPU transfers Simpler, and handles odd sizes correctly. --- src/core/gpu.cpp | 154 +++++++++++++++----------------------- src/core/gpu.h | 27 ++++++- src/core/gpu_commands.cpp | 117 +++++++++++++++++++---------- src/duckstation/main.cpp | 2 +- 4 files changed, 166 insertions(+), 134 deletions(-) diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 3ff74b33a..3b0e5a084 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -34,6 +34,7 @@ void GPU::UpdateSettings() {} void GPU::Reset() { SoftReset(); + m_GPUREAD_latch = 0; } void GPU::SoftReset() @@ -47,8 +48,10 @@ void GPU::SoftReset() m_crtc_state.regs.display_address_start = 0; m_crtc_state.regs.horizontal_display_range = 0xC60260; m_crtc_state.regs.vertical_display_range = 0x3FC10; + m_state = State::Idle; + m_command_total_words = 0; + m_vram_transfer = {}; m_GP0_buffer.clear(); - m_GPUREAD_buffer.clear(); m_render_state = {}; m_render_state.texture_page_changed = true; UpdateGPUSTAT(); @@ -108,11 +111,16 @@ bool GPU::DoState(StateWrapper& sw) sw.Do(&m_crtc_state.in_hblank); sw.Do(&m_crtc_state.in_vblank); - if (sw.IsReading()) - UpdateSliceTicks(); + sw.Do(&m_GPUREAD_latch); + + sw.Do(&m_vram_transfer.x); + sw.Do(&m_vram_transfer.y); + sw.Do(&m_vram_transfer.width); + sw.Do(&m_vram_transfer.height); + sw.Do(&m_vram_transfer.col); + sw.Do(&m_vram_transfer.row); sw.Do(&m_GP0_buffer); - sw.Do(&m_GPUREAD_buffer); if (sw.IsReading()) { @@ -133,6 +141,7 @@ bool GPU::DoState(StateWrapper& sw) sw.DoBytes(temp.data(), VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, temp.data()); UpdateDisplay(); + UpdateSliceTicks(); } else { @@ -149,9 +158,10 @@ void GPU::RestoreGraphicsAPIState() {} void GPU::UpdateGPUSTAT() { - m_GPUSTAT.ready_to_send_vram = !m_GPUREAD_buffer.empty(); - m_GPUSTAT.ready_to_recieve_cmd = m_GPUREAD_buffer.empty(); - m_GPUSTAT.ready_to_recieve_dma = m_GPUREAD_buffer.empty(); + m_GPUSTAT.ready_to_send_vram = (m_state == State::ReadingVRAM); + m_GPUSTAT.ready_to_recieve_cmd = (m_state == State::Idle); + m_GPUSTAT.ready_to_recieve_dma = + (m_state == State::Idle || (m_state != State::ReadingVRAM && m_command_total_words > 0)); bool dma_request; switch (m_GPUSTAT.dma_direction) @@ -228,22 +238,8 @@ void GPU::DMARead(u32* words, u32 word_count) return; } - const u32 words_to_copy = std::min(word_count, static_cast(m_GPUREAD_buffer.size())); - if (!m_GPUREAD_buffer.empty()) - { - auto it = m_GPUREAD_buffer.begin(); - for (u32 i = 0; i < word_count; i++) - words[i] = *(it++); - - m_GPUREAD_buffer.erase(m_GPUREAD_buffer.begin(), it); - } - if (words_to_copy < word_count) - { - Log_WarningPrintf("Partially-empty GPUREAD buffer on GPU DMA read"); - std::fill_n(words + words_to_copy, word_count - words_to_copy, u32(0)); - } - - UpdateGPUSTAT(); + for (u32 i = 0; i < word_count; i++) + words[i] = ReadGPUREAD(); } void GPU::DMAWrite(const u32* words, u32 word_count) @@ -252,53 +248,8 @@ void GPU::DMAWrite(const u32* words, u32 word_count) { case DMADirection::CPUtoGP0: { -#if 0 - // partial command buffered? have to go through the slow path - if (!m_GP0_buffer.empty()) - { - std::copy(words, words + word_count, std::back_inserter(m_GP0_buffer)); - const u32* command_ptr = m_GP0_buffer.data(); - u32 command_size = static_cast(m_GP0_buffer.size()); - do - { - const u32* prev_command_ptr = command_ptr; - const bool result = HandleGP0Command(command_ptr, command_size); - command_size -= command_ptr - prev_command_ptr; - if (!result) - break; - } while (command_size > 0); - - if (command_size > 0 && command_size < m_GP0_buffer.size()) - m_GP0_buffer.erase(m_GP0_buffer.begin(), m_GP0_buffer.begin() + (m_GP0_buffer.size() - command_size)); - else if (command_size == 0) - m_GP0_buffer.clear(); - } - else - { - // fast path - read directly from DMA buffer - const u32* command_ptr = words; - u32 command_size = word_count; - do - { - const u32* prev_command_ptr = command_ptr; - const bool result = HandleGP0Command(command_ptr, command_size); - command_size -= command_ptr - prev_command_ptr; - if (!result) - break; - } while (command_size > 0); - - if (command_size > 0) - { - // partial command left over - std::copy(command_ptr, command_ptr + command_size, std::back_inserter(m_GP0_buffer)); - } - } - - UpdateGPUSTAT(); -#else - for (u32 i = 0; i < word_count; i++) - WriteGP0(words[i]); -#endif + std::copy(words, words + word_count, std::back_inserter(m_GP0_buffer)); + ExecuteCommands(); } break; @@ -491,32 +442,44 @@ void GPU::Execute(TickCount ticks) u32 GPU::ReadGPUREAD() { - if (m_GPUREAD_buffer.empty()) + if (m_state != State::ReadingVRAM) + return m_GPUREAD_latch; + + // Read two pixels out of VRAM and combine them. Zero fill odd pixel counts. + u32 value = 0; + for (u32 i = 0; i < 2; i++) { - Log_DevPrintf("GPUREAD read while buffer is empty"); - return UINT32_C(0xFFFFFFFF); + // Read with correct wrap-around behavior. + const u16 read_x = (m_vram_transfer.x + m_vram_transfer.col) % VRAM_WIDTH; + const u16 read_y = (m_vram_transfer.y + m_vram_transfer.row) % VRAM_HEIGHT; + value = (value << 16) | ZeroExtend32(m_vram_ptr[read_y * VRAM_WIDTH + read_x]); + + if (++m_vram_transfer.col == m_vram_transfer.width) + { + m_vram_transfer.col = 0; + + if (++m_vram_transfer.row == m_vram_transfer.height) + { + Log_DebugPrintf("End of VRAM->CPU transfer"); + m_vram_transfer = {}; + m_state = State::Idle; + UpdateGPUSTAT(); + + // end of transfer, catch up on any commands which were written (unlikely) + ExecuteCommands(); + break; + } + } } - const u32 value = m_GPUREAD_buffer.front(); - m_GPUREAD_buffer.pop_front(); - UpdateGPUSTAT(); + m_GPUREAD_latch = value; return value; } void GPU::WriteGP0(u32 value) { m_GP0_buffer.push_back(value); - Assert(m_GP0_buffer.size() <= 1048576); - - const u32* command_ptr = m_GP0_buffer.data(); - const u32 command = m_GP0_buffer[0] >> 24; - if ((this->*s_GP0_command_handler_table[command])(command_ptr, static_cast(m_GP0_buffer.size()))) - { - DebugAssert(static_cast(command_ptr - m_GP0_buffer.data()) == m_GP0_buffer.size()); - m_GP0_buffer.clear(); - } - - UpdateGPUSTAT(); + ExecuteCommands(); } void GPU::WriteGP1(u32 value) @@ -535,6 +498,9 @@ void GPU::WriteGP1(u32 value) case 0x01: // Clear FIFO { Log_DebugPrintf("GP1 clear FIFO"); + m_state = State::Idle; + m_command_total_words = 0; + m_vram_transfer = {}; m_GP0_buffer.clear(); UpdateGPUSTAT(); } @@ -658,31 +624,31 @@ void GPU::HandleGetGPUInfoCommand(u32 value) case 0x02: // Get Texture Window { Log_DebugPrintf("Get texture window"); - m_GPUREAD_buffer.push_back(m_render_state.texture_window_value); + m_GPUREAD_latch = m_render_state.texture_window_value; } break; case 0x03: // Get Draw Area Top Left { Log_DebugPrintf("Get drawing area top left"); - m_GPUREAD_buffer.push_back((m_drawing_area.left & UINT32_C(0b1111111111)) | - ((m_drawing_area.top & UINT32_C(0b1111111111)) << 10)); + m_GPUREAD_latch = + ((m_drawing_area.left & UINT32_C(0b1111111111)) | ((m_drawing_area.top & UINT32_C(0b1111111111)) << 10)); } break; case 0x04: // Get Draw Area Bottom Right { Log_DebugPrintf("Get drawing area bottom right"); - m_GPUREAD_buffer.push_back((m_drawing_area.right & UINT32_C(0b1111111111)) | - ((m_drawing_area.bottom & UINT32_C(0b1111111111)) << 10)); + m_GPUREAD_latch = + ((m_drawing_area.right & UINT32_C(0b1111111111)) | ((m_drawing_area.bottom & UINT32_C(0b1111111111)) << 10)); } break; case 0x05: // Get Drawing Offset { Log_DebugPrintf("Get drawing offset"); - m_GPUREAD_buffer.push_back((m_drawing_offset.x & INT32_C(0b11111111111)) | - ((m_drawing_offset.y & INT32_C(0b11111111111)) << 11)); + m_GPUREAD_latch = + ((m_drawing_offset.x & INT32_C(0b11111111111)) | ((m_drawing_offset.y & INT32_C(0b11111111111)) << 11)); } break; diff --git a/src/core/gpu.h b/src/core/gpu.h index 1b1eebb65..7413aeaa1 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -72,6 +72,15 @@ public: Disabled = 4 // Not a register value }; + enum class State : u8 + { + Idle, + WaitingForParameters, + ExecutingCommand, + ReadingVRAM, + WritingVRAM + }; + enum : u32 { VRAM_WIDTH = 1024, @@ -285,6 +294,8 @@ protected: u32 ReadGPUREAD(); void WriteGP0(u32 value); void WriteGP1(u32 value); + void ExecuteCommands(); + void EndCommand(); void HandleGetGPUInfoCommand(u32 value); // Rendering in the backend @@ -467,8 +478,22 @@ protected: bool in_vblank; } m_crtc_state = {}; + State m_state = State::Idle; + u32 m_command_total_words = 0; + struct VRAMTransfer + { + u16 x; + u16 y; + u16 width; + u16 height; + u16 col; + u16 row; + } m_vram_transfer = {}; + + /// GPUREAD value for non-VRAM-reads. + u32 m_GPUREAD_latch = 0; + std::vector m_GP0_buffer; - std::deque m_GPUREAD_buffer; struct Stats { diff --git a/src/core/gpu_commands.cpp b/src/core/gpu_commands.cpp index eebcf1a8f..79819924a 100644 --- a/src/core/gpu_commands.cpp +++ b/src/core/gpu_commands.cpp @@ -5,6 +5,14 @@ #include "system.h" Log_SetChannel(GPU); +#define CHECK_COMMAND_SIZE(num_words) \ + if (command_size < num_words) \ + { \ + m_command_total_words = num_words; \ + m_state = State::WaitingForParameters; \ + return false; \ + } + static u32 s_cpu_to_vram_dump_id = 1; static u32 s_vram_to_cpu_dump_id = 1; @@ -13,6 +21,38 @@ static constexpr u32 ReplaceZero(u32 value, u32 value_for_zero) return value == 0 ? value_for_zero : value; } +void GPU::ExecuteCommands() +{ + Assert(m_GP0_buffer.size() < 1048576); + + const u32* command_ptr = m_GP0_buffer.data(); + u32 command_size = static_cast(m_GP0_buffer.size()); + while (m_state != State::ReadingVRAM && command_size > 0 && command_size >= m_command_total_words) + { + const u32 command = command_ptr[0] >> 24; + const u32* old_command_ptr = command_ptr; + if (!(this->*s_GP0_command_handler_table[command])(command_ptr, command_size)) + break; + + const u32 words_used = static_cast(command_ptr - old_command_ptr); + DebugAssert(words_used <= command_size); + command_size -= words_used; + } + + if (command_size == 0) + m_GP0_buffer.clear(); + else if (command_ptr > m_GP0_buffer.data()) + m_GP0_buffer.erase(m_GP0_buffer.begin(), m_GP0_buffer.begin() + (command_ptr - m_GP0_buffer.data())); + + UpdateGPUSTAT(); +} + +void GPU::EndCommand() +{ + m_state = State::Idle; + m_command_total_words = 0; +} + GPU::GP0CommandHandlerTable GPU::GenerateGP0CommandHandlerTable() { GP0CommandHandlerTable table = {}; @@ -50,12 +90,14 @@ bool GPU::HandleUnknownGP0Command(const u32*& command_ptr, u32 command_size) { const u32 command = *(command_ptr++) >> 24; Log_ErrorPrintf("Unimplemented GP0 command 0x%02X", command); + EndCommand(); return true; } bool GPU::HandleNOPCommand(const u32*& command_ptr, u32 command_size) { command_ptr++; + EndCommand(); return true; } @@ -63,6 +105,7 @@ bool GPU::HandleClearCacheCommand(const u32*& command_ptr, u32 command_size) { Log_DebugPrintf("GP0 clear cache"); command_ptr++; + EndCommand(); return true; } @@ -75,6 +118,7 @@ bool GPU::HandleInterruptRequestCommand(const u32*& command_ptr, u32 command_siz m_interrupt_controller->InterruptRequest(InterruptController::IRQ::GPU); } + EndCommand(); return true; } @@ -89,6 +133,8 @@ bool GPU::HandleSetDrawModeCommand(const u32*& command_ptr, u32 command_size) m_render_state.texture_x_flip = (param & (1 << 12)) != 0; m_render_state.texture_y_flip = (param & (1 << 13)) != 0; Log_DebugPrintf("Set draw mode %08X", param); + + EndCommand(); return true; } @@ -99,6 +145,8 @@ bool GPU::HandleSetTextureWindowCommand(const u32*& command_ptr, u32 command_siz Log_DebugPrintf("Set texture window %02X %02X %02X %02X", m_render_state.texture_window_mask_x, m_render_state.texture_window_mask_y, m_render_state.texture_window_offset_x, m_render_state.texture_window_offset_y); + + EndCommand(); return true; } @@ -117,6 +165,7 @@ bool GPU::HandleSetDrawingAreaTopLeftCommand(const u32*& command_ptr, u32 comman m_drawing_area_changed = true; } + EndCommand(); return true; } @@ -136,6 +185,7 @@ bool GPU::HandleSetDrawingAreaBottomRightCommand(const u32*& command_ptr, u32 co m_drawing_area_changed = true; } + EndCommand(); return true; } @@ -153,6 +203,8 @@ bool GPU::HandleSetDrawingOffsetCommand(const u32*& command_ptr, u32 command_siz m_drawing_offset.y = y; m_drawing_offset_changed = true; } + + EndCommand(); return true; } @@ -164,6 +216,8 @@ bool GPU::HandleSetMaskBitCommand(const u32*& command_ptr, u32 command_size) m_GPUSTAT.draw_to_masked_pixels = (param & 0x01) != 0; Log_DebugPrintf("Set mask bit %u %u", BoolToUInt32(m_GPUSTAT.draw_set_mask_bit), BoolToUInt32(m_GPUSTAT.draw_to_masked_pixels)); + + EndCommand(); return true; } @@ -229,8 +283,7 @@ bool GPU::HandleRenderCommand(const u32*& command_ptr, u32 command_size) return true; } - if (command_size < total_words) - return false; + CHECK_COMMAND_SIZE(total_words); static constexpr std::array primitive_names = {{"", "polygon", "line", "rectangle"}}; @@ -244,13 +297,13 @@ bool GPU::HandleRenderCommand(const u32*& command_ptr, u32 command_size) command_ptr += total_words; m_stats.num_vertices += num_vertices; m_stats.num_polygons++; + EndCommand(); return true; } bool GPU::HandleFillRectangleCommand(const u32*& command_ptr, u32 command_size) { - if (command_size < 3) - return false; + CHECK_COMMAND_SIZE(3); FlushRender(); @@ -265,20 +318,24 @@ bool GPU::HandleFillRectangleCommand(const u32*& command_ptr, u32 command_size) FillVRAM(dst_x, dst_y, width, height, color); m_stats.num_vram_fills++; + EndCommand(); return true; } bool GPU::HandleCopyRectangleCPUToVRAMCommand(const u32*& command_ptr, u32 command_size) { - if (command_size < 3) - return false; + CHECK_COMMAND_SIZE(3); const u32 copy_width = ReplaceZero(command_ptr[2] & 0x3FF, 0x400); const u32 copy_height = ReplaceZero((command_ptr[2] >> 16) & 0x1FF, 0x200); const u32 num_pixels = copy_width * copy_height; const u32 num_words = 3 + ((num_pixels + 1) / 2); if (command_size < num_words) + { + m_command_total_words = num_words; + m_state = State::WritingVRAM; return false; + } const u32 dst_x = command_ptr[1] & 0x3FF; const u32 dst_y = (command_ptr[1] >> 16) & 0x3FF; @@ -296,57 +353,40 @@ bool GPU::HandleCopyRectangleCPUToVRAMCommand(const u32*& command_ptr, u32 comma UpdateVRAM(dst_x, dst_y, copy_width, copy_height, &command_ptr[3]); command_ptr += num_words; m_stats.num_vram_writes++; + EndCommand(); return true; } bool GPU::HandleCopyRectangleVRAMToCPUCommand(const u32*& command_ptr, u32 command_size) { - if (command_size < 3) - return false; + CHECK_COMMAND_SIZE(3); - const u32 width = ReplaceZero(command_ptr[2] & 0x3FF, 0x400); - const u32 height = ReplaceZero((command_ptr[2] >> 16) & 0x1FF, 0x200); - const u32 num_pixels = width * height; - const u32 num_words = ((num_pixels + 1) / 2); - const u32 src_x = command_ptr[1] & 0x3FF; - const u32 src_y = (command_ptr[1] >> 16) & 0x3FF; + m_vram_transfer.width = ((Truncate16(command_ptr[2]) - 1) & 0x3FF) + 1; + m_vram_transfer.height = ((Truncate16(command_ptr[2] >> 16) - 1) & 0x1FF) + 1; + m_vram_transfer.x = Truncate16(command_ptr[1] & 0x3FF); + m_vram_transfer.y = Truncate16((command_ptr[1] >> 16) & 0x3FF); command_ptr += 3; - Log_DebugPrintf("Copy rectangle from VRAM to CPU offset=(%u,%u), size=(%u,%u)", src_x, src_y, width, height); + Log_DebugPrintf("Copy rectangle from VRAM to CPU offset=(%u,%u), size=(%u,%u)", m_vram_transfer.x, m_vram_transfer.y, + m_vram_transfer.width, m_vram_transfer.height); + DebugAssert(m_vram_transfer.col == 0 && m_vram_transfer.row == 0); // all rendering should be done first... FlushRender(); - // TODO: A better way of doing this.. get rid of the m_GPUREAD_buffer. - ReadVRAM(src_x, src_y, width, height); - for (u32 row = 0; row < height;) - { - const u32 row_offset = ((src_y + row++) % VRAM_HEIGHT) * VRAM_WIDTH; - for (u32 col = 0; col < width;) - { - // TODO: Handle unaligned reads... - const u32 col_offset1 = row_offset + ((src_x + col++) % VRAM_WIDTH); - const u32 col_offset2 = row_offset + ((src_x + col++) % VRAM_WIDTH); - m_GPUREAD_buffer.push_back(ZeroExtend32(m_vram_ptr[col_offset1]) | (ZeroExtend32(m_vram_ptr[col_offset2]) << 16)); - } - } - - if (m_system->GetSettings().debugging.dump_vram_to_cpu_copies) - { - std::vector temp; - std::copy(m_GPUREAD_buffer.begin(), m_GPUREAD_buffer.end(), std::back_inserter(temp)); - DumpVRAMToFile(SmallString::FromFormat("vram_to_cpu_copy_%u.png", s_vram_to_cpu_dump_id++), width, height, - sizeof(u16) * width, temp.data(), true); - } + // ensure VRAM shadow is up to date + ReadVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, m_vram_transfer.height); + // switch to pixel-by-pixel read state m_stats.num_vram_reads++; + m_state = State::ReadingVRAM; + m_command_total_words = 0; return true; } bool GPU::HandleCopyRectangleVRAMToVRAMCommand(const u32*& command_ptr, u32 command_size) { - if (command_size < 4) - return false; + CHECK_COMMAND_SIZE(4); const u32 src_x = command_ptr[1] & 0x3FF; const u32 src_y = (command_ptr[1] >> 16) & 0x3FF; @@ -369,5 +409,6 @@ bool GPU::HandleCopyRectangleVRAMToVRAMCommand(const u32*& command_ptr, u32 comm FlushRender(); CopyVRAM(src_x, src_y, dst_x, dst_y, width, height); m_stats.num_vram_copies++; + EndCommand(); return true; } diff --git a/src/duckstation/main.cpp b/src/duckstation/main.cpp index ee1e25a59..a1daf9d8f 100644 --- a/src/duckstation/main.cpp +++ b/src/duckstation/main.cpp @@ -83,7 +83,7 @@ int main(int argc, char* argv[]) #else g_pLog->SetConsoleOutputParams(true, nullptr, LOGLEVEL_DEBUG); // g_pLog->SetConsoleOutputParams(true, "GPU GPU_HW_OpenGL SPU Pad DigitalController", LOGLEVEL_DEBUG); - g_pLog->SetConsoleOutputParams(true, "GPU GPU_HW_OpenGL Pad DigitalController MemoryCard InterruptController SPU MDEC", LOGLEVEL_DEBUG); + // g_pLog->SetConsoleOutputParams(true, "GPU GPU_HW_OpenGL Pad DigitalController MemoryCard InterruptController SPU MDEC", LOGLEVEL_DEBUG); // g_pLog->SetFilterLevel(LOGLEVEL_TRACE); g_pLog->SetFilterLevel(LOGLEVEL_DEBUG); // g_pLog->SetFilterLevel(LOGLEVEL_DEV);