diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index c71690578..4e09fb8d8 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -36,6 +36,7 @@ Log_SetChannel(GPU); std::unique_ptr g_gpu; alignas(HOST_PAGE_SIZE) u16 g_vram[VRAM_SIZE / sizeof(u16)]; +u16 g_gpu_clut[GPU_CLUT_SIZE]; const GPU::GP0CommandHandlerTable GPU::s_GP0_command_handler_table = GPU::GenerateGP0CommandHandlerTable(); @@ -173,7 +174,10 @@ void GPU::Reset(bool clear_vram) m_crtc_state.interlaced_display_field = 0; if (clear_vram) + { std::memset(g_vram, 0, sizeof(g_vram)); + std::memset(g_gpu_clut, 0, sizeof(g_gpu_clut)); + } // Force event to reschedule itself. m_crtc_tick_event->Deactivate(); @@ -224,6 +228,7 @@ void GPU::SoftReset() SetDrawMode(0); SetTexturePalette(0); SetTextureWindow(0); + InvalidateCLUT(); UpdateDMARequest(); UpdateCRTCConfig(); UpdateCommandTickEvent(); @@ -308,6 +313,18 @@ bool GPU::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_displ sw.Do(&m_command_total_words); sw.Do(&m_GPUREAD_latch); + if (sw.GetVersion() < 64) [[unlikely]] + { + // Clear CLUT cache and let it populate later. + InvalidateCLUT(); + } + else + { + sw.Do(&m_current_clut_reg_bits); + sw.Do(&m_current_clut_is_8bit); + sw.DoArray(g_gpu_clut, std::size(g_gpu_clut)); + } + sw.Do(&m_vram_transfer.x); sw.Do(&m_vram_transfer.y); sw.Do(&m_vram_transfer.width); @@ -1443,6 +1460,27 @@ void GPU::HandleGetGPUInfoCommand(u32 value) } } +void GPU::UpdateCLUTIfNeeded(GPUTextureMode texmode, GPUTexturePaletteReg clut) +{ + if (texmode >= GPUTextureMode::Direct16Bit) + return; + + const bool needs_8bit = (texmode == GPUTextureMode::Palette8Bit); + if ((clut.bits != m_current_clut_reg_bits) || BoolToUInt8(needs_8bit) > BoolToUInt8(m_current_clut_is_8bit)) + { + Log_DebugFmt("Reloading CLUT from {},{}, {}", clut.GetXBase(), clut.GetYBase(), needs_8bit ? "8-bit" : "4-bit"); + UpdateCLUT(clut, needs_8bit); + m_current_clut_reg_bits = clut.bits; + m_current_clut_is_8bit = needs_8bit; + } +} + +void GPU::InvalidateCLUT() +{ + m_current_clut_reg_bits = std::numeric_limits::max(); // will never match + m_current_clut_is_8bit = false; +} + void GPU::ClearDisplay() { ClearDisplayTexture(); @@ -1451,10 +1489,6 @@ void GPU::ClearDisplay() DestroyDeinterlaceTextures(); } -void GPU::UpdateDisplay() -{ -} - void GPU::ReadVRAM(u32 x, u32 y, u32 width, u32 height) { } @@ -1615,14 +1649,6 @@ void GPU::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 he } } -void GPU::DispatchRenderCommand() -{ -} - -void GPU::FlushRender() -{ -} - void GPU::SetDrawMode(u16 value) { GPUDrawModeReg new_mode_reg{static_cast(value & GPUDrawModeReg::MASK)}; @@ -1677,6 +1703,31 @@ void GPU::SetTextureWindow(u32 value) m_draw_mode.texture_window_changed = true; } +void GPU::ReadCLUT(u16* dest, GPUTexturePaletteReg reg, bool clut_is_8bit) +{ + const u16* src_row = &g_vram[reg.GetYBase() * VRAM_WIDTH]; + const u32 start_x = reg.GetXBase(); + if (!clut_is_8bit) + { + // Wraparound can't happen in 4-bit mode. + std::memcpy(dest, &src_row[start_x], sizeof(u16) * 16); + } + else + { + if ((start_x + 256) > VRAM_WIDTH) [[unlikely]] + { + const u32 end = VRAM_WIDTH - start_x; + const u32 start = 256 - end; + std::memcpy(dest, &src_row[start_x], sizeof(u16) * end); + std::memcpy(dest + end, src_row, sizeof(u16) * start); + } + else + { + std::memcpy(dest, &src_row[start_x], sizeof(u16) * 256); + } + } +} + bool GPU::CompileDisplayPipelines(bool display, bool deinterlace, bool chroma_smoothing) { GPUShaderGen shadergen(g_gpu_device->GetRenderAPI(), g_gpu_device->GetFeatures().dual_source_blend, diff --git a/src/core/gpu.h b/src/core/gpu.h index 22f1ca174..1ee2c3b1f 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once @@ -200,7 +200,7 @@ public: bool DumpVRAMToFile(const char* filename); // Ensures all buffered vertices are drawn. - virtual void FlushRender(); + virtual void FlushRender() = 0; ALWAYS_INLINE const void* GetDisplayTextureHandle() const { return m_display_texture; } ALWAYS_INLINE s32 GetDisplayWidth() const { return m_display_width; } @@ -225,6 +225,9 @@ public: /// Draws the current display texture, with any post-processing. bool PresentDisplay(); + /// Reads the CLUT from the specified coordinates, accounting for wrap-around. + static void ReadCLUT(u16* dest, GPUTexturePaletteReg reg, bool clut_is_8bit); + protected: TickCount CRTCTicksToSystemTicks(TickCount crtc_ticks, TickCount fractional_ticks) const; TickCount SystemTicksToCRTCTicks(TickCount sysclk_ticks, TickCount* fractional_ticks) const; @@ -306,14 +309,17 @@ protected: void ExecuteCommands(); void TryExecuteCommands(); void HandleGetGPUInfoCommand(u32 value); + void UpdateCLUTIfNeeded(GPUTextureMode texmode, GPUTexturePaletteReg clut); + void InvalidateCLUT(); // Rendering in the backend virtual void ReadVRAM(u32 x, u32 y, u32 width, u32 height); virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color); virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask); virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height); - virtual void DispatchRenderCommand(); - virtual void UpdateDisplay(); + virtual void DispatchRenderCommand() = 0; + virtual void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) = 0; + virtual void UpdateDisplay() = 0; virtual void DrawRendererStats(); ALWAYS_INLINE_RELEASE void AddDrawTriangleTicks(s32 x1, s32 y1, s32 x2, s32 y2, s32 x3, s32 y3, bool shaded, @@ -568,6 +574,11 @@ protected: /// GPUREAD value for non-VRAM-reads. u32 m_GPUREAD_latch = 0; + // These are the bits from the palette register, but zero extended to 32-bit, so we can have an "invalid" value. + // If an extra byte is ever not needed here for padding, the 8-bit flag could be packed into the MSB of this value. + u32 m_current_clut_reg_bits = {}; + bool m_current_clut_is_8bit = false; + /// True if currently executing/syncing. bool m_executing_commands = false; @@ -693,3 +704,4 @@ private: extern std::unique_ptr g_gpu; extern u16 g_vram[VRAM_SIZE / sizeof(u16)]; +extern u16 g_gpu_clut[GPU_CLUT_SIZE]; diff --git a/src/core/gpu_backend.cpp b/src/core/gpu_backend.cpp index f232d870a..23bc5e7bf 100644 --- a/src/core/gpu_backend.cpp +++ b/src/core/gpu_backend.cpp @@ -73,6 +73,12 @@ GPUBackendSetDrawingAreaCommand* GPUBackend::NewSetDrawingAreaCommand() AllocateCommand(GPUBackendCommandType::SetDrawingArea, sizeof(GPUBackendSetDrawingAreaCommand))); } +GPUBackendUpdateCLUTCommand* GPUBackend::NewUpdateCLUTCommand() +{ + return static_cast( + AllocateCommand(GPUBackendCommandType::UpdateCLUT, sizeof(GPUBackendUpdateCLUTCommand))); +} + GPUBackendDrawPolygonCommand* GPUBackend::NewDrawPolygonCommand(u32 num_vertices) { const u32 size = sizeof(GPUBackendDrawPolygonCommand) + (num_vertices * sizeof(GPUBackendDrawPolygonCommand::Vertex)); @@ -309,6 +315,13 @@ void GPUBackend::HandleCommand(const GPUBackendCommand* cmd) } break; + case GPUBackendCommandType::UpdateCLUT: + { + const GPUBackendUpdateCLUTCommand* ccmd = static_cast(cmd); + UpdateCLUT(ccmd->reg, ccmd->clut_is_8bit); + } + break; + case GPUBackendCommandType::DrawPolygon: { DrawPolygon(static_cast(cmd)); @@ -328,6 +341,6 @@ void GPUBackend::HandleCommand(const GPUBackendCommand* cmd) break; default: - break; + UnreachableCode(); } } diff --git a/src/core/gpu_backend.h b/src/core/gpu_backend.h index c764b4379..932419db9 100644 --- a/src/core/gpu_backend.h +++ b/src/core/gpu_backend.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once @@ -33,6 +33,7 @@ public: GPUBackendUpdateVRAMCommand* NewUpdateVRAMCommand(u32 num_words); GPUBackendCopyVRAMCommand* NewCopyVRAMCommand(); GPUBackendSetDrawingAreaCommand* NewSetDrawingAreaCommand(); + GPUBackendUpdateCLUTCommand* NewUpdateCLUTCommand(); GPUBackendDrawPolygonCommand* NewDrawPolygonCommand(u32 num_vertices); GPUBackendDrawRectangleCommand* NewDrawRectangleCommand(); GPUBackendDrawLineCommand* NewDrawLineCommand(u32 num_vertices); @@ -60,6 +61,7 @@ protected: virtual void DrawLine(const GPUBackendDrawLineCommand* cmd) = 0; virtual void FlushRender() = 0; virtual void DrawingAreaChanged() = 0; + virtual void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) = 0; void HandleCommand(const GPUBackendCommand* cmd); diff --git a/src/core/gpu_commands.cpp b/src/core/gpu_commands.cpp index cb0e491ad..9b6130431 100644 --- a/src/core/gpu_commands.cpp +++ b/src/core/gpu_commands.cpp @@ -198,6 +198,7 @@ bool GPU::HandleClearCacheCommand() { Log_DebugPrintf("GP0 clear cache"); m_draw_mode.SetTexturePageChanged(); + InvalidateCLUT(); m_fifo.RemoveOne(); AddCommandTicks(1); EndCommand(); @@ -347,6 +348,7 @@ bool GPU::HandleRenderPolygonCommand() SetDrawMode((texpage_attribute & GPUDrawModeReg::POLYGON_TEXPAGE_MASK) | (m_draw_mode.mode_reg.bits & ~GPUDrawModeReg::POLYGON_TEXPAGE_MASK)); SetTexturePalette(Truncate16(FifoPeek(2) >> 16)); + UpdateCLUTIfNeeded(m_draw_mode.mode_reg.texture_mode, m_draw_mode.palette_reg); } m_counters.num_vertices += num_vertices; @@ -371,7 +373,10 @@ bool GPU::HandleRenderRectangleCommand() SynchronizeCRTC(); if (rc.texture_enable) + { SetTexturePalette(Truncate16(FifoPeek(2) >> 16)); + UpdateCLUTIfNeeded(m_draw_mode.mode_reg.texture_mode, m_draw_mode.palette_reg); + } const TickCount setup_ticks = 16; AddCommandTicks(setup_ticks); diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index e9080e396..45c4ff96e 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -3110,6 +3110,13 @@ void GPU_HW::DispatchRenderCommand() LoadVertices(); } +void GPU_HW::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) +{ + // Not done in HW + GL_INS_FMT("Reloading CLUT from {},{}, {} not implemented", reg.GetXBase(), reg.GetYBase(), + clut_is_8bit ? "8-bit" : "4-bit"); +} + void GPU_HW::FlushRender() { const u32 base_vertex = m_batch_base_vertex; diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index e2d899fcf..3bd56f118 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -47,8 +47,8 @@ public: void UpdateSettings(const Settings& old_settings) override; void UpdateResolutionScale() override final; - std::tuple GetEffectiveDisplayResolution(bool scaled = true) override final; - std::tuple GetFullDisplayResolution(bool scaled = true) override final; + std::tuple GetEffectiveDisplayResolution(bool scaled = true) override; + std::tuple GetFullDisplayResolution(bool scaled = true) override; void UpdateDisplay() override; @@ -176,6 +176,7 @@ private: void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; void DispatchRenderCommand() override; + void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) override; void FlushRender() override; void DrawRendererStats() override; diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index 4d05a0365..bc8402ec4 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -824,6 +824,19 @@ void GPU_SW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 m_backend.PushCommand(cmd); } +void GPU_SW::FlushRender() +{ +} + +void GPU_SW::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) +{ + GPUBackendUpdateCLUTCommand* cmd = m_backend.NewUpdateCLUTCommand(); + FillBackendCommandParameters(cmd); + cmd->reg.bits = reg.bits; + cmd->clut_is_8bit = clut_is_8bit; + m_backend.PushCommand(cmd); +} + std::unique_ptr GPU::CreateSoftwareRenderer() { std::unique_ptr gpu(std::make_unique()); diff --git a/src/core/gpu_sw.h b/src/core/gpu_sw.h index 483fc91a3..7e510ffaa 100644 --- a/src/core/gpu_sw.h +++ b/src/core/gpu_sw.h @@ -40,6 +40,8 @@ protected: void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; + void FlushRender() override; + void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) override; template bool CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 line_skip); diff --git a/src/core/gpu_sw_backend.cpp b/src/core/gpu_sw_backend.cpp index 648291752..2bc9f501e 100644 --- a/src/core/gpu_sw_backend.cpp +++ b/src/core/gpu_sw_backend.cpp @@ -1,8 +1,8 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) -#include "gpu.h" #include "gpu_sw_backend.h" +#include "gpu.h" #include "system.h" #include "util/gpu_device.h" @@ -93,10 +93,8 @@ void ALWAYS_INLINE_RELEASE GPU_SW_Backend::ShadePixel(const GPUBackendDrawComman const u16 palette_value = GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x / 4)) % VRAM_WIDTH, (cmd->draw_mode.GetTexturePageBaseY() + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT); - const u16 palette_index = (palette_value >> ((texcoord_x % 4) * 4)) & 0x0Fu; - - texture_color.bits = - GetPixel((cmd->palette.GetXBase() + ZeroExtend32(palette_index)) % VRAM_WIDTH, cmd->palette.GetYBase()); + const size_t palette_index = (palette_value >> ((texcoord_x % 4) * 4)) & 0x0Fu; + texture_color.bits = g_gpu_clut[palette_index]; } break; @@ -105,9 +103,8 @@ void ALWAYS_INLINE_RELEASE GPU_SW_Backend::ShadePixel(const GPUBackendDrawComman const u16 palette_value = GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x / 2)) % VRAM_WIDTH, (cmd->draw_mode.GetTexturePageBaseY() + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT); - const u16 palette_index = (palette_value >> ((texcoord_x % 2) * 8)) & 0xFFu; - texture_color.bits = - GetPixel((cmd->palette.GetXBase() + ZeroExtend32(palette_index)) % VRAM_WIDTH, cmd->palette.GetYBase()); + const size_t palette_index = (palette_value >> ((texcoord_x % 2) * 8)) & 0xFFu; + texture_color.bits = g_gpu_clut[palette_index]; } break; @@ -869,9 +866,18 @@ void GPU_SW_Backend::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wi } } -void GPU_SW_Backend::FlushRender() {} +void GPU_SW_Backend::FlushRender() +{ +} -void GPU_SW_Backend::DrawingAreaChanged() {} +void GPU_SW_Backend::DrawingAreaChanged() +{ +} + +void GPU_SW_Backend::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) +{ + GPU::ReadCLUT(g_gpu_clut, reg, clut_is_8bit); +} GPU_SW_Backend::DrawLineFunction GPU_SW_Backend::GetDrawLineFunction(bool shading_enable, bool transparency_enable, bool dithering_enable) diff --git a/src/core/gpu_sw_backend.h b/src/core/gpu_sw_backend.h index 88dfb70a8..7cde4c2b7 100644 --- a/src/core/gpu_sw_backend.h +++ b/src/core/gpu_sw_backend.h @@ -1,8 +1,11 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once + +#include "gpu.h" #include "gpu_backend.h" + #include #include #include @@ -97,6 +100,7 @@ protected: void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) override; void FlushRender() override; void DrawingAreaChanged() override; + void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) override; ////////////////////////////////////////////////////////////////////////// // Rasterization diff --git a/src/core/gpu_types.h b/src/core/gpu_types.h index 93951c6df..556561c5a 100644 --- a/src/core/gpu_types.h +++ b/src/core/gpu_types.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once @@ -17,6 +17,7 @@ enum : u32 VRAM_HEIGHT_MASK = VRAM_HEIGHT - 1, TEXTURE_PAGE_WIDTH = 256, TEXTURE_PAGE_HEIGHT = 256, + GPU_CLUT_SIZE = 256, // In interlaced modes, we can exceed the 512 height of VRAM, up to 576 in PAL games. GPU_MAX_DISPLAY_WIDTH = 720, @@ -259,9 +260,10 @@ enum class GPUBackendCommandType : u8 UpdateVRAM, CopyVRAM, SetDrawingArea, + UpdateCLUT, DrawPolygon, DrawRectangle, - DrawLine + DrawLine, }; union GPUBackendCommandParameters @@ -336,6 +338,12 @@ struct GPUBackendSetDrawingAreaCommand : public GPUBackendCommand GPUDrawingArea new_area; }; +struct GPUBackendUpdateCLUTCommand : public GPUBackendCommand +{ + GPUTexturePaletteReg reg; + bool clut_is_8bit; +}; + struct GPUBackendDrawCommand : public GPUBackendCommand { GPUDrawModeReg draw_mode; diff --git a/src/core/save_state_version.h b/src/core/save_state_version.h index f9c251278..6d33a7c90 100644 --- a/src/core/save_state_version.h +++ b/src/core/save_state_version.h @@ -5,7 +5,7 @@ #include "types.h" static constexpr u32 SAVE_STATE_MAGIC = 0x43435544; -static constexpr u32 SAVE_STATE_VERSION = 63; +static constexpr u32 SAVE_STATE_VERSION = 64; static constexpr u32 SAVE_STATE_MINIMUM_VERSION = 42; static_assert(SAVE_STATE_VERSION >= SAVE_STATE_MINIMUM_VERSION);