diff --git a/README.md b/README.md index a667aafa8..b747fbf86 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ A "BIOS" ROM image is required to to start the emulator and to play games. You c ## Latest News +- 2020/08/01: Initial PGXP (geometry/perspective correction) support. - 2020/07/28: Qt frontend supports displaying interface in multiple languages. - 2020/07/23: m3u multi-disc support for libretro core. - 2020/07/22: Support multiple bindings for each controller button/axis. diff --git a/android/app/src/main/res/xml/root_preferences.xml b/android/app/src/main/res/xml/root_preferences.xml index c4a10f0a5..6589670a5 100644 --- a/android/app/src/main/res/xml/root_preferences.xml +++ b/android/app/src/main/res/xml/root_preferences.xml @@ -129,6 +129,50 @@ + + + + + + + + + + + + + + + + + + + + ] + app:useSimpleSummaryProvider="true" /> + @@ -126,6 +127,7 @@ + diff --git a/src/core/core.vcxproj.filters b/src/core/core.vcxproj.filters index 9261a37ae..775cff690 100644 --- a/src/core/core.vcxproj.filters +++ b/src/core/core.vcxproj.filters @@ -46,6 +46,7 @@ + @@ -94,5 +95,6 @@ + \ No newline at end of file diff --git a/src/core/cpu_core.cpp b/src/core/cpu_core.cpp index 6a541aa7d..251d8810d 100644 --- a/src/core/cpu_core.cpp +++ b/src/core/cpu_core.cpp @@ -6,6 +6,8 @@ #include "cpu_disasm.h" #include "cpu_recompiler_thunks.h" #include "gte.h" +#include "pgxp.h" +#include "settings.h" #include "timing_event.h" #include Log_SetChannel(CPU::Core); @@ -73,6 +75,9 @@ void Initialize() g_state.cop0_regs.PRID = UINT32_C(0x00000002); GTE::Initialize(); + + if (g_settings.gpu_pgxp_enable) + PGXP::Initialize(); } void Shutdown() @@ -100,6 +105,9 @@ void Reset() GTE::Reset(); SetPC(RESET_VECTOR); + + if (g_settings.gpu_pgxp_enable) + PGXP::Initialize(); } bool DoState(StateWrapper& sw) @@ -137,6 +145,9 @@ bool DoState(StateWrapper& sw) if (!GTE::DoState(sw)) return false; + if (sw.IsReading()) + PGXP::Initialize(); + return !sw.HasError(); } @@ -893,7 +904,12 @@ void ExecuteInstruction() if (!ReadMemoryByte(addr, &value)) return; - WriteRegDelayed(inst.i.rt, SignExtend32(value)); + const u32 sxvalue = SignExtend32(value); + + WriteRegDelayed(inst.i.rt, sxvalue); + + if (g_settings.gpu_pgxp_enable) + PGXP::CPU_LBx(inst.bits, sxvalue, addr); } break; @@ -904,7 +920,11 @@ void ExecuteInstruction() if (!ReadMemoryHalfWord(addr, &value)) return; - WriteRegDelayed(inst.i.rt, SignExtend32(value)); + const u32 sxvalue = SignExtend32(value); + WriteRegDelayed(inst.i.rt, sxvalue); + + if (g_settings.gpu_pgxp_enable) + PGXP::CPU_LHx(inst.bits, sxvalue, addr); } break; @@ -916,6 +936,9 @@ void ExecuteInstruction() return; WriteRegDelayed(inst.i.rt, value); + + if (g_settings.gpu_pgxp_enable) + PGXP::CPU_LW(inst.bits, value, addr); } break; @@ -926,7 +949,11 @@ void ExecuteInstruction() if (!ReadMemoryByte(addr, &value)) return; - WriteRegDelayed(inst.i.rt, ZeroExtend32(value)); + const u32 zxvalue = ZeroExtend32(value); + WriteRegDelayed(inst.i.rt, zxvalue); + + if (g_settings.gpu_pgxp_enable) + PGXP::CPU_LBx(inst.bits, zxvalue, addr); } break; @@ -937,7 +964,11 @@ void ExecuteInstruction() if (!ReadMemoryHalfWord(addr, &value)) return; - WriteRegDelayed(inst.i.rt, ZeroExtend32(value)); + const u32 zxvalue = ZeroExtend32(value); + WriteRegDelayed(inst.i.rt, zxvalue); + + if (g_settings.gpu_pgxp_enable) + PGXP::CPU_LHx(inst.bits, zxvalue, addr); } break; @@ -966,6 +997,9 @@ void ExecuteInstruction() } WriteRegDelayed(inst.i.rt, new_value); + + if (g_settings.gpu_pgxp_enable) + PGXP::CPU_LW(inst.bits, new_value, addr); } break; @@ -974,6 +1008,9 @@ void ExecuteInstruction() const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); const u8 value = Truncate8(ReadReg(inst.i.rt)); WriteMemoryByte(addr, value); + + if (g_settings.gpu_pgxp_enable) + PGXP::CPU_SB(inst.bits, value, addr); } break; @@ -982,6 +1019,9 @@ void ExecuteInstruction() const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); const u16 value = Truncate16(ReadReg(inst.i.rt)); WriteMemoryHalfWord(addr, value); + + if (g_settings.gpu_pgxp_enable) + PGXP::CPU_SH(inst.bits, value, addr); } break; @@ -990,6 +1030,9 @@ void ExecuteInstruction() const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); const u32 value = ReadReg(inst.i.rt); WriteMemoryWord(addr, value); + + if (g_settings.gpu_pgxp_enable) + PGXP::CPU_SW(inst.bits, value, addr); } break; @@ -1017,6 +1060,9 @@ void ExecuteInstruction() } WriteMemoryWord(aligned_addr, new_value); + + if (g_settings.gpu_pgxp_enable) + PGXP::CPU_SW(inst.bits, new_value, addr); } break; @@ -1132,6 +1178,9 @@ void ExecuteInstruction() return; GTE::WriteRegister(ZeroExtend32(static_cast(inst.i.rt.GetValue())), value); + + if (g_settings.gpu_pgxp_enable) + PGXP::CPU_LWC2(inst.bits, value, addr); } break; @@ -1147,6 +1196,9 @@ void ExecuteInstruction() const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); const u32 value = GTE::ReadRegister(ZeroExtend32(static_cast(inst.i.rt.GetValue()))); WriteMemoryWord(addr, value); + + if (g_settings.gpu_pgxp_enable) + PGXP::CPU_SWC2(inst.bits, value, addr); } break; @@ -1230,20 +1282,44 @@ void ExecuteCop2Instruction() switch (inst.cop.CommonOp()) { case CopCommonInstruction::cfcn: - WriteRegDelayed(inst.r.rt, GTE::ReadRegister(static_cast(inst.r.rd.GetValue()) + 32)); - break; + { + const u32 value = GTE::ReadRegister(static_cast(inst.r.rd.GetValue()) + 32); + WriteRegDelayed(inst.r.rt, value); + + if (g_settings.gpu_pgxp_enable) + PGXP::CPU_CFC2(inst.bits, value, value); + } + break; case CopCommonInstruction::ctcn: - GTE::WriteRegister(static_cast(inst.r.rd.GetValue()) + 32, ReadReg(inst.r.rt)); - break; + { + const u32 value = ReadReg(inst.r.rt); + GTE::WriteRegister(static_cast(inst.r.rd.GetValue()) + 32, value); + + if (g_settings.gpu_pgxp_enable) + PGXP::CPU_CTC2(inst.bits, value, value); + } + break; case CopCommonInstruction::mfcn: - WriteRegDelayed(inst.r.rt, GTE::ReadRegister(static_cast(inst.r.rd.GetValue()))); - break; + { + const u32 value = GTE::ReadRegister(static_cast(inst.r.rd.GetValue())); + WriteRegDelayed(inst.r.rt, value); + + if (g_settings.gpu_pgxp_enable) + PGXP::CPU_MFC2(inst.bits, value, value); + } + break; case CopCommonInstruction::mtcn: - GTE::WriteRegister(static_cast(inst.r.rd.GetValue()), ReadReg(inst.r.rt)); - break; + { + const u32 value = ReadReg(inst.r.rt); + GTE::WriteRegister(static_cast(inst.r.rd.GetValue()), value); + + if (g_settings.gpu_pgxp_enable) + PGXP::CPU_MTC2(inst.bits, value, value); + } + break; case CopCommonInstruction::bcnc: default: diff --git a/src/core/cpu_recompiler_code_generator.cpp b/src/core/cpu_recompiler_code_generator.cpp index 42523791a..47fbc8afd 100644 --- a/src/core/cpu_recompiler_code_generator.cpp +++ b/src/core/cpu_recompiler_code_generator.cpp @@ -3,6 +3,8 @@ #include "cpu_core.h" #include "cpu_disasm.h" #include "gte.h" +#include "pgxp.h" +#include "settings.h" Log_SetChannel(CPU::Recompiler); // TODO: Turn load+sext/zext into a single signed/unsigned load @@ -1115,19 +1117,32 @@ bool CodeGenerator::Compile_Load(const CodeBlockInstruction& cbi) { case InstructionOp::lb: case InstructionOp::lbu: + { result = EmitLoadGuestMemory(cbi, address, RegSize_8); ConvertValueSizeInPlace(&result, RegSize_32, (cbi.instruction.op == InstructionOp::lb)); - break; + if (g_settings.gpu_pgxp_enable) + EmitFunctionCall(nullptr, PGXP::CPU_LBx, Value::FromConstantU32(cbi.instruction.bits), result, address); + } + break; case InstructionOp::lh: case InstructionOp::lhu: + { result = EmitLoadGuestMemory(cbi, address, RegSize_16); ConvertValueSizeInPlace(&result, RegSize_32, (cbi.instruction.op == InstructionOp::lh)); - break; + + if (g_settings.gpu_pgxp_enable) + EmitFunctionCall(nullptr, PGXP::CPU_LHx, Value::FromConstantU32(cbi.instruction.bits), result, address); + } + break; case InstructionOp::lw: + { result = EmitLoadGuestMemory(cbi, address, RegSize_32); - break; + if (g_settings.gpu_pgxp_enable) + EmitFunctionCall(nullptr, PGXP::CPU_LW, Value::FromConstantU32(cbi.instruction.bits), result, address); + } + break; default: UnreachableCode(); @@ -1153,16 +1168,34 @@ bool CodeGenerator::Compile_Store(const CodeBlockInstruction& cbi) switch (cbi.instruction.op) { case InstructionOp::sb: + { EmitStoreGuestMemory(cbi, address, value.ViewAsSize(RegSize_8)); - break; + if (g_settings.gpu_pgxp_enable) + { + EmitFunctionCall(nullptr, PGXP::CPU_SB, Value::FromConstantU32(cbi.instruction.bits), + value.ViewAsSize(RegSize_8), address); + } + } + break; case InstructionOp::sh: + { EmitStoreGuestMemory(cbi, address, value.ViewAsSize(RegSize_16)); - break; + if (g_settings.gpu_pgxp_enable) + { + EmitFunctionCall(nullptr, PGXP::CPU_SH, Value::FromConstantU32(cbi.instruction.bits), + value.ViewAsSize(RegSize_16), address); + } + } + break; case InstructionOp::sw: + { EmitStoreGuestMemory(cbi, address, value); - break; + if (g_settings.gpu_pgxp_enable) + EmitFunctionCall(nullptr, PGXP::CPU_SW, Value::FromConstantU32(cbi.instruction.bits), value, address); + } + break; default: UnreachableCode(); @@ -1827,11 +1860,17 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi) { Value value = EmitLoadGuestMemory(cbi, address, RegSize_32); DoGTERegisterWrite(reg, value); + + if (g_settings.gpu_pgxp_enable) + EmitFunctionCall(nullptr, PGXP::CPU_LWC2, Value::FromConstantU32(cbi.instruction.bits), value, address); } else { Value value = DoGTERegisterRead(reg); EmitStoreGuestMemory(cbi, address, value); + + if (g_settings.gpu_pgxp_enable) + EmitFunctionCall(nullptr, PGXP::CPU_SWC2, Value::FromConstantU32(cbi.instruction.bits), value, address); } InstructionEpilogue(cbi); @@ -1851,7 +1890,19 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi) ((cbi.instruction.cop.CommonOp() == CopCommonInstruction::cfcn) ? 32 : 0); InstructionPrologue(cbi, 1); - m_register_cache.WriteGuestRegisterDelayed(cbi.instruction.r.rt, DoGTERegisterRead(reg)); + + Value value = DoGTERegisterRead(reg); + + // PGXP done first here before ownership is transferred. + if (g_settings.gpu_pgxp_enable) + { + EmitFunctionCall( + nullptr, (cbi.instruction.cop.CommonOp() == CopCommonInstruction::cfcn) ? PGXP::CPU_CFC2 : PGXP::CPU_MFC2, + Value::FromConstantU32(cbi.instruction.bits), value, value); + } + + m_register_cache.WriteGuestRegisterDelayed(cbi.instruction.r.rt, std::move(value)); + InstructionEpilogue(cbi); return true; } @@ -1863,7 +1914,17 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi) ((cbi.instruction.cop.CommonOp() == CopCommonInstruction::ctcn) ? 32 : 0); InstructionPrologue(cbi, 1); - DoGTERegisterWrite(reg, m_register_cache.ReadGuestRegister(cbi.instruction.r.rt)); + + Value value = m_register_cache.ReadGuestRegister(cbi.instruction.r.rt); + DoGTERegisterWrite(reg, value); + + if (g_settings.gpu_pgxp_enable) + { + EmitFunctionCall( + nullptr, (cbi.instruction.cop.CommonOp() == CopCommonInstruction::ctcn) ? PGXP::CPU_CTC2 : PGXP::CPU_MTC2, + Value::FromConstantU32(cbi.instruction.bits), value, value); + } + InstructionEpilogue(cbi); return true; } diff --git a/src/core/dma.cpp b/src/core/dma.cpp index f6157f4cf..d685f5158 100644 --- a/src/core/dma.cpp +++ b/src/core/dma.cpp @@ -429,7 +429,8 @@ void DMA::UnhaltTransfer(TickCount ticks) TickCount DMA::TransferMemoryToDevice(Channel channel, u32 address, u32 increment, u32 word_count) { const u32* src_pointer = reinterpret_cast(Bus::g_ram + address); - if (static_cast(increment) < 0 || ((address + (increment * word_count)) & ADDRESS_MASK) <= address) + if (channel != Channel::GPU && + (static_cast(increment) < 0 || ((address + (increment * word_count)) & ADDRESS_MASK) <= address)) { // Use temp buffer if it's wrapping around if (m_transfer_buffer.size() < word_count) @@ -447,8 +448,21 @@ TickCount DMA::TransferMemoryToDevice(Channel channel, u32 address, u32 incremen switch (channel) { case Channel::GPU: - g_gpu->DMAWrite(src_pointer, word_count); - break; + { + if (g_gpu->BeginDMAWrite()) + { + u8* ram_pointer = Bus::g_ram; + for (u32 i = 0; i < word_count; i++) + { + u32 value; + std::memcpy(&value, &ram_pointer[address], sizeof(u32)); + g_gpu->DMAWrite(address, value); + address = (address + increment) & ADDRESS_MASK; + } + g_gpu->EndDMAWrite(); + } + } + break; case Channel::SPU: g_spu.DMAWrite(src_pointer, word_count); diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index deabe216f..4f1a74407 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -349,32 +349,17 @@ void GPU::DMARead(u32* words, u32 word_count) words[i] = ReadGPUREAD(); } -void GPU::DMAWrite(const u32* words, u32 word_count) +void GPU::EndDMAWrite() { - switch (m_GPUSTAT.dma_direction) + m_fifo_pushed = true; + if (!m_syncing) { - case DMADirection::CPUtoGP0: - { - m_fifo.PushRange(words, word_count); - m_fifo_pushed = true; - if (!m_syncing) - { - ExecuteCommands(); - UpdateCommandTickEvent(); - } - else - { - UpdateDMARequest(); - } - } - break; - - default: - { - Log_ErrorPrintf("Unhandled GPU DMA write mode %u for %u words", - static_cast(m_GPUSTAT.dma_direction.GetValue()), word_count); - } - break; + ExecuteCommands(); + UpdateCommandTickEvent(); + } + else + { + UpdateDMARequest(); } } diff --git a/src/core/gpu.h b/src/core/gpu.h index 668adde1b..62f0a4817 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -136,7 +136,13 @@ public: // DMA access void DMARead(u32* words, u32 word_count); - void DMAWrite(const u32* words, u32 word_count); + + ALWAYS_INLINE bool BeginDMAWrite() const { return (m_GPUSTAT.dma_direction == DMADirection::CPUtoGP0); } + ALWAYS_INLINE void DMAWrite(u32 address, u32 value) + { + m_fifo.Push((ZeroExtend64(address) << 32) | ZeroExtend64(value)); + } + void EndDMAWrite(); /// Returns the number of pending GPU ticks. TickCount GetPendingCRTCTicks() const; @@ -276,6 +282,14 @@ protected: // Sprites/rectangles should be clipped to 12 bits before drawing. static constexpr s32 TruncateVertexPosition(s32 x) { return SignExtendN<11, s32>(x); } + struct NativeVertex + { + s16 x; + s16 y; + u32 color; + u16 texcoord; + }; + union VRAMPixel { u16 bits; @@ -700,11 +714,15 @@ protected: u16 row; } m_vram_transfer = {}; - HeapFIFOQueue m_fifo; + HeapFIFOQueue m_fifo; std::vector m_blit_buffer; u32 m_blit_remaining_words; RenderCommand m_render_command{}; + ALWAYS_INLINE u32 FifoPop() { return Truncate32(m_fifo.Pop()); } + ALWAYS_INLINE u32 FifoPeek() { return Truncate32(m_fifo.Peek()); } + ALWAYS_INLINE u32 FifoPeek(u32 i) { return Truncate32(m_fifo.Peek(i)); } + TickCount m_max_run_ahead = 128; u32 m_fifo_size = 128; diff --git a/src/core/gpu_commands.cpp b/src/core/gpu_commands.cpp index 6d658ac18..c9334822c 100644 --- a/src/core/gpu_commands.cpp +++ b/src/core/gpu_commands.cpp @@ -33,7 +33,7 @@ void GPU::ExecuteCommands() { case BlitterState::Idle: { - const u32 command = m_fifo.Peek(0) >> 24; + const u32 command = FifoPeek(0) >> 24; if ((this->*s_GP0_command_handler_table[command])()) continue; else @@ -45,8 +45,11 @@ void GPU::ExecuteCommands() DebugAssert(m_blit_remaining_words > 0); const u32 words_to_copy = std::min(m_blit_remaining_words, m_fifo.GetSize()); const size_t old_size = m_blit_buffer.size(); - m_blit_buffer.resize(m_blit_buffer.size() + words_to_copy); - m_fifo.PopRange(&m_blit_buffer[old_size], words_to_copy); + // m_blit_buffer.resize(m_blit_buffer.size() + words_to_copy); + // FifoPopRange(&m_blit_buffer[old_size], words_to_copy); + m_blit_buffer.reserve(m_blit_buffer.size() + words_to_copy); + for (u32 i = 0; i < words_to_copy; i++) + m_blit_buffer.push_back(FifoPop()); m_blit_remaining_words -= words_to_copy; AddCommandTicks(words_to_copy); @@ -72,7 +75,7 @@ void GPU::ExecuteCommands() { // polyline must have at least two vertices, and the terminator is (word & 0xf000f000) == 0x50005000. // terminator is on the first word for the vertex - if ((m_fifo.Peek(terminator_index) & UINT32_C(0xF000F000)) == UINT32_C(0x50005000)) + if ((FifoPeek(terminator_index) & UINT32_C(0xF000F000)) == UINT32_C(0x50005000)) break; } @@ -81,8 +84,11 @@ void GPU::ExecuteCommands() if (words_to_copy > 0) { const size_t old_size = m_blit_buffer.size(); - m_blit_buffer.resize(m_blit_buffer.size() + words_to_copy); - m_fifo.PopRange(&m_blit_buffer[old_size], words_to_copy); + // m_blit_buffer.resize(m_blit_buffer.size() + words_to_copy); + // FifoPopRange(&m_blit_buffer[old_size], words_to_copy); + m_blit_buffer.reserve(m_blit_buffer.size() + words_to_copy); + for (u32 i = 0; i < words_to_copy; i++) + m_blit_buffer.push_back(FifoPop()); } Log_DebugPrintf("Added %u words to polyline", words_to_copy); @@ -170,12 +176,12 @@ GPU::GP0CommandHandlerTable GPU::GenerateGP0CommandHandlerTable() bool GPU::HandleUnknownGP0Command() { - const u32 command = m_fifo.Peek() >> 24; + const u32 command = FifoPeek() >> 24; Log_ErrorPrintf("Unimplemented GP0 command 0x%02X", command); SmallString dump; for (u32 i = 0; i < m_fifo.GetSize(); i++) - dump.AppendFormattedString("%s0x%08X", (i > 0) ? " " : "", m_fifo.Peek(i)); + dump.AppendFormattedString("%s0x%08X", (i > 0) ? " " : "", FifoPeek(i)); Log_ErrorPrintf("FIFO: %s", dump.GetCharArray()); m_fifo.RemoveOne(); @@ -216,7 +222,7 @@ bool GPU::HandleInterruptRequestCommand() bool GPU::HandleSetDrawModeCommand() { - const u32 param = m_fifo.Pop() & 0x00FFFFFFu; + const u32 param = FifoPop() & 0x00FFFFFFu; Log_DebugPrintf("Set draw mode %08X", param); SetDrawMode(Truncate16(param)); AddCommandTicks(1); @@ -226,7 +232,7 @@ bool GPU::HandleSetDrawModeCommand() bool GPU::HandleSetTextureWindowCommand() { - const u32 param = m_fifo.Pop() & 0x00FFFFFFu; + const u32 param = FifoPop() & 0x00FFFFFFu; SetTextureWindow(param); Log_DebugPrintf("Set texture window %02X %02X %02X %02X", m_draw_mode.texture_window_mask_x, m_draw_mode.texture_window_mask_y, m_draw_mode.texture_window_offset_x, @@ -239,7 +245,7 @@ bool GPU::HandleSetTextureWindowCommand() bool GPU::HandleSetDrawingAreaTopLeftCommand() { - const u32 param = m_fifo.Pop() & 0x00FFFFFFu; + const u32 param = FifoPop() & 0x00FFFFFFu; const u32 left = param & VRAM_WIDTH_MASK; const u32 top = (param >> 10) & VRAM_HEIGHT_MASK; Log_DebugPrintf("Set drawing area top-left: (%u, %u)", left, top); @@ -259,7 +265,7 @@ bool GPU::HandleSetDrawingAreaTopLeftCommand() bool GPU::HandleSetDrawingAreaBottomRightCommand() { - const u32 param = m_fifo.Pop() & 0x00FFFFFFu; + const u32 param = FifoPop() & 0x00FFFFFFu; const u32 right = param & VRAM_WIDTH_MASK; const u32 bottom = (param >> 10) & VRAM_HEIGHT_MASK; @@ -280,7 +286,7 @@ bool GPU::HandleSetDrawingAreaBottomRightCommand() bool GPU::HandleSetDrawingOffsetCommand() { - const u32 param = m_fifo.Pop() & 0x00FFFFFFu; + const u32 param = FifoPop() & 0x00FFFFFFu; const s32 x = SignExtendN<11, s32>(param & 0x7FFu); const s32 y = SignExtendN<11, s32>((param >> 11) & 0x7FFu); Log_DebugPrintf("Set drawing offset (%d, %d)", m_drawing_offset.x, m_drawing_offset.y); @@ -299,7 +305,7 @@ bool GPU::HandleSetDrawingOffsetCommand() bool GPU::HandleSetMaskBitCommand() { - const u32 param = m_fifo.Pop() & 0x00FFFFFFu; + const u32 param = FifoPop() & 0x00FFFFFFu; constexpr u32 gpustat_mask = (1 << 11) | (1 << 12); const u32 gpustat_bits = (param & 0x03) << 11; @@ -318,7 +324,7 @@ bool GPU::HandleSetMaskBitCommand() bool GPU::HandleRenderPolygonCommand() { - const RenderCommand rc{m_fifo.Peek(0)}; + const RenderCommand rc{FifoPeek(0)}; // shaded vertices use the colour from the first word for the first vertex const u32 words_per_vertex = 1 + BoolToUInt32(rc.texture_enable) + BoolToUInt32(rc.shading_enable); @@ -344,10 +350,10 @@ bool GPU::HandleRenderPolygonCommand() // set draw state up if (rc.texture_enable) { - const u16 texpage_attribute = Truncate16((rc.shading_enable ? m_fifo.Peek(5) : m_fifo.Peek(4)) >> 16); + const u16 texpage_attribute = Truncate16((rc.shading_enable ? FifoPeek(5) : FifoPeek(4)) >> 16); SetDrawMode((texpage_attribute & DrawMode::Reg::POLYGON_TEXPAGE_MASK) | (m_draw_mode.mode_reg.bits & ~DrawMode::Reg::POLYGON_TEXPAGE_MASK)); - SetTexturePalette(Truncate16(m_fifo.Peek(2) >> 16)); + SetTexturePalette(Truncate16(FifoPeek(2) >> 16)); } m_stats.num_vertices += num_vertices; @@ -362,7 +368,7 @@ bool GPU::HandleRenderPolygonCommand() bool GPU::HandleRenderRectangleCommand() { - const RenderCommand rc{m_fifo.Peek(0)}; + const RenderCommand rc{FifoPeek(0)}; const u32 total_words = 2 + BoolToUInt32(rc.texture_enable) + BoolToUInt32(rc.rectangle_size == DrawRectangleSize::Variable); @@ -372,7 +378,7 @@ bool GPU::HandleRenderRectangleCommand() SynchronizeCRTC(); if (rc.texture_enable) - SetTexturePalette(Truncate16(m_fifo.Peek(2) >> 16)); + SetTexturePalette(Truncate16(FifoPeek(2) >> 16)); const TickCount setup_ticks = 16; AddCommandTicks(setup_ticks); @@ -394,7 +400,7 @@ bool GPU::HandleRenderRectangleCommand() bool GPU::HandleRenderLineCommand() { - const RenderCommand rc{m_fifo.Peek(0)}; + const RenderCommand rc{FifoPeek(0)}; const u32 total_words = rc.shading_enable ? 4 : 3; CHECK_COMMAND_SIZE(total_words); @@ -417,7 +423,7 @@ bool GPU::HandleRenderLineCommand() bool GPU::HandleRenderPolyLineCommand() { // always read the first two vertices, we test for the terminator after that - const RenderCommand rc{m_fifo.Peek(0)}; + const RenderCommand rc{FifoPeek(0)}; const u32 min_words = rc.shading_enable ? 3 : 4; CHECK_COMMAND_SIZE(min_words); @@ -434,8 +440,11 @@ bool GPU::HandleRenderPolyLineCommand() m_fifo.RemoveOne(); const u32 words_to_pop = min_words - 1; - m_blit_buffer.resize(words_to_pop); - m_fifo.PopRange(m_blit_buffer.data(), words_to_pop); + // m_blit_buffer.resize(words_to_pop); + // FifoPopRange(m_blit_buffer.data(), words_to_pop); + m_blit_buffer.reserve(words_to_pop); + for (u32 i = 0; i < words_to_pop; i++) + m_blit_buffer.push_back(Truncate32(FifoPop())); // polyline goes via a different path through the blit buffer m_blitter_state = BlitterState::DrawingPolyLine; @@ -452,11 +461,11 @@ bool GPU::HandleFillRectangleCommand() FlushRender(); - const u32 color = m_fifo.Pop() & 0x00FFFFFF; - const u32 dst_x = m_fifo.Peek() & 0x3F0; - const u32 dst_y = (m_fifo.Pop() >> 16) & VRAM_COORD_MASK; - const u32 width = ((m_fifo.Peek() & VRAM_WIDTH_MASK) + 0xF) & ~0xF; - const u32 height = (m_fifo.Pop() >> 16) & VRAM_HEIGHT_MASK; + const u32 color = FifoPop() & 0x00FFFFFF; + const u32 dst_x = FifoPeek() & 0x3F0; + const u32 dst_y = (FifoPop() >> 16) & VRAM_COORD_MASK; + const u32 width = ((FifoPeek() & VRAM_WIDTH_MASK) + 0xF) & ~0xF; + const u32 height = (FifoPop() >> 16) & VRAM_HEIGHT_MASK; Log_DebugPrintf("Fill VRAM rectangle offset=(%u,%u), size=(%u,%u)", dst_x, dst_y, width, height); @@ -472,10 +481,10 @@ bool GPU::HandleCopyRectangleCPUToVRAMCommand() CHECK_COMMAND_SIZE(3); m_fifo.RemoveOne(); - const u32 dst_x = m_fifo.Peek() & VRAM_COORD_MASK; - const u32 dst_y = (m_fifo.Pop() >> 16) & VRAM_COORD_MASK; - const u32 copy_width = ReplaceZero(m_fifo.Peek() & VRAM_WIDTH_MASK, 0x400); - const u32 copy_height = ReplaceZero((m_fifo.Pop() >> 16) & VRAM_HEIGHT_MASK, 0x200); + const u32 dst_x = FifoPeek() & VRAM_COORD_MASK; + const u32 dst_y = (FifoPop() >> 16) & VRAM_COORD_MASK; + const u32 copy_width = ReplaceZero(FifoPeek() & VRAM_WIDTH_MASK, 0x400); + const u32 copy_height = ReplaceZero((FifoPop() >> 16) & VRAM_HEIGHT_MASK, 0x200); const u32 num_pixels = copy_width * copy_height; const u32 num_words = ((num_pixels + 1) / 2); @@ -520,10 +529,10 @@ bool GPU::HandleCopyRectangleVRAMToCPUCommand() CHECK_COMMAND_SIZE(3); m_fifo.RemoveOne(); - m_vram_transfer.x = Truncate16(m_fifo.Peek() & VRAM_COORD_MASK); - m_vram_transfer.y = Truncate16((m_fifo.Pop() >> 16) & VRAM_COORD_MASK); - m_vram_transfer.width = ((Truncate16(m_fifo.Peek()) - 1) & VRAM_WIDTH_MASK) + 1; - m_vram_transfer.height = ((Truncate16(m_fifo.Pop() >> 16) - 1) & VRAM_HEIGHT_MASK) + 1; + m_vram_transfer.x = Truncate16(FifoPeek() & VRAM_COORD_MASK); + m_vram_transfer.y = Truncate16((FifoPop() >> 16) & VRAM_COORD_MASK); + m_vram_transfer.width = ((Truncate16(FifoPeek()) - 1) & VRAM_WIDTH_MASK) + 1; + m_vram_transfer.height = ((Truncate16(FifoPop() >> 16) - 1) & VRAM_HEIGHT_MASK) + 1; Log_DebugPrintf("Copy rectangle from VRAM to CPU offset=(%u,%u), size=(%u,%u)", m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, m_vram_transfer.height); @@ -554,12 +563,12 @@ bool GPU::HandleCopyRectangleVRAMToVRAMCommand() CHECK_COMMAND_SIZE(4); m_fifo.RemoveOne(); - const u32 src_x = m_fifo.Peek() & VRAM_COORD_MASK; - const u32 src_y = (m_fifo.Pop() >> 16) & VRAM_COORD_MASK; - const u32 dst_x = m_fifo.Peek() & VRAM_COORD_MASK; - const u32 dst_y = (m_fifo.Pop() >> 16) & VRAM_COORD_MASK; - const u32 width = ReplaceZero(m_fifo.Peek() & VRAM_WIDTH_MASK, 0x400); - const u32 height = ReplaceZero((m_fifo.Pop() >> 16) & VRAM_HEIGHT_MASK, 0x200); + const u32 src_x = FifoPeek() & VRAM_COORD_MASK; + const u32 src_y = (FifoPop() >> 16) & VRAM_COORD_MASK; + const u32 dst_x = FifoPeek() & VRAM_COORD_MASK; + const u32 dst_y = (FifoPop() >> 16) & VRAM_COORD_MASK; + const u32 width = ReplaceZero(FifoPeek() & VRAM_WIDTH_MASK, 0x400); + const u32 height = ReplaceZero((FifoPop() >> 16) & VRAM_HEIGHT_MASK, 0x200); Log_DebugPrintf("Copy rectangle from VRAM to VRAM src=(%u,%u), dst=(%u,%u), size=(%u,%u)", src_x, src_y, dst_x, dst_y, width, height); diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index aaf93604f..f544451c8 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -2,17 +2,25 @@ #include "common/assert.h" #include "common/log.h" #include "common/state_wrapper.h" +#include "cpu_core.h" +#include "pgxp.h" #include "settings.h" #include "system.h" #include #include Log_SetChannel(GPU_HW); -GPU_HW::GPU_HW() : GPU() { m_vram_ptr = m_vram_shadow.data(); } +GPU_HW::GPU_HW() : GPU() +{ + m_vram_ptr = m_vram_shadow.data(); +} GPU_HW::~GPU_HW() = default; -bool GPU_HW::IsHardwareRenderer() const { return true; } +bool GPU_HW::IsHardwareRenderer() const +{ + return true; +} bool GPU_HW::Initialize(HostDisplay* host_display) { @@ -110,35 +118,39 @@ void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices) // It might be faster to do more direct checking here, but the code below handles primitives in any order and // orientation, and is far more SIMD-friendly if needed. - const s32 abx = vertices[1].x - vertices[0].x; - const s32 aby = vertices[1].y - vertices[0].y; - const s32 bcx = vertices[2].x - vertices[1].x; - const s32 bcy = vertices[2].y - vertices[1].y; - const s32 cax = vertices[0].x - vertices[2].x; - const s32 cay = vertices[0].y - vertices[2].y; + const float abx = vertices[1].x - vertices[0].x; + const float aby = vertices[1].y - vertices[0].y; + const float bcx = vertices[2].x - vertices[1].x; + const float bcy = vertices[2].y - vertices[1].y; + const float cax = vertices[0].x - vertices[2].x; + const float cay = vertices[0].y - vertices[2].y; // Compute static derivatives, just assume W is uniform across the primitive and that the plane equation remains the // same across the quad. (which it is, there is no Z.. yet). - const s32 dudx = -aby * vertices[2].u - bcy * vertices[0].u - cay * vertices[1].u; - const s32 dvdx = -aby * vertices[2].v - bcy * vertices[0].v - cay * vertices[1].v; - const s32 dudy = +abx * vertices[2].u + bcx * vertices[0].u + cax * vertices[1].u; - const s32 dvdy = +abx * vertices[2].v + bcx * vertices[0].v + cax * vertices[1].v; - const s32 area = bcx * cay - bcy * cax; + const float dudx = -aby * static_cast(vertices[2].u) - bcy * static_cast(vertices[0].u) - + cay * static_cast(vertices[1].u); + const float dvdx = -aby * static_cast(vertices[2].v) - bcy * static_cast(vertices[0].v) - + cay * static_cast(vertices[1].v); + const float dudy = +abx * static_cast(vertices[2].u) + bcx * static_cast(vertices[0].u) + + cax * static_cast(vertices[1].u); + const float dvdy = +abx * static_cast(vertices[2].v) + bcx * static_cast(vertices[0].v) + + cax * static_cast(vertices[1].v); + const float area = bcx * cay - bcy * cax; // Detect and reject any triangles with 0 size texture area const s32 texArea = (vertices[1].u - vertices[0].u) * (vertices[2].v - vertices[0].v) - (vertices[2].u - vertices[0].u) * (vertices[1].v - vertices[0].v); // Shouldn't matter as degenerate primitives will be culled anyways. - if (area == 0 && texArea == 0) + if (area == 0.0f && texArea == 0) return; // Use floats here as it'll be faster than integer divides. - const float rcp_area = 1.0f / static_cast(area); - const float dudx_area = static_cast(dudx) * rcp_area; - const float dudy_area = static_cast(dudy) * rcp_area; - const float dvdx_area = static_cast(dvdx) * rcp_area; - const float dvdy_area = static_cast(dvdy) * rcp_area; + const float rcp_area = 1.0f / area; + const float dudx_area = dudx * rcp_area; + const float dudy_area = dudy * rcp_area; + const float dvdx_area = dvdx * rcp_area; + const float dvdy_area = dvdy * rcp_area; const bool neg_dudx = dudx_area < 0.0f; const bool neg_dudy = dudy_area < 0.0f; const bool neg_dvdx = dvdx_area < 0.0f; @@ -179,22 +191,22 @@ void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices) // The PlayStation GPU draws lines from start to end, inclusive. Or, more specifically, inclusive of the greatest delta // in the x or y direction. -void GPU_HW::FixLineVertexCoordinates(BatchVertex& start, BatchVertex& end, s32 dx, s32 dy) +void GPU_HW::FixLineVertexCoordinates(s32& start_x, s32& start_y, s32& end_x, s32& end_y, s32 dx, s32 dy) { // deliberately not else if to catch the equal case if (dx >= dy) { - if (start.x > end.x) - start.x++; + if (start_x > end_x) + start_x++; else - end.x++; + end_x++; } if (dx <= dy) { - if (start.y > end.y) - start.y++; + if (start_y > end_y) + start_y++; else - end.y++; + end_y++; } } @@ -202,6 +214,7 @@ void GPU_HW::LoadVertices() { const RenderCommand rc{m_render_command.bits}; const u32 texpage = ZeroExtend32(m_draw_mode.mode_reg.bits) | (ZeroExtend32(m_draw_mode.palette_reg) << 16); + const float depth = GetCurrentNormalizedVertexDepth(); if (m_GPUSTAT.check_mask_before_draw) m_current_depth++; @@ -215,17 +228,36 @@ void GPU_HW::LoadVertices() const u32 first_color = rc.color_for_first_vertex; const bool shaded = rc.shading_enable; const bool textured = rc.texture_enable; + const bool pgxp = g_settings.gpu_pgxp_enable; const u32 num_vertices = rc.quad_polygon ? 4 : 3; std::array vertices; + std::array, 4> native_vertex_positions; + bool valid_w = g_settings.gpu_pgxp_texture_correction; for (u32 i = 0; i < num_vertices; i++) { - const u32 color = (shaded && i > 0) ? (m_fifo.Pop() & UINT32_C(0x00FFFFFF)) : first_color; - const VertexPosition vp{m_fifo.Pop()}; - const u16 packed_texcoord = textured ? Truncate16(m_fifo.Pop()) : 0; + const u32 color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color; + const u64 maddr_and_pos = m_fifo.Pop(); + const VertexPosition vp{Truncate32(maddr_and_pos)}; + const u16 texcoord = textured ? Truncate16(FifoPop()) : 0; + const s32 native_x = m_drawing_offset.x + vp.x; + const s32 native_y = m_drawing_offset.y + vp.y; + native_vertex_positions[i][0] = native_x; + native_vertex_positions[i][1] = native_y; + vertices[i].Set(static_cast(native_x), static_cast(native_y), depth, 1.0f, color, texpage, + texcoord); - vertices[i].Set(m_drawing_offset.x + vp.x, m_drawing_offset.y + vp.y, m_current_depth, color, texpage, - packed_texcoord); + if (pgxp) + { + valid_w &= + PGXP::GetPreciseVertex(Truncate32(maddr_and_pos >> 32), vp.bits, native_x, native_y, m_drawing_offset.x, + m_drawing_offset.y, &vertices[i].x, &vertices[i].y, &vertices[i].w); + } + } + if (!valid_w) + { + for (BatchVertex& v : vertices) + v.w = 1.0f; } if (rc.quad_polygon && m_resolution_scale > 1) @@ -235,19 +267,20 @@ void GPU_HW::LoadVertices() return; // Cull polygons which are too large. - const s32 min_x_12 = std::min(vertices[1].x, vertices[2].x); - const s32 max_x_12 = std::max(vertices[1].x, vertices[2].x); - const s32 min_y_12 = std::min(vertices[1].y, vertices[2].y); - const s32 max_y_12 = std::max(vertices[1].y, vertices[2].y); - const s32 min_x = std::min(min_x_12, vertices[0].x); - const s32 max_x = std::max(max_x_12, vertices[0].x); - const s32 min_y = std::min(min_y_12, vertices[0].y); - const s32 max_y = std::max(max_y_12, vertices[0].y); + const s32 min_x_12 = std::min(native_vertex_positions[1][0], native_vertex_positions[2][0]); + const s32 max_x_12 = std::max(native_vertex_positions[1][0], native_vertex_positions[2][0]); + const s32 min_y_12 = std::min(native_vertex_positions[1][1], native_vertex_positions[2][1]); + const s32 max_y_12 = std::max(native_vertex_positions[1][1], native_vertex_positions[2][1]); + const s32 min_x = std::min(min_x_12, native_vertex_positions[0][0]); + const s32 max_x = std::max(max_x_12, native_vertex_positions[0][0]); + const s32 min_y = std::min(min_y_12, native_vertex_positions[0][1]); + const s32 max_y = std::max(max_y_12, native_vertex_positions[0][1]); if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT) { - Log_DebugPrintf("Culling too-large polygon: %d,%d %d,%d %d,%d", vertices[0].x, vertices[0].y, vertices[1].x, - vertices[1].y, vertices[2].x, vertices[2].y); + Log_DebugPrintf("Culling too-large polygon: %d,%d %d,%d %d,%d", native_vertex_positions[0][0], + native_vertex_positions[0][1], native_vertex_positions[1][0], native_vertex_positions[1][1], + native_vertex_positions[2][0], native_vertex_positions[2][1]); } else { @@ -268,16 +301,17 @@ void GPU_HW::LoadVertices() // quads if (rc.quad_polygon) { - const s32 min_x_123 = std::min(min_x_12, vertices[3].x); - const s32 max_x_123 = std::max(max_x_12, vertices[3].x); - const s32 min_y_123 = std::min(min_y_12, vertices[3].y); - const s32 max_y_123 = std::max(max_y_12, vertices[3].y); + const s32 min_x_123 = std::min(min_x_12, native_vertex_positions[3][0]); + const s32 max_x_123 = std::max(max_x_12, native_vertex_positions[3][0]); + const s32 min_y_123 = std::min(min_y_12, native_vertex_positions[3][1]); + const s32 max_y_123 = std::max(max_y_12, native_vertex_positions[3][1]); // Cull polygons which are too large. if ((max_x_123 - min_x_123) >= MAX_PRIMITIVE_WIDTH || (max_y_123 - min_y_123) >= MAX_PRIMITIVE_HEIGHT) { - Log_DebugPrintf("Culling too-large polygon (quad second half): %d,%d %d,%d %d,%d", vertices[2].x, - vertices[2].y, vertices[1].x, vertices[1].y, vertices[0].x, vertices[0].y); + Log_DebugPrintf("Culling too-large polygon (quad second half): %d,%d %d,%d %d,%d", + native_vertex_positions[2][0], native_vertex_positions[2][1], native_vertex_positions[1][0], + native_vertex_positions[1][1], native_vertex_positions[0][0], native_vertex_positions[0][1]); } else { @@ -303,11 +337,11 @@ void GPU_HW::LoadVertices() case Primitive::Rectangle: { const u32 color = rc.color_for_first_vertex; - const VertexPosition vp{m_fifo.Pop()}; + const VertexPosition vp{FifoPop()}; const s32 pos_x = TruncateVertexPosition(m_drawing_offset.x + vp.x); const s32 pos_y = TruncateVertexPosition(m_drawing_offset.y + vp.y); - const auto [texcoord_x, texcoord_y] = UnpackTexcoord(rc.texture_enable ? Truncate16(m_fifo.Pop()) : 0); + const auto [texcoord_x, texcoord_y] = UnpackTexcoord(rc.texture_enable ? Truncate16(FifoPop()) : 0); u16 orig_tex_left = ZeroExtend16(texcoord_x); u16 orig_tex_top = ZeroExtend16(texcoord_y); s32 rectangle_width; @@ -328,7 +362,7 @@ void GPU_HW::LoadVertices() break; default: { - const u32 width_and_height = m_fifo.Pop(); + const u32 width_and_height = FifoPop(); rectangle_width = static_cast(width_and_height & VRAM_WIDTH_MASK); rectangle_height = static_cast((width_and_height >> 16) & VRAM_HEIGHT_MASK); @@ -353,25 +387,25 @@ void GPU_HW::LoadVertices() for (s32 y_offset = 0; y_offset < rectangle_height;) { const s32 quad_height = std::min(rectangle_height - y_offset, TEXTURE_PAGE_WIDTH - tex_top); - const s32 quad_start_y = pos_y + y_offset; - const s32 quad_end_y = quad_start_y + quad_height; + const float quad_start_y = static_cast(pos_y + y_offset); + const float quad_end_y = quad_start_y + static_cast(quad_height); const u16 tex_bottom = tex_top + static_cast(quad_height); u16 tex_left = orig_tex_left; for (s32 x_offset = 0; x_offset < rectangle_width;) { const s32 quad_width = std::min(rectangle_width - x_offset, TEXTURE_PAGE_HEIGHT - tex_left); - const s32 quad_start_x = pos_x + x_offset; - const s32 quad_end_x = quad_start_x + quad_width; + const float quad_start_x = static_cast(pos_x + x_offset); + const float quad_end_x = quad_start_x + static_cast(quad_width); const u16 tex_right = tex_left + static_cast(quad_width); - AddNewVertex(quad_start_x, quad_start_y, m_current_depth, color, texpage, tex_left, tex_top); - AddNewVertex(quad_end_x, quad_start_y, m_current_depth, color, texpage, tex_right, tex_top); - AddNewVertex(quad_start_x, quad_end_y, m_current_depth, color, texpage, tex_left, tex_bottom); + AddNewVertex(quad_start_x, quad_start_y, depth, 1.0f, color, texpage, tex_left, tex_top); + AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top); + AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom); - AddNewVertex(quad_start_x, quad_end_y, m_current_depth, color, texpage, tex_left, tex_bottom); - AddNewVertex(quad_end_x, quad_start_y, m_current_depth, color, texpage, tex_right, tex_top); - AddNewVertex(quad_end_x, quad_end_y, m_current_depth, color, texpage, tex_right, tex_bottom); + AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom); + AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top); + AddNewVertex(quad_end_x, quad_end_y, depth, 1.0f, color, texpage, tex_right, tex_bottom); x_offset += quad_width; tex_left = 0; @@ -404,41 +438,41 @@ void GPU_HW::LoadVertices() if (rc.shading_enable) { color0 = rc.color_for_first_vertex; - pos0.bits = m_fifo.Pop(); - color1 = m_fifo.Pop() & UINT32_C(0x00FFFFFF); - pos1.bits = m_fifo.Pop(); + pos0.bits = FifoPop(); + color1 = FifoPop() & UINT32_C(0x00FFFFFF); + pos1.bits = FifoPop(); } else { color0 = color1 = rc.color_for_first_vertex; - pos0.bits = m_fifo.Pop(); - pos1.bits = m_fifo.Pop(); + pos0.bits = FifoPop(); + pos1.bits = FifoPop(); } if (!IsDrawingAreaIsValid()) return; - BatchVertex start, end; - start.Set(m_drawing_offset.x + pos0.x, m_drawing_offset.y + pos0.y, m_current_depth, color0, 0, 0); - end.Set(m_drawing_offset.x + pos1.x, m_drawing_offset.y + pos1.y, m_current_depth, color1, 0, 0); + s32 start_x = pos0.x + m_drawing_offset.x; + s32 start_y = pos0.y + m_drawing_offset.y; + s32 end_x = pos1.x + m_drawing_offset.x; + s32 end_y = pos1.y + m_drawing_offset.y; - const s32 min_x = std::min(start.x, end.x); - const s32 max_x = std::max(start.x, end.x); - const s32 min_y = std::min(start.y, end.y); - const s32 max_y = std::max(start.y, end.y); + const s32 min_x = std::min(start_x, end_x); + const s32 max_x = std::max(start_x, end_x); + const s32 min_y = std::min(start_y, end_y); + const s32 max_y = std::max(start_y, end_y); const s32 dx = max_x - min_x; const s32 dy = max_y - min_y; - if (dx >= MAX_PRIMITIVE_WIDTH || dy >= MAX_PRIMITIVE_HEIGHT) { - Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", start.x, start.y, end.x, end.y); + Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", start_x, start_y, end_x, end_y); return; } - FixLineVertexCoordinates(start, end, dx, dy); - - AddVertex(start); - AddVertex(end); + FixLineVertexCoordinates(start_x, start_y, end_x, end_y, dx, dy); + AddNewVertex(static_cast(start_x), static_cast(start_y), depth, 1.0f, color0, 0, + static_cast(0)); + AddNewVertex(static_cast(end_x), static_cast(end_y), depth, 1.0f, color1, 0, static_cast(0)); const u32 clip_left = static_cast(std::clamp(min_x, m_drawing_area.left, m_drawing_area.left)); const u32 clip_right = static_cast(std::clamp(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u; @@ -461,37 +495,38 @@ void GPU_HW::LoadVertices() const u32 first_color = rc.color_for_first_vertex; const bool shaded = rc.shading_enable; - BatchVertex last_vertex; + s32 last_x, last_y; + u32 last_color; u32 buffer_pos = 0; for (u32 i = 0; i < num_vertices; i++) { const u32 color = (shaded && i > 0) ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color; const VertexPosition vp{m_blit_buffer[buffer_pos++]}; - - BatchVertex vertex; - vertex.Set(m_drawing_offset.x + vp.x, m_drawing_offset.y + vp.y, m_current_depth, color, 0, 0); + const s32 x = m_drawing_offset.x + vp.x; + const s32 y = m_drawing_offset.y + vp.y; if (i > 0) { - const s32 min_x = std::min(last_vertex.x, vertex.x); - const s32 max_x = std::max(last_vertex.x, vertex.x); - const s32 min_y = std::min(last_vertex.y, vertex.y); - const s32 max_y = std::max(last_vertex.y, vertex.y); + const s32 min_x = std::min(last_x, x); + const s32 max_x = std::max(last_x, x); + const s32 min_y = std::min(last_y, y); + const s32 max_y = std::max(last_y, y); const s32 dx = max_x - min_x; const s32 dy = max_y - min_y; if (dx >= MAX_PRIMITIVE_WIDTH || dy >= MAX_PRIMITIVE_HEIGHT) { - Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", last_vertex.x, last_vertex.y, vertex.x, - vertex.y); + Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", last_x, last_y, x, y); } else { - BatchVertex start(last_vertex); - BatchVertex end(vertex); - FixLineVertexCoordinates(start, end, dx, dy); - AddVertex(start); - AddVertex(end); + s32 start_x = last_x, start_y = last_y; + s32 end_x = x, end_y = y; + FixLineVertexCoordinates(start_x, start_y, end_x, end_y, dx, dy); + AddNewVertex(static_cast(start_x), static_cast(start_y), depth, 1.0f, last_color, 0, + static_cast(0)); + AddNewVertex(static_cast(end_x), static_cast(end_y), depth, 1.0f, color, 0, + static_cast(0)); const u32 clip_left = static_cast(std::clamp(min_x, m_drawing_area.left, m_drawing_area.left)); const u32 clip_right = @@ -505,7 +540,9 @@ void GPU_HW::LoadVertices() } } - std::memcpy(&last_vertex, &vertex, sizeof(BatchVertex)); + last_x = x; + last_y = y; + last_color = color; } } } diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index ae978763e..b12cba164 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -55,24 +55,26 @@ protected: struct BatchVertex { - s32 x; - s32 y; - s32 z; + float x; + float y; + float z; + float w; u32 color; u32 texpage; u16 u; // 16-bit texcoords are needed for 256 extent rectangles u16 v; - ALWAYS_INLINE void Set(s32 x_, s32 y_, s32 z_, u32 color_, u32 texpage_, u16 packed_texcoord) + ALWAYS_INLINE void Set(float x_, float y_, float z_, float w_, u32 color_, u32 texpage_, u16 packed_texcoord) { - Set(x_, y_, z_, color_, texpage_, packed_texcoord & 0xFF, (packed_texcoord >> 8)); + Set(x_, y_, z_, w_, color_, texpage_, packed_texcoord & 0xFF, (packed_texcoord >> 8)); } - ALWAYS_INLINE void Set(s32 x_, s32 y_, s32 z_, u32 color_, u32 texpage_, u16 u_, u16 v_) + ALWAYS_INLINE void Set(float x_, float y_, float z_, float w_, u32 color_, u32 texpage_, u16 u_, u16 v_) { x = x_; y = y_; z = z_; + w = w_; color = color_; texpage = texpage_; u = u_; @@ -191,7 +193,7 @@ protected: /// Returns the value to be written to the depth buffer for the current operation for mask bit emulation. ALWAYS_INLINE float GetCurrentNormalizedVertexDepth() const { - return (static_cast(m_current_depth) / 65535.0f); + return 1.0f - (static_cast(m_current_depth) / 65535.0f); } /// Returns the interlaced mode to use when scanning out/displaying. @@ -234,7 +236,7 @@ protected: /// Handles quads with flipped texture coordinate directions. static void HandleFlippedQuadTextureCoordinates(BatchVertex* vertices); - static void FixLineVertexCoordinates(BatchVertex& start, BatchVertex& end, s32 dx, s32 dy); + static void FixLineVertexCoordinates(s32& start_x, s32& start_y, s32& end_x, s32& end_y, s32 dx, s32 dy); HeapArray m_vram_shadow; diff --git a/src/core/gpu_hw_d3d11.cpp b/src/core/gpu_hw_d3d11.cpp index d0bb17e02..9f1ae2dd1 100644 --- a/src/core/gpu_hw_d3d11.cpp +++ b/src/core/gpu_hw_d3d11.cpp @@ -263,7 +263,7 @@ bool GPU_HW_D3D11::CreateTextureBuffer() bool GPU_HW_D3D11::CreateBatchInputLayout() { static constexpr std::array attributes = { - {{"ATTR", 0, DXGI_FORMAT_R32G32B32_SINT, 0, offsetof(BatchVertex, x), D3D11_INPUT_PER_VERTEX_DATA, 0}, + {{"ATTR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, offsetof(BatchVertex, x), D3D11_INPUT_PER_VERTEX_DATA, 0}, {"ATTR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, offsetof(BatchVertex, color), D3D11_INPUT_PER_VERTEX_DATA, 0}, {"ATTR", 2, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, u), D3D11_INPUT_PER_VERTEX_DATA, 0}, {"ATTR", 3, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, texpage), D3D11_INPUT_PER_VERTEX_DATA, 0}}}; diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp index 393a34158..152ce2442 100644 --- a/src/core/gpu_hw_opengl.cpp +++ b/src/core/gpu_hw_opengl.cpp @@ -291,7 +291,7 @@ bool GPU_HW_OpenGL::CreateVertexBuffer() glEnableVertexAttribArray(1); glEnableVertexAttribArray(2); glEnableVertexAttribArray(3); - glVertexAttribIPointer(0, 3, GL_INT, sizeof(BatchVertex), reinterpret_cast(offsetof(BatchVertex, x))); + glVertexAttribPointer(0, 4, GL_FLOAT, false, sizeof(BatchVertex), reinterpret_cast(offsetof(BatchVertex, x))); glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, true, sizeof(BatchVertex), reinterpret_cast(offsetof(BatchVertex, color))); glVertexAttribIPointer(2, 1, GL_UNSIGNED_INT, sizeof(BatchVertex), reinterpret_cast(offsetof(BatchVertex, u))); diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index 41ec511d7..e8108b08f 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -516,12 +516,12 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool upsc const char* output_block_suffix = upscaled_lines ? "VS" : ""; if (textured) { - DeclareVertexEntryPoint(ss, {"int3 a_pos", "float4 a_col0", "uint a_texcoord", "uint a_texpage"}, 1, 1, + DeclareVertexEntryPoint(ss, {"float4 a_pos", "float4 a_col0", "uint a_texcoord", "uint a_texpage"}, 1, 1, {{"nointerpolation", "uint4 v_texpage"}}, false, output_block_suffix); } else { - DeclareVertexEntryPoint(ss, {"int3 a_pos", "float4 a_col0"}, 1, 0, {}, false, output_block_suffix); + DeclareVertexEntryPoint(ss, {"float4 a_pos", "float4 a_col0"}, 1, 0, {}, false, output_block_suffix); } ss << R"( @@ -532,9 +532,10 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool upsc float vertex_offset = (RESOLUTION_SCALE == 1u) ? 0.5 : 0.0; // 0..+1023 -> -1..1 - float pos_x = ((float(a_pos.x) + vertex_offset) / 512.0) - 1.0; - float pos_y = ((float(a_pos.y) + vertex_offset) / -256.0) + 1.0; - float pos_z = 1.0 - (float(a_pos.z) / 65535.0); + float pos_x = ((a_pos.x + vertex_offset) / 512.0) - 1.0; + float pos_y = ((a_pos.y + vertex_offset) / -256.0) + 1.0; + float pos_z = a_pos.z; + float pos_w = a_pos.w; #if API_OPENGL || API_OPENGL_ES // OpenGL seems to be off by one pixel in the Y direction due to lower-left origin, but only on @@ -550,7 +551,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool upsc pos_y = -pos_y; #endif - v_pos = float4(pos_x, pos_y, pos_z, 1.0); + v_pos = float4(pos_x * pos_w, pos_y * pos_w, pos_z * pos_w, pos_w); v_col0 = a_col0; #if TEXTURED diff --git a/src/core/gpu_hw_vulkan.cpp b/src/core/gpu_hw_vulkan.cpp index 6eceb0b1c..26d7f452e 100644 --- a/src/core/gpu_hw_vulkan.cpp +++ b/src/core/gpu_hw_vulkan.cpp @@ -669,7 +669,7 @@ bool GPU_HW_Vulkan::CompilePipelines() gpbuilder.SetRenderPass(m_vram_render_pass, 0); gpbuilder.AddVertexBuffer(0, sizeof(BatchVertex), VK_VERTEX_INPUT_RATE_VERTEX); - gpbuilder.AddVertexAttribute(0, 0, VK_FORMAT_R32G32B32_SINT, offsetof(BatchVertex, x)); + gpbuilder.AddVertexAttribute(0, 0, VK_FORMAT_R32G32B32A32_SFLOAT, offsetof(BatchVertex, x)); gpbuilder.AddVertexAttribute(1, 0, VK_FORMAT_R8G8B8A8_UNORM, offsetof(BatchVertex, color)); if (textured) { diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index 545270750..c68a4f975 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -227,18 +227,18 @@ void GPU_SW::DispatchRenderCommand() for (u32 i = 0; i < num_vertices; i++) { SWVertex& vert = vertices[i]; - const u32 color_rgb = (shaded && i > 0) ? (m_fifo.Pop() & UINT32_C(0x00FFFFFF)) : first_color; + const u32 color_rgb = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color; vert.color_r = Truncate8(color_rgb); vert.color_g = Truncate8(color_rgb >> 8); vert.color_b = Truncate8(color_rgb >> 16); - const VertexPosition vp{m_fifo.Pop()}; + const VertexPosition vp{FifoPop()}; vert.x = vp.x; vert.y = vp.y; if (textured) { - std::tie(vert.texcoord_x, vert.texcoord_y) = UnpackTexcoord(Truncate16(m_fifo.Pop())); + std::tie(vert.texcoord_x, vert.texcoord_y) = UnpackTexcoord(Truncate16(FifoPop())); } else { @@ -262,8 +262,8 @@ void GPU_SW::DispatchRenderCommand() case Primitive::Rectangle: { const auto [r, g, b] = UnpackColorRGB24(rc.color_for_first_vertex); - const VertexPosition vp{m_fifo.Pop()}; - const u32 texcoord_and_palette = rc.texture_enable ? m_fifo.Pop() : 0; + const VertexPosition vp{FifoPop()}; + const u32 texcoord_and_palette = rc.texture_enable ? FifoPop() : 0; const auto [texcoord_x, texcoord_y] = UnpackTexcoord(Truncate16(texcoord_and_palette)); s32 width; @@ -284,7 +284,7 @@ void GPU_SW::DispatchRenderCommand() break; default: { - const u32 width_and_height = m_fifo.Pop(); + const u32 width_and_height = FifoPop(); width = static_cast(width_and_height & VRAM_WIDTH_MASK); height = static_cast((width_and_height >> 16) & VRAM_HEIGHT_MASK); @@ -321,7 +321,7 @@ void GPU_SW::DispatchRenderCommand() // first vertex SWVertex* p0 = &vertices[0]; SWVertex* p1 = &vertices[1]; - p0->SetPosition(VertexPosition{rc.polyline ? m_blit_buffer[buffer_pos++] : m_fifo.Pop()}); + p0->SetPosition(VertexPosition{rc.polyline ? m_blit_buffer[buffer_pos++] : Truncate32(FifoPop())}); p0->SetColorRGB24(first_color); // remaining vertices in line strip @@ -335,8 +335,8 @@ void GPU_SW::DispatchRenderCommand() } else { - p1->SetColorRGB24(shaded ? (m_fifo.Pop() & UINT32_C(0x00FFFFFF)) : first_color); - p1->SetPosition(VertexPosition{m_fifo.Pop()}); + p1->SetColorRGB24(shaded ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color); + p1->SetPosition(VertexPosition{Truncate32(FifoPop())}); } // down here because of the FIFO pops diff --git a/src/core/gte.cpp b/src/core/gte.cpp index 1b2a0d2d5..12cda54ad 100644 --- a/src/core/gte.cpp +++ b/src/core/gte.cpp @@ -3,6 +3,7 @@ #include "common/bitutils.h" #include "common/state_wrapper.h" #include "cpu_core.h" +#include "pgxp.h" #include "settings.h" #include #include @@ -621,6 +622,21 @@ static void RTPS(const s16 V[3], u8 shift, bool lm, bool last) CheckMACOverflow<0>(Sy); PushSXY(s32(Sx >> 16), s32(Sy >> 16)); + if (g_settings.gpu_pgxp_enable) + { + // this can potentially use increased precision on Z + const float precise_z = std::max((float)REGS.H / 2.f, (float)REGS.SZ3); + const float precise_h_div_sz = (float)REGS.H / precise_z; + const float fofx = ((float)REGS.OFX / (float)(1 << 16)); + const float fofy = ((float)REGS.OFY / (float)(1 << 16)); + float precise_x = fofx + ((float)REGS.IR1 * precise_h_div_sz) * ((g_settings.gpu_widescreen_hack) ? 0.75f : 1.00f); + float precise_y = fofy + ((float)REGS.IR2 * precise_h_div_sz); + + precise_x = std::clamp(precise_x, -0x400, 0x3ff); + precise_y = std::clamp(precise_y, -0x400, 0x3ff); + PGXP::GTE_PushSXYZ2f(precise_x, precise_y, precise_z, REGS.dr32[14]); + } + if (last) { // MAC0=(((H*20000h/SZ3)+1)/2)*DQA+DQB, IR0=MAC0/1000h ;Depth cueing 0..+1000h @@ -664,6 +680,19 @@ static void Execute_NCLIP(Instruction inst) REGS.FLAG.UpdateError(); } +static void Execute_NCLIP_PGXP(Instruction inst) +{ + if (PGXP::GTE_NCLIP_valid(REGS.dr32[12], REGS.dr32[13], REGS.dr32[14])) + { + REGS.FLAG.Clear(); + REGS.MAC0 = static_cast(PGXP::GTE_NCLIP()); + } + else + { + Execute_NCLIP(inst); + } +} + static void Execute_AVSZ3(Instruction inst) { REGS.FLAG.Clear(); @@ -994,8 +1023,13 @@ void ExecuteInstruction(u32 inst_bits) break; case 0x06: - Execute_NCLIP(inst); - break; + { + if (g_settings.gpu_pgxp_enable && g_settings.gpu_pgxp_culling) + Execute_NCLIP_PGXP(inst); + else + Execute_NCLIP(inst); + } + break; case 0x0C: Execute_OP(inst); @@ -1092,7 +1126,12 @@ InstructionImpl GetInstructionImpl(u32 inst_bits) return &Execute_RTPS; case 0x06: - return &Execute_NCLIP; + { + if (g_settings.gpu_pgxp_enable && g_settings.gpu_pgxp_culling) + return &Execute_NCLIP_PGXP; + else + return &Execute_NCLIP; + } case 0x0C: return &Execute_OP; diff --git a/src/core/host_interface.cpp b/src/core/host_interface.cpp index 4f0d3c6e6..6e945922c 100644 --- a/src/core/host_interface.cpp +++ b/src/core/host_interface.cpp @@ -8,12 +8,13 @@ #include "common/log.h" #include "common/string_util.h" #include "controller.h" -#include "cpu_core.h" #include "cpu_code_cache.h" +#include "cpu_core.h" #include "dma.h" #include "gpu.h" #include "gte.h" #include "host_display.h" +#include "pgxp.h" #include "save_state_version.h" #include "system.h" #include @@ -367,6 +368,10 @@ void HostInterface::SetDefaultSettings(SettingsInterface& si) si.SetBoolValue("GPU", "DisableInterlacing", false); si.SetBoolValue("GPU", "ForceNTSCTimings", false); si.SetBoolValue("GPU", "WidescreenHack", false); + si.SetBoolValue("GPU", "PGXPEnable", false); + si.SetBoolValue("GPU", "PGXPCulling", true); + si.SetBoolValue("GPU", "PGXPTextureCorrection", true); + si.SetBoolValue("GPU", "PGXPVertexCache", false); si.SetStringValue("Display", "CropMode", Settings::GetDisplayCropModeName(Settings::DEFAULT_DISPLAY_CROP_MODE)); si.SetStringValue("Display", "AspectRatio", @@ -485,6 +490,19 @@ void HostInterface::CheckForSettingsChanges(const Settings& old_settings) g_gpu->UpdateSettings(); } + if (g_settings.gpu_pgxp_enable != old_settings.gpu_pgxp_enable || + (g_settings.gpu_pgxp_enable && g_settings.gpu_pgxp_culling != old_settings.gpu_pgxp_culling)) + { + if (g_settings.IsUsingCodeCache()) + { + ReportFormattedMessage("PGXP %s, recompiling all blocks.", g_settings.gpu_pgxp_enable ? "enabled" : "disabled"); + CPU::CodeCache::Flush(); + } + + if (g_settings.gpu_pgxp_enable) + PGXP::Initialize(); + } + if (g_settings.cdrom_read_thread != old_settings.cdrom_read_thread) g_cdrom.SetUseReadThread(g_settings.cdrom_read_thread); @@ -625,8 +643,7 @@ void HostInterface::ToggleSoftwareRendering() if (System::IsShutdown() || g_settings.gpu_renderer == GPURenderer::Software) return; - const GPURenderer new_renderer = - g_gpu->IsHardwareRenderer() ? GPURenderer::Software : g_settings.gpu_renderer; + const GPURenderer new_renderer = g_gpu->IsHardwareRenderer() ? GPURenderer::Software : g_settings.gpu_renderer; AddFormattedOSDMessage(2.0f, "Switching to %s renderer...", Settings::GetRendererDisplayName(new_renderer)); System::RecreateGPU(new_renderer); diff --git a/src/core/pgxp.cpp b/src/core/pgxp.cpp new file mode 100644 index 000000000..7ca7c4f94 --- /dev/null +++ b/src/core/pgxp.cpp @@ -0,0 +1,800 @@ +/*************************************************************************** + * Original copyright notice from PGXP code from Beetle PSX. * + * Copyright (C) 2016 by iCatButler * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. * + ***************************************************************************/ + +#include "pgxp.h" +#include "settings.h" +#include + +namespace PGXP { +// pgxp_types.h +typedef struct PGXP_value_Tag +{ + float x; + float y; + float z; + union + { + unsigned int flags; + unsigned char compFlags[4]; + unsigned short halfFlags[2]; + }; + unsigned int count; + unsigned int value; + + unsigned short gFlags; + unsigned char lFlags; + unsigned char hFlags; +} PGXP_value; + +// pgxp_value.h +typedef union +{ + struct + { + u8 l, h, h2, h3; + } b; + struct + { + u16 l, h; + } w; + struct + { + s8 l, h, h2, h3; + } sb; + struct + { + s16 l, h; + } sw; + u32 d; + s32 sd; +} psx_value; + +typedef enum +{ + UNINITIALISED = 0, + INVALID_PSX_VALUE = 1, + INVALID_ADDRESS = 2, + INVALID_BITWISE_OP = 3, + DIVIDE_BY_ZERO = 4, + INVALID_8BIT_LOAD = 5, + INVALID_8BIT_STORE = 6 +} PGXP_error_states; + +#define NONE 0 +#define ALL 0xFFFFFFFF +#define VALID 1 +#define VALID_0 (VALID << 0) +#define VALID_1 (VALID << 8) +#define VALID_2 (VALID << 16) +#define VALID_3 (VALID << 24) +#define VALID_01 (VALID_0 | VALID_1) +#define VALID_012 (VALID_0 | VALID_1 | VALID_2) +#define VALID_ALL (VALID_0 | VALID_1 | VALID_2 | VALID_3) +#define INV_VALID_ALL (ALL ^ VALID_ALL) + +static const PGXP_value PGXP_value_invalid_address = {0.f, 0.f, 0.f, {0}, 0, 0, INVALID_ADDRESS, 0, 0}; +static const PGXP_value PGXP_value_zero = {0.f, 0.f, 0.f, {0}, 0, VALID_ALL, 0, 0, 0}; + +static void Validate(PGXP_value* pV, u32 psxV); +static void MaskValidate(PGXP_value* pV, u32 psxV, u32 mask, u32 validMask); + +typedef union +{ + struct + { + s16 x; + s16 y; + }; + struct + { + u16 ux; + u16 uy; + }; + u32 word; +} low_value; + +// pgxp_mem.h +static u32 PGXP_ConvertAddress(u32 addr); +static PGXP_value* GetPtr(u32 addr); +static PGXP_value* ReadMem(u32 addr); + +static void ValidateAndCopyMem(PGXP_value* dest, u32 addr, u32 value); +static void ValidateAndCopyMem16(PGXP_value* dest, u32 addr, u32 value, int sign); + +static void WriteMem(PGXP_value* value, u32 addr); +static void WriteMem16(PGXP_value* src, u32 addr); + +// pgxp_gpu.h +void PGXP_CacheVertex(short sx, short sy, const PGXP_value* _pVertex); + +// pgxp_gte.h +static void PGXP_InitGTE(); + +// pgxp_cpu.h +static void PGXP_InitCPU(); +static PGXP_value CPU_reg_mem[34]; +#define CPU_Hi CPU_reg[33] +#define CPU_Lo CPU_reg[34] +static PGXP_value CP0_reg_mem[32]; + +static PGXP_value* CPU_reg = CPU_reg_mem; +static PGXP_value* CP0_reg = CP0_reg_mem; + +// pgxp_value.c +void Validate(PGXP_value* pV, u32 psxV) +{ + // assume pV is not NULL + pV->flags &= (pV->value == psxV) ? ALL : INV_VALID_ALL; +} + +void MaskValidate(PGXP_value* pV, u32 psxV, u32 mask, u32 validMask) +{ + // assume pV is not NULL + pV->flags &= ((pV->value & mask) == (psxV & mask)) ? ALL : (ALL ^ (validMask)); +} + +// pgxp_mem.c +static void PGXP_InitMem(); +static PGXP_value Mem[3 * 2048 * 1024 / 4]; // mirror 2MB in 32-bit words * 3 +static const u32 UserMemOffset = 0; +static const u32 ScratchOffset = 2048 * 1024 / 4; +static const u32 RegisterOffset = 2 * 2048 * 1024 / 4; +static const u32 InvalidAddress = 3 * 2048 * 1024 / 4; + +void PGXP_InitMem() +{ + memset(Mem, 0, sizeof(Mem)); +} + +u32 PGXP_ConvertAddress(u32 addr) +{ + u32 paddr = addr; + switch (paddr >> 24) + { + case 0x80: + case 0xa0: + case 0x00: + // RAM further mirrored over 8MB + paddr = ((paddr & 0x7FFFFF) % 0x200000) >> 2; + paddr = UserMemOffset + paddr; + break; + default: + if ((paddr >> 20) == 0x1f8) + { + if (paddr >= 0x1f801000) + { + // paddr = ((paddr & 0xFFFF) - 0x1000); + // paddr = (paddr % 0x2000) >> 2; + paddr = ((paddr & 0xFFFF) - 0x1000) >> 2; + paddr = RegisterOffset + paddr; + break; + } + else + { + // paddr = ((paddr & 0xFFF) % 0x400) >> 2; + paddr = (paddr & 0x3FF) >> 2; + paddr = ScratchOffset + paddr; + break; + } + } + + paddr = InvalidAddress; + break; + } + +#ifdef GTE_LOG + // GTE_LOG("PGXP_Read %x [%x] |", addr, paddr); +#endif + + return paddr; +} + +PGXP_value* GetPtr(u32 addr) +{ + addr = PGXP_ConvertAddress(addr); + + if (addr != InvalidAddress) + return &Mem[addr]; + return NULL; +} + +PGXP_value* ReadMem(u32 addr) +{ + return GetPtr(addr); +} + +void ValidateAndCopyMem(PGXP_value* dest, u32 addr, u32 value) +{ + PGXP_value* pMem = GetPtr(addr); + if (pMem != NULL) + { + Validate(pMem, value); + *dest = *pMem; + return; + } + + *dest = PGXP_value_invalid_address; +} + +void ValidateAndCopyMem16(PGXP_value* dest, u32 addr, u32 value, int sign) +{ + u32 validMask = 0; + psx_value val, mask; + PGXP_value* pMem = GetPtr(addr); + if (pMem != NULL) + { + mask.d = val.d = 0; + // determine if high or low word + if ((addr % 4) == 2) + { + val.w.h = static_cast(value); + mask.w.h = 0xFFFF; + validMask = VALID_1; + } + else + { + val.w.l = static_cast(value); + mask.w.l = 0xFFFF; + validMask = VALID_0; + } + + // validate and copy whole value + MaskValidate(pMem, val.d, mask.d, validMask); + *dest = *pMem; + + // if high word then shift + if ((addr % 4) == 2) + { + dest->x = dest->y; + dest->lFlags = dest->hFlags; + dest->compFlags[0] = dest->compFlags[1]; + } + + // truncate value + dest->y = (dest->x < 0) ? -1.f * sign : 0.f; // 0.f; + dest->hFlags = 0; + dest->value = value; + dest->compFlags[1] = VALID; // iCB: High word is valid, just 0 + return; + } + + *dest = PGXP_value_invalid_address; +} + +void WriteMem(PGXP_value* value, u32 addr) +{ + PGXP_value* pMem = GetPtr(addr); + + if (pMem) + *pMem = *value; +} + +void WriteMem16(PGXP_value* src, u32 addr) +{ + PGXP_value* dest = GetPtr(addr); + psx_value* pVal = NULL; + + if (dest) + { + pVal = (psx_value*)&dest->value; + // determine if high or low word + if ((addr % 4) == 2) + { + dest->y = src->x; + dest->hFlags = src->lFlags; + dest->compFlags[1] = src->compFlags[0]; + pVal->w.h = (u16)src->value; + } + else + { + dest->x = src->x; + dest->lFlags = src->lFlags; + dest->compFlags[0] = src->compFlags[0]; + pVal->w.l = (u16)src->value; + } + + // overwrite z/w if valid + if (src->compFlags[2] == VALID) + { + dest->z = src->z; + dest->compFlags[2] = src->compFlags[2]; + } + + // dest->valid = dest->valid && src->valid; + dest->gFlags |= src->gFlags; // inherit flags from both values (?) + } +} + +// pgxp_main.c +u32 static gMode = 0; + +void Initialize() +{ + PGXP_InitMem(); + PGXP_InitCPU(); + PGXP_InitGTE(); +} + +void PGXP_SetModes(u32 modes) +{ + gMode = modes; +} + +u32 PGXP_GetModes() +{ + return gMode; +} + +void PGXP_EnableModes(u32 modes) +{ + gMode |= modes; +} + +void PGXP_DisableModes(u32 modes) +{ + gMode = gMode & ~modes; +} + +// pgxp_gte.c + +// GTE registers +static PGXP_value GTE_data_reg_mem[32]; +static PGXP_value GTE_ctrl_reg_mem[32]; + +static PGXP_value* GTE_data_reg = GTE_data_reg_mem; +static PGXP_value* GTE_ctrl_reg = GTE_ctrl_reg_mem; + +void PGXP_InitGTE() +{ + memset(GTE_data_reg_mem, 0, sizeof(GTE_data_reg_mem)); + memset(GTE_ctrl_reg_mem, 0, sizeof(GTE_ctrl_reg_mem)); +} + +// Instruction register decoding +#define op(_instr) (_instr >> 26) // The op part of the instruction register +#define func(_instr) ((_instr)&0x3F) // The funct part of the instruction register +#define sa(_instr) ((_instr >> 6) & 0x1F) // The sa part of the instruction register +#define rd(_instr) ((_instr >> 11) & 0x1F) // The rd part of the instruction register +#define rt(_instr) ((_instr >> 16) & 0x1F) // The rt part of the instruction register +#define rs(_instr) ((_instr >> 21) & 0x1F) // The rs part of the instruction register +#define imm(_instr) (_instr & 0xFFFF) // The immediate part of the instruction register + +#define SX0 (GTE_data_reg[12].x) +#define SY0 (GTE_data_reg[12].y) +#define SX1 (GTE_data_reg[13].x) +#define SY1 (GTE_data_reg[13].y) +#define SX2 (GTE_data_reg[14].x) +#define SY2 (GTE_data_reg[14].y) + +#define SXY0 (GTE_data_reg[12]) +#define SXY1 (GTE_data_reg[13]) +#define SXY2 (GTE_data_reg[14]) +#define SXYP (GTE_data_reg[15]) + +void GTE_PushSXYZ2f(float _x, float _y, float _z, unsigned int _v) +{ + static unsigned int uCount = 0; + low_value temp; + // push values down FIFO + SXY0 = SXY1; + SXY1 = SXY2; + + SXY2.x = _x; + SXY2.y = _y; + SXY2.z = _z; + SXY2.value = _v; + SXY2.flags = VALID_ALL; + SXY2.count = uCount++; + + // cache value in GPU plugin + temp.word = _v; + if (g_settings.gpu_pgxp_vertex_cache) + PGXP_CacheVertex(temp.x, temp.y, &SXY2); + else + PGXP_CacheVertex(0, 0, NULL); + +#ifdef GTE_LOG + GTE_LOG("PGXP_PUSH (%f, %f) %u %u|", SXY2.x, SXY2.y, SXY2.flags, SXY2.count); +#endif +} + +void GTE_PushSXYZ2s(s64 _x, s64 _y, s64 _z, u32 v) +{ + float fx = (float)(_x) / (float)(1 << 16); + float fy = (float)(_y) / (float)(1 << 16); + float fz = (float)(_z); + + // if(Config.PGXP_GTE) + GTE_PushSXYZ2f(fx, fy, fz, v); +} + +#define VX(n) (psxRegs.CP2D.p[n << 1].sw.l) +#define VY(n) (psxRegs.CP2D.p[n << 1].sw.h) +#define VZ(n) (psxRegs.CP2D.p[(n << 1) + 1].sw.l) + +int GTE_NCLIP_valid(u32 sxy0, u32 sxy1, u32 sxy2) +{ + Validate(&SXY0, sxy0); + Validate(&SXY1, sxy1); + Validate(&SXY2, sxy2); + if (((SXY0.flags & SXY1.flags & SXY2.flags & VALID_01) == VALID_01)) // && Config.PGXP_GTE && (Config.PGXP_Mode > 0)) + return 1; + return 0; +} + +float GTE_NCLIP() +{ + float nclip = ((SX0 * SY1) + (SX1 * SY2) + (SX2 * SY0) - (SX0 * SY2) - (SX1 * SY0) - (SX2 * SY1)); + + // ensure fractional values are not incorrectly rounded to 0 + float nclipAbs = std::abs(nclip); + if ((0.1f < nclipAbs) && (nclipAbs < 1.f)) + nclip += (nclip < 0.f ? -1 : 1); + + // float AX = SX1 - SX0; + // float AY = SY1 - SY0; + + // float BX = SX2 - SX0; + // float BY = SY2 - SY0; + + //// normalise A and B + // float mA = sqrt((AX*AX) + (AY*AY)); + // float mB = sqrt((BX*BX) + (BY*BY)); + + //// calculate AxB to get Z component of C + // float CZ = ((AX * BY) - (AY * BX)) * (1 << 12); + + return nclip; +} + +static void PGXP_MTC2_int(PGXP_value value, u32 reg) +{ + switch (reg) + { + case 15: + // push FIFO + SXY0 = SXY1; + SXY1 = SXY2; + SXY2 = value; + SXYP = SXY2; + break; + + case 31: + return; + } + + GTE_data_reg[reg] = value; +} + +//////////////////////////////////// +// Data transfer tracking +//////////////////////////////////// + +void CPU_MFC2(u32 instr, u32 rtVal, u32 rdVal) +{ + // CPU[Rt] = GTE_D[Rd] + Validate(>E_data_reg[rd(instr)], rdVal); + CPU_reg[rt(instr)] = GTE_data_reg[rd(instr)]; + CPU_reg[rt(instr)].value = rtVal; +} + +void CPU_MTC2(u32 instr, u32 rdVal, u32 rtVal) +{ + // GTE_D[Rd] = CPU[Rt] + Validate(&CPU_reg[rt(instr)], rtVal); + PGXP_MTC2_int(CPU_reg[rt(instr)], rd(instr)); + GTE_data_reg[rd(instr)].value = rdVal; +} + +void CPU_CFC2(u32 instr, u32 rtVal, u32 rdVal) +{ + // CPU[Rt] = GTE_C[Rd] + Validate(>E_ctrl_reg[rd(instr)], rdVal); + CPU_reg[rt(instr)] = GTE_ctrl_reg[rd(instr)]; + CPU_reg[rt(instr)].value = rtVal; +} + +void CPU_CTC2(u32 instr, u32 rdVal, u32 rtVal) +{ + // GTE_C[Rd] = CPU[Rt] + Validate(&CPU_reg[rt(instr)], rtVal); + GTE_ctrl_reg[rd(instr)] = CPU_reg[rt(instr)]; + GTE_ctrl_reg[rd(instr)].value = rdVal; +} + +//////////////////////////////////// +// Memory Access +//////////////////////////////////// +void CPU_LWC2(u32 instr, u32 rtVal, u32 addr) +{ + // GTE_D[Rt] = Mem[addr] + PGXP_value val; + ValidateAndCopyMem(&val, addr, rtVal); + PGXP_MTC2_int(val, rt(instr)); +} + +void CPU_SWC2(u32 instr, u32 rtVal, u32 addr) +{ + // Mem[addr] = GTE_D[Rt] + Validate(>E_data_reg[rt(instr)], rtVal); + WriteMem(>E_data_reg[rt(instr)], addr); +} + +// pgxp_gpu.c +///////////////////////////////// +//// Blade_Arma's Vertex Cache (CatBlade?) +///////////////////////////////// +const unsigned int mode_init = 0; +const unsigned int mode_write = 1; +const unsigned int mode_read = 2; +const unsigned int mode_fail = 3; + +PGXP_value vertexCache[0x800 * 2][0x800 * 2]; + +unsigned int baseID = 0; +unsigned int lastID = 0; +unsigned int cacheMode = 0; + +unsigned int IsSessionID(unsigned int vertID) +{ + // No wrapping + if (lastID >= baseID) + return (vertID >= baseID); + + // If vertID is >= baseID it is pre-wrap and in session + if (vertID >= baseID) + return 1; + + // vertID is < baseID, If it is <= lastID it is post-wrap and in session + if (vertID <= lastID) + return 1; + + return 0; +} + +void PGXP_CacheVertex(short sx, short sy, const PGXP_value* _pVertex) +{ + const PGXP_value* pNewVertex = (const PGXP_value*)_pVertex; + PGXP_value* pOldVertex = NULL; + + if (!pNewVertex) + { + cacheMode = mode_fail; + return; + } + + // if (bGteAccuracy) + { + if (cacheMode != mode_write) + { + // Initialise cache on first use + if (cacheMode == mode_init) + memset(vertexCache, 0x00, sizeof(vertexCache)); + + // First vertex of write session (frame?) + cacheMode = mode_write; + baseID = pNewVertex->count; + } + + lastID = pNewVertex->count; + + if (sx >= -0x800 && sx <= 0x7ff && sy >= -0x800 && sy <= 0x7ff) + { + pOldVertex = &vertexCache[sy + 0x800][sx + 0x800]; + + // To avoid ambiguity there can only be one valid entry per-session + if (0) //(IsSessionID(pOldVertex->count) && (pOldVertex->value == pNewVertex->value)) + { + // check to ensure this isn't identical + if ((fabsf(pOldVertex->x - pNewVertex->x) > 0.1f) || (fabsf(pOldVertex->y - pNewVertex->y) > 0.1f) || + (fabsf(pOldVertex->z - pNewVertex->z) > 0.1f)) + { + *pOldVertex = *pNewVertex; + pOldVertex->gFlags = 5; + return; + } + } + + // Write vertex into cache + *pOldVertex = *pNewVertex; + pOldVertex->gFlags = 1; + } + } +} + +PGXP_value* PGXP_GetCachedVertex(short sx, short sy) +{ + // if (bGteAccuracy) + { + if (cacheMode != mode_read) + { + if (cacheMode == mode_fail) + return NULL; + + // Initialise cache on first use + if (cacheMode == mode_init) + memset(vertexCache, 0x00, sizeof(vertexCache)); + + // First vertex of read session (frame?) + cacheMode = mode_read; + } + + if (sx >= -0x800 && sx <= 0x7ff && sy >= -0x800 && sy <= 0x7ff) + { + // Return pointer to cache entry + return &vertexCache[sy + 0x800][sx + 0x800]; + } + } + + return NULL; +} + +static float TruncateVertexPosition(float p) +{ + const s32 int_part = static_cast(p); + const float int_part_f = static_cast(int_part); + return static_cast(static_cast(int_part << 5) >> 5) + (p - int_part_f); +} + +bool GetPreciseVertex(u32 addr, u32 value, int x, int y, int xOffs, int yOffs, float* out_x, float* out_y, float* out_w) +{ + const PGXP_value* vert = ReadMem(addr); + if (vert && ((vert->flags & VALID_01) == VALID_01) && (vert->value == value)) + { + // There is a value here with valid X and Y coordinates + *out_x = TruncateVertexPosition(vert->x) + static_cast(xOffs); + *out_y = TruncateVertexPosition(vert->y) + static_cast(yOffs); + *out_w = vert->z / 32768.0f; + + // This value does not have a valid W coordinate + return ((vert->flags & VALID_2) == VALID_2); + } + else + { + const short psx_x = (short)(value & 0xFFFFu); + const short psx_y = (short)(value >> 16); + + // Look in cache for valid vertex + vert = PGXP_GetCachedVertex(psx_x, psx_y); + if ((vert) && /*(IsSessionID(vert->count)) &&*/ (vert->gFlags == 1)) + { + // a value is found, it is from the current session and is unambiguous (there was only one value recorded at that + // position) + *out_x = TruncateVertexPosition(vert->x) + static_cast(xOffs); + *out_y = TruncateVertexPosition(vert->y) + static_cast(yOffs); + *out_w = vert->z / 32768.0f; + return false; // iCB: Getting the wrong w component causes too great an error when using perspective correction + // so disable it + } + else + { + // no valid value can be found anywhere, use the native PSX data + *out_x = static_cast(x); + *out_y = static_cast(y); + *out_w = 1.0f; + return false; + } + } +} + +// pgxp_cpu.c + +// Instruction register decoding +#define op(_instr) (_instr >> 26) // The op part of the instruction register +#define func(_instr) ((_instr)&0x3F) // The funct part of the instruction register +#define sa(_instr) ((_instr >> 6) & 0x1F) // The sa part of the instruction register +#define rd(_instr) ((_instr >> 11) & 0x1F) // The rd part of the instruction register +#define rt(_instr) ((_instr >> 16) & 0x1F) // The rt part of the instruction register +#define rs(_instr) ((_instr >> 21) & 0x1F) // The rs part of the instruction register +#define imm(_instr) (_instr & 0xFFFF) // The immediate part of the instruction register + +void PGXP_InitCPU() +{ + memset(CPU_reg_mem, 0, sizeof(CPU_reg_mem)); + memset(CP0_reg_mem, 0, sizeof(CP0_reg_mem)); +} + +// invalidate register (invalid 8 bit read) +static void InvalidLoad(u32 addr, u32 code, u32 value) +{ + u32 reg = ((code >> 16) & 0x1F); // The rt part of the instruction register + PGXP_value* pD = NULL; + PGXP_value p; + + p.x = p.y = -1337; // default values + + // p.valid = 0; + // p.count = value; + pD = ReadMem(addr); + + if (pD) + { + p.count = addr; + p = *pD; + } + else + { + p.count = value; + } + + p.flags = 0; + + // invalidate register + CPU_reg[reg] = p; +} + +// invalidate memory address (invalid 8 bit write) +static void InvalidStore(u32 addr, u32 code, u32 value) +{ + u32 reg = ((code >> 16) & 0x1F); // The rt part of the instruction register + PGXP_value* pD = NULL; + PGXP_value p; + + pD = ReadMem(addr); + + p.x = p.y = -2337; + + if (pD) + p = *pD; + + p.flags = 0; + p.count = (reg * 1000) + value; + + // invalidate memory + WriteMem(&p, addr); +} + +void CPU_LW(u32 instr, u32 rtVal, u32 addr) +{ + // Rt = Mem[Rs + Im] + ValidateAndCopyMem(&CPU_reg[rt(instr)], addr, rtVal); +} + +void CPU_LBx(u32 instr, u32 rtVal, u32 addr) +{ + InvalidLoad(addr, instr, 116); +} + +void CPU_LHx(u32 instr, u32 rtVal, u32 addr) +{ + // Rt = Mem[Rs + Im] (sign/zero extended) + ValidateAndCopyMem16(&CPU_reg[rt(instr)], addr, rtVal, 1); +} + +void CPU_SB(u32 instr, u8 rtVal, u32 addr) +{ + InvalidStore(addr, instr, 208); +} + +void CPU_SH(u32 instr, u16 rtVal, u32 addr) +{ + // validate and copy half value + MaskValidate(&CPU_reg[rt(instr)], rtVal, 0xFFFF, VALID_0); + WriteMem16(&CPU_reg[rt(instr)], addr); +} + +void CPU_SW(u32 instr, u32 rtVal, u32 addr) +{ + // Mem[Rs + Im] = Rt + Validate(&CPU_reg[rt(instr)], rtVal); + WriteMem(&CPU_reg[rt(instr)], addr); +} + +} // namespace PGXP \ No newline at end of file diff --git a/src/core/pgxp.h b/src/core/pgxp.h new file mode 100644 index 000000000..02b996615 --- /dev/null +++ b/src/core/pgxp.h @@ -0,0 +1,54 @@ +/*************************************************************************** + * Original copyright notice from PGXP code from Beetle PSX. * + * Copyright (C) 2016 by iCatButler * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. * + ***************************************************************************/ + +#pragma once +#include "types.h" + +namespace PGXP { + +void Initialize(); + +// -- GTE functions +// Transforms +void GTE_PushSXYZ2f(float _x, float _y, float _z, unsigned int _v); +void GTE_PushSXYZ2s(s64 _x, s64 _y, s64 _z, u32 v); +int GTE_NCLIP_valid(u32 sxy0, u32 sxy1, u32 sxy2); +float GTE_NCLIP(); + +// Data transfer tracking +void CPU_MFC2(u32 instr, u32 rtVal, u32 rdVal); // copy GTE data reg to GPR reg (MFC2) +void CPU_MTC2(u32 instr, u32 rdVal, u32 rtVal); // copy GPR reg to GTE data reg (MTC2) +void CPU_CFC2(u32 instr, u32 rtVal, u32 rdVal); // copy GTE ctrl reg to GPR reg (CFC2) +void CPU_CTC2(u32 instr, u32 rdVal, u32 rtVal); // copy GPR reg to GTE ctrl reg (CTC2) +// Memory Access +void CPU_LWC2(u32 instr, u32 rtVal, u32 addr); // copy memory to GTE reg +void CPU_SWC2(u32 instr, u32 rtVal, u32 addr); // copy GTE reg to memory + +bool GetPreciseVertex(u32 addr, u32 value, int x, int y, int xOffs, int yOffs, float* out_x, float* out_y, float* out_w); + +// -- CPU functions +void CPU_LW(u32 instr, u32 rtVal, u32 addr); +void CPU_LHx(u32 instr, u32 rtVal, u32 addr); +void CPU_LBx(u32 instr, u32 rtVal, u32 addr); +void CPU_SB(u32 instr, u8 rtVal, u32 addr); +void CPU_SH(u32 instr, u16 rtVal, u32 addr); +void CPU_SW(u32 instr, u32 rtVal, u32 addr); + +} // namespace PGXP \ No newline at end of file diff --git a/src/core/settings.cpp b/src/core/settings.cpp index ac8f209f1..dac561636 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -101,6 +101,10 @@ void Settings::Load(SettingsInterface& si) gpu_disable_interlacing = si.GetBoolValue("GPU", "DisableInterlacing", false); gpu_force_ntsc_timings = si.GetBoolValue("GPU", "ForceNTSCTimings", false); gpu_widescreen_hack = si.GetBoolValue("GPU", "WidescreenHack", false); + gpu_pgxp_enable = si.GetBoolValue("GPU", "PGXPEnable", false); + gpu_pgxp_culling = si.GetBoolValue("GPU", "PGXPCulling", true); + gpu_pgxp_texture_correction = si.GetBoolValue("GPU", "PGXPTextureCorrection", true); + gpu_pgxp_vertex_cache = si.GetBoolValue("GPU", "PGXPVertexCache", false); display_crop_mode = ParseDisplayCropMode( @@ -203,6 +207,10 @@ void Settings::Save(SettingsInterface& si) const si.SetBoolValue("GPU", "DisableInterlacing", gpu_disable_interlacing); si.SetBoolValue("GPU", "ForceNTSCTimings", gpu_force_ntsc_timings); si.SetBoolValue("GPU", "WidescreenHack", gpu_widescreen_hack); + si.SetBoolValue("GPU", "PGXPEnable", gpu_pgxp_enable); + si.SetBoolValue("GPU", "PGXPCulling", gpu_pgxp_culling); + si.SetBoolValue("GPU", "PGXPTextureCorrection", gpu_pgxp_texture_correction); + si.SetBoolValue("GPU", "PGXPVertexCache", gpu_pgxp_vertex_cache); si.SetStringValue("Display", "CropMode", GetDisplayCropModeName(display_crop_mode)); si.SetStringValue("Display", "AspectRatio", GetDisplayAspectRatioName(display_aspect_ratio)); diff --git a/src/core/settings.h b/src/core/settings.h index 2e6b24d25..d0a010811 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -88,6 +88,10 @@ struct Settings bool gpu_disable_interlacing = false; bool gpu_force_ntsc_timings = false; bool gpu_widescreen_hack = false; + bool gpu_pgxp_enable = false; + bool gpu_pgxp_culling = true; + bool gpu_pgxp_texture_correction = true; + bool gpu_pgxp_vertex_cache = false; DisplayCropMode display_crop_mode = DisplayCropMode::None; DisplayAspectRatio display_aspect_ratio = DisplayAspectRatio::R4_3; bool display_linear_filtering = true; @@ -146,6 +150,7 @@ struct Settings bool log_to_window = false; bool log_to_file = false; + ALWAYS_INLINE bool IsUsingCodeCache() const { return (cpu_execution_mode != CPUExecutionMode::Interpreter); } ALWAYS_INLINE bool IsUsingRecompiler() const { return (cpu_execution_mode == CPUExecutionMode::Recompiler); } ALWAYS_INLINE bool IsUsingSoftwareRenderer() const { return (gpu_renderer == GPURenderer::Software); } diff --git a/src/duckstation-libretro/libretro_host_interface.cpp b/src/duckstation-libretro/libretro_host_interface.cpp index 41481984a..d60664d20 100644 --- a/src/duckstation-libretro/libretro_host_interface.cpp +++ b/src/duckstation-libretro/libretro_host_interface.cpp @@ -352,7 +352,7 @@ void LibretroHostInterface::OnSystemDestroyed() m_using_hardware_renderer = false; } -static std::array s_option_definitions = {{ +static std::array s_option_definitions = {{ {"Console.Region", "Console Region", "Determines which region/hardware to emulate. Auto-Detect will use the region of the disc inserted.", @@ -447,6 +447,29 @@ static std::array s_option_definitions = {{ "backgrounds, this enhancement will not work as expected.", {{"true", "Enabled"}, {"false", "Disabled"}}, "false"}, + {"GPU.PGXPEnable", + "PGXP Geometry Correction", + "Reduces \"wobbly\" polygons by attempting to preserve the fractional component through memory transfers. Only " + "works with the hardware renderers, and may not be compatible with all games.", + {{"true", "Enabled"}, {"false", "Disabled"}}, + "false"}, + {"GPU.PGXPCulling", + "PGXP Culling Correction", + "Increases the precision of polygon culling, reducing the number of holes in geometry. Requires geometry correction " + "enabled.", + {{"true", "Enabled"}, {"false", "Disabled"}}, + "true"}, + {"GPU.PGXPTextureCorrection", + "PGXP Texture Correction", + "Uses perspective-correct interpolation for texture coordinates and colors, straightening out warped textures. " + "Requires geometry correction enabled.", + {{"true", "Enabled"}, {"false", "Disabled"}}, + "true"}, + {"GPU.PGXPVertexCache", + "PGXP Vertex Cache", + "Uses screen coordinates as a fallback when tracking vertices through memory fails. May improve PGXP compatibility.", + {{"true", "Enabled"}, {"false", "Disabled"}}, + "false"}, {"Display.CropMode", "Crop Mode", "Changes how much of the image is cropped. Some games display garbage in the overscan area which is typically " diff --git a/src/duckstation-qt/gpusettingswidget.cpp b/src/duckstation-qt/gpusettingswidget.cpp index 82af959b1..cf61e5f65 100644 --- a/src/duckstation-qt/gpusettingswidget.cpp +++ b/src/duckstation-qt/gpusettingswidget.cpp @@ -40,11 +40,20 @@ GPUSettingsWidget::GPUSettingsWidget(QtHostInterface* host_interface, QWidget* p "TextureFiltering"); SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.widescreenHack, "GPU", "WidescreenHack"); + SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.pgxpEnable, "GPU", "PGXPEnable", false); + SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.pgxpCulling, "GPU", "PGXPCulling", true); + SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.pgxpTextureCorrection, "GPU", + "PGXPTextureCorrection", true); + SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.pgxpVertexCache, "GPU", "PGXPVertexCache", false); + connect(m_ui.resolutionScale, QOverload::of(&QComboBox::currentIndexChanged), this, &GPUSettingsWidget::updateScaledDitheringEnabled); connect(m_ui.trueColor, &QCheckBox::stateChanged, this, &GPUSettingsWidget::updateScaledDitheringEnabled); updateScaledDitheringEnabled(); + connect(m_ui.pgxpEnable, &QCheckBox::stateChanged, this, &GPUSettingsWidget::updatePGXPSettingsEnabled); + updatePGXPSettingsEnabled(); + connect(m_ui.renderer, QOverload::of(&QComboBox::currentIndexChanged), this, &GPUSettingsWidget::populateGPUAdapters); connect(m_ui.adapter, QOverload::of(&QComboBox::currentIndexChanged), this, @@ -126,6 +135,19 @@ GPUSettingsWidget::GPUSettingsWidget(QtHostInterface* host_interface, QWidget* p tr("Scales vertex positions in screen-space to a widescreen aspect ratio, essentially " "increasing the field of view from 4:3 to 16:9 in 3D games. For 2D games, or games which " "use pre-rendered backgrounds, this enhancement will not work as expected.")); + dialog->registerWidgetHelp( + m_ui.pgxpEnable, tr("Geometry Correction"), tr("Unchecked"), + tr("Reduces \"wobbly\" polygons by attempting to preserve the fractional component through memory transfers. Only " + "works with the hardware renderers, and may not be compatible with all games.")); + dialog->registerWidgetHelp(m_ui.pgxpCulling, tr("Culling Correction"), tr("Checked"), + tr("Increases the precision of polygon culling, reducing the number of holes in geometry. " + "Requires geometry correction enabled.")); + dialog->registerWidgetHelp(m_ui.pgxpTextureCorrection, tr("Texture Correction"), tr("Checked"), + tr("Uses perspective-correct interpolation for texture coordinates and colors, " + "straightening out warped textures. Requires geometry correction enabled.")); + dialog->registerWidgetHelp(m_ui.pgxpVertexCache, tr("Vertex Cache"), tr("Unchecked"), + tr("Uses screen coordinates as a fallback when tracking vertices through memory fails. " + "May improve PGXP compatibility.")); } GPUSettingsWidget::~GPUSettingsWidget() = default; @@ -232,3 +254,11 @@ void GPUSettingsWidget::onGPUAdapterIndexChanged() m_host_interface->SetStringSettingValue("GPU", "Adapter", m_ui.adapter->currentText().toUtf8().constData()); } + +void GPUSettingsWidget::updatePGXPSettingsEnabled() +{ + const bool enabled = m_ui.pgxpEnable->isChecked(); + m_ui.pgxpCulling->setEnabled(enabled); + m_ui.pgxpTextureCorrection->setEnabled(enabled); + m_ui.pgxpVertexCache->setEnabled(enabled); +} diff --git a/src/duckstation-qt/gpusettingswidget.h b/src/duckstation-qt/gpusettingswidget.h index 433c204e4..5206b77c9 100644 --- a/src/duckstation-qt/gpusettingswidget.h +++ b/src/duckstation-qt/gpusettingswidget.h @@ -19,6 +19,7 @@ private Q_SLOTS: void updateScaledDitheringEnabled(); void populateGPUAdapters(); void onGPUAdapterIndexChanged(); + void updatePGXPSettingsEnabled(); private: void setupAdditionalUi(); diff --git a/src/duckstation-qt/gpusettingswidget.ui b/src/duckstation-qt/gpusettingswidget.ui index 43a299d77..93aa03e49 100644 --- a/src/duckstation-qt/gpusettingswidget.ui +++ b/src/duckstation-qt/gpusettingswidget.ui @@ -7,13 +7,13 @@ 0 0 448 - 307 + 720 Form - + 0 @@ -27,165 +27,221 @@ 0 - - - Basic + + + true - - - - - Renderer: - - - - - - - - - - Adapter: - - - - - - - - - - Use Debug Device - - - - + + + + 0 + 0 + 423 + 762 + + + + + + + Basic + + + + + + Renderer: + + + + + + + + + + Adapter: + + + + + + + + + + Use Debug Device + + + + + + + + + + Screen Display + + + + + + Aspect Ratio: + + + + + + + + + + Crop: + + + + + + + + + + Linear Upscaling + + + + + + + Integer Upscaling + + + + + + + VSync + + + + + + + + + + Enhancements + + + + + + Resolution Scale: + + + + + + + + + + True Color Rendering (24-bit, disables dithering) + + + + + + + Scaled Dithering (scale dither pattern to resolution) + + + + + + + Disable Interlacing (force progressive render/scan) + + + + + + + Force NTSC Timings (60hz-on-PAL) + + + + + + + Bilinear Texture Filtering + + + + + + + Widescreen Hack + + + + + + + + + + PGXP + + + + + + Geometry Correction + + + + + + + Culling Correction + + + + + + + Texture Correction + + + + + + + Vertex Cache + + + + + + + + + + Qt::Vertical + + + + 20 + 40 + + + + + + - - - - Screen Display - - - - - - Aspect Ratio: - - - - - - - - - - Crop: - - - - - - - - - - Linear Upscaling - - - - - - - Integer Upscaling - - - - - - - VSync - - - - - - - - - - Enhancements - - - - - - Resolution Scale: - - - - - - - - - - True Color Rendering (24-bit, disables dithering) - - - - - - - Scaled Dithering (scale dither pattern to resolution) - - - - - - - Disable Interlacing (force progressive render/scan) - - - - - - - Force NTSC Timings (60hz-on-PAL) - - - - - - - Bilinear Texture Filtering - - - - - - - Widescreen Hack - - - - - - - - - - Qt::Vertical - - - - 20 - 40 - - - - diff --git a/src/duckstation-sdl/sdl_host_interface.cpp b/src/duckstation-sdl/sdl_host_interface.cpp index 3b2a89dd4..ec2b9cd56 100644 --- a/src/duckstation-sdl/sdl_host_interface.cpp +++ b/src/duckstation-sdl/sdl_host_interface.cpp @@ -858,6 +858,18 @@ void SDLHostInterface::DrawQuickSettingsMenu() ImGui::EndMenu(); } + if (ImGui::BeginMenu("PGXP")) + { + settings_changed |= ImGui::MenuItem("PGXP Enabled", nullptr, &m_settings_copy.gpu_pgxp_enable); + settings_changed |= + ImGui::MenuItem("PGXP Culling", nullptr, &m_settings_copy.gpu_pgxp_culling, m_settings_copy.gpu_pgxp_enable); + settings_changed |= ImGui::MenuItem("PGXP Texture Correction", nullptr, + &m_settings_copy.gpu_pgxp_texture_correction, m_settings_copy.gpu_pgxp_enable); + settings_changed |= ImGui::MenuItem("PGXP Vertex Cache", nullptr, &m_settings_copy.gpu_pgxp_vertex_cache, + m_settings_copy.gpu_pgxp_enable); + ImGui::EndMenu(); + } + settings_changed |= ImGui::MenuItem("True (24-Bit) Color", nullptr, &m_settings_copy.gpu_true_color); settings_changed |= ImGui::MenuItem("Scaled Dithering", nullptr, &m_settings_copy.gpu_scaled_dithering); settings_changed |= ImGui::MenuItem("Texture Filtering", nullptr, &m_settings_copy.gpu_texture_filtering); @@ -1316,6 +1328,11 @@ void SDLHostInterface::DrawSettingsWindow() settings_changed |= ImGui::Checkbox("Disable Interlacing", &m_settings_copy.gpu_disable_interlacing); settings_changed |= ImGui::Checkbox("Force NTSC Timings", &m_settings_copy.gpu_force_ntsc_timings); settings_changed |= ImGui::Checkbox("Widescreen Hack", &m_settings_copy.gpu_widescreen_hack); + + settings_changed |= ImGui::Checkbox("PGXP Enabled", &m_settings_copy.gpu_pgxp_enable); + settings_changed |= ImGui::Checkbox("PGXP Culling", &m_settings_copy.gpu_pgxp_culling); + settings_changed |= ImGui::Checkbox("PGXP Texture Correction", &m_settings_copy.gpu_pgxp_texture_correction); + settings_changed |= ImGui::Checkbox("PGXP Vertex Cache", &m_settings_copy.gpu_pgxp_vertex_cache); } ImGui::EndTabItem(); diff --git a/src/frontend-common/common_host_interface.cpp b/src/frontend-common/common_host_interface.cpp index fb2572904..56faf45ac 100644 --- a/src/frontend-common/common_host_interface.cpp +++ b/src/frontend-common/common_host_interface.cpp @@ -8,11 +8,13 @@ #include "controller_interface.h" #include "core/cdrom.h" #include "core/controller.h" +#include "core/cpu_code_cache.h" #include "core/dma.h" #include "core/game_list.h" #include "core/gpu.h" #include "core/host_display.h" #include "core/mdec.h" +#include "core/pgxp.h" #include "core/save_state_version.h" #include "core/spu.h" #include "core/system.h" @@ -1295,6 +1297,22 @@ void CommonHostInterface::RegisterGraphicsHotkeys() ToggleSoftwareRendering(); }); + RegisterHotkey(StaticString("Graphics"), StaticString("TogglePGXP"), StaticString("Toggle PGXP"), + [this](bool pressed) { + if (!pressed) + { + g_settings.gpu_pgxp_enable = !g_settings.gpu_pgxp_enable; + ReportFormattedMessage("PGXP is now %s.", g_settings.gpu_pgxp_enable ? "enabled" : "disabled"); + + if (g_settings.gpu_pgxp_enable) + PGXP::Initialize(); + + // we need to recompile all blocks if pgxp is toggled on/off + if (g_settings.IsUsingCodeCache()) + CPU::CodeCache::Flush(); + } + }); + RegisterHotkey(StaticString("Graphics"), StaticString("IncreaseResolutionScale"), StaticString("Increase Resolution Scale"), [this](bool pressed) { if (!pressed)