From 4d1837acb1e01d65fb7b4fcb0c4a6979378e85bd Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Thu, 26 Sep 2019 02:43:28 +1000 Subject: [PATCH] GTE: Special case for RTPS --- src/pse/gte.cpp | 52 ++++++++++++++++++++++++++++++++++++------------- src/pse/gte.h | 2 +- src/pse/gte.inl | 2 +- 3 files changed, 41 insertions(+), 15 deletions(-) diff --git a/src/pse/gte.cpp b/src/pse/gte.cpp index 813abb454..216804185 100644 --- a/src/pse/gte.cpp +++ b/src/pse/gte.cpp @@ -388,15 +388,37 @@ void Core::PushRGB(u8 r, u8 g, u8 b, u8 c) m_regs.RGB2 = ZeroExtend32(r) | (ZeroExtend32(g) << 8) | (ZeroExtend32(b) << 16) | (ZeroExtend32(c) << 24); } -void Core::RTPS(const s16 V[3], bool sf, bool lm) +void Core::RTPS(const s16 V[3], bool sf, bool lm, bool last) { + const u8 shift = sf ? 12 : 0; +#define dot3(i) \ + CheckMACResult( \ + (s64(m_regs.TR[i]) << 12) + \ + CheckMACResult(CheckMACResult(CheckMACResult(s64(s32(m_regs.RT[i][0]) * s32(V[0]))) + \ + s64(s32(m_regs.RT[i][1]) * s32(V[1]))) + \ + s64(s32(m_regs.RT[i][2]) * s32(V[2])))) + // IR1 = MAC1 = (TRX*1000h + RT11*VX0 + RT12*VY0 + RT13*VZ0) SAR (sf*12) // IR2 = MAC2 = (TRY*1000h + RT21*VX0 + RT22*VY0 + RT23*VZ0) SAR (sf*12) // IR3 = MAC3 = (TRZ*1000h + RT31*VX0 + RT32*VY0 + RT33*VZ0) SAR (sf*12) - MulMatVec(m_regs.RT, m_regs.TR, V[0], V[1], V[2], sf ? 12 : 0, lm); + const s64 x = dot3(0); + const s64 y = dot3(1); + const s64 z = dot3(2); + TruncateAndSetMAC<1>(x, shift); + TruncateAndSetMAC<2>(y, shift); + TruncateAndSetMAC<3>(z, shift); + TruncateAndSetIR<1>(m_regs.MAC1, lm); + TruncateAndSetIR<2>(m_regs.MAC2, lm); + + // The command does saturate IR1,IR2,IR3 to -8000h..+7FFFh (regardless of lm bit). When using RTP with sf=0, then the + // IR3 saturation flag (FLAG.22) gets set if "MAC3 SAR 12" exceeds -8000h..+7FFFh (although IR3 is saturated + // when "MAC3" exceeds -8000h..+7FFFh). + TruncateAndSetIR<3>(m_regs.MAC3, false); + m_regs.dr32[11] = std::clamp(m_regs.MAC3, lm ? 0 : IR123_MIN_VALUE, IR123_MAX_VALUE); +#undef dot3 // SZ3 = MAC3 SAR ((1-sf)*12) ;ScreenZ FIFO 0..+FFFFh - PushSZ(sf ? m_regs.MAC3 : (m_regs.MAC3 >> 12)); + PushSZ(s32(z >> 12)); s32 result; if (m_regs.SZ3 == 0) @@ -416,18 +438,22 @@ void Core::RTPS(const s16 V[3], bool sf, bool lm) // MAC0=(((H*20000h/SZ3)+1)/2)*IR1+OFX, SX2=MAC0/10000h ;ScrX FIFO -400h..+3FFh // MAC0=(((H*20000h/SZ3)+1)/2)*IR2+OFY, SY2=MAC0/10000h ;ScrY FIFO -400h..+3FFh - // MAC0=(((H*20000h/SZ3)+1)/2)*DQA+DQB, IR0=MAC0/1000h ;Depth cueing 0..+1000h - const s32 Sx = s32(TruncateAndSetMAC<0>(s64(result) * s64(m_regs.IR1) + s64(m_regs.OFX), 16)); - const s32 Sy = s32(TruncateAndSetMAC<0>(s64(result) * s64(m_regs.IR2) + s64(m_regs.OFY), 16)); - const s32 Sz = s32(TruncateAndSetMAC<0>(s64(result) * s64(m_regs.DQA) + s64(m_regs.DQB), 12)); - PushSXY(Sx, Sy); - TruncateAndSetIR<0>(Sz, true); + const s64 Sx = TruncateAndSetMAC<0>(s64(result) * s64(m_regs.IR1) + s64(m_regs.OFX), 0); + const s64 Sy = TruncateAndSetMAC<0>(s64(result) * s64(m_regs.IR2) + s64(m_regs.OFY), 0); + PushSXY(s32(Sx >> 16), s32(Sy >> 16)); + + if (last) + { + // MAC0=(((H*20000h/SZ3)+1)/2)*DQA+DQB, IR0=MAC0/1000h ;Depth cueing 0..+1000h + const s64 Sz = TruncateAndSetMAC<0>(s64(result) * s64(m_regs.DQA) + s64(m_regs.DQB), 0); + TruncateAndSetIR<0>(s32(Sz >> 12), true); + } } void Core::Execute_RTPS(Instruction inst) { m_regs.FLAG.Clear(); - RTPS(m_regs.V0, inst.sf, inst.lm); + RTPS(m_regs.V0, inst.sf, inst.lm, true); m_regs.FLAG.UpdateError(); } @@ -436,9 +462,9 @@ void Core::Execute_RTPT(Instruction inst) m_regs.FLAG.Clear(); const bool sf = inst.sf; - RTPS(m_regs.V0, sf, inst.lm); - RTPS(m_regs.V1, sf, inst.lm); - RTPS(m_regs.V2, sf, inst.lm); + RTPS(m_regs.V0, sf, inst.lm, false); + RTPS(m_regs.V1, sf, inst.lm, false); + RTPS(m_regs.V2, sf, inst.lm, true); m_regs.FLAG.UpdateError(); } diff --git a/src/pse/gte.h b/src/pse/gte.h index ef4af00a7..9c9ac9657 100644 --- a/src/pse/gte.h +++ b/src/pse/gte.h @@ -59,7 +59,7 @@ private: // 3x3 matrix * 3x1 vector with translation, updates MAC[1-3] and IR[1-3] void MulMatVec(const s16 M[3][3], const s32 T[3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm); - void RTPS(const s16 V[3], bool sf, bool lm); + void RTPS(const s16 V[3], bool sf, bool lm, bool last); void NCCS(const s16 V[3], bool sf, bool lm); void NCDS(const s16 V[3], bool sf, bool lm); diff --git a/src/pse/gte.inl b/src/pse/gte.inl index 9c7e201d7..260ecd7c3 100644 --- a/src/pse/gte.inl +++ b/src/pse/gte.inl @@ -48,7 +48,7 @@ s16 GTE::Core::TruncateAndSetIR(s32 value, bool lm) { constexpr s32 MIN_VALUE = (index == 0) ? IR0_MIN_VALUE : IR123_MIN_VALUE; constexpr s32 MAX_VALUE = (index == 0) ? IR0_MAX_VALUE : IR123_MAX_VALUE; - const s32 actual_min_value = lm ? 0 : -0x8000; + const s32 actual_min_value = lm ? 0 : MIN_VALUE; if (value < actual_min_value) { value = actual_min_value;