diff --git a/src/core/cpu_code_cache.cpp b/src/core/cpu_code_cache.cpp index 5d7e58b68..7a92ccaec 100644 --- a/src/core/cpu_code_cache.cpp +++ b/src/core/cpu_code_cache.cpp @@ -54,6 +54,8 @@ void CodeCache::Execute() else InterpretCachedBlock(*m_current_block); + //LogCurrentState(); + next_block_key = GetNextBlockKey(); if (m_current_block_flushed) { @@ -84,6 +86,19 @@ void CodeCache::Reset() m_code_buffer->Reset(); } +void CodeCache::LogCurrentState() +{ + const auto& regs = m_core->m_regs; + WriteToExecutionLog( + "tick=%u pc=%08X npc=%08X zero=%08X at=%08X v0=%08X v1=%08X a0=%08X a1=%08X a2=%08X a3=%08X t0=%08X " + "t1=%08X t2=%08X t3=%08X t4=%08X t5=%08X t6=%08X t7=%08X s0=%08X s1=%08X s2=%08X s3=%08X s4=%08X " + "s5=%08X s6=%08X s7=%08X t8=%08X t9=%08X k0=%08X k1=%08X gp=%08X sp=%08X fp=%08X ra=%08X\n", + m_system->GetGlobalTickCounter(), regs.pc, regs.npc, regs.zero, regs.at, regs.v0, regs.v1, regs.a0, regs.a1, + regs.a2, regs.a3, regs.t0, regs.t1, regs.t2, regs.t3, regs.t4, regs.t5, regs.t6, regs.t7, regs.s0, regs.s1, regs.s2, + regs.s3, regs.s4, regs.s5, regs.s6, regs.s7, regs.t8, regs.t9, regs.k0, regs.k1, regs.gp, regs.sp, regs.fp, + regs.ra); +} + CodeBlockKey CodeCache::GetNextBlockKey() const { const u32 address = m_bus->UnmirrorAddress(m_core->m_regs.pc & UINT32_C(0x1FFFFFFF)); diff --git a/src/core/cpu_code_cache.h b/src/core/cpu_code_cache.h index 7855b412d..45f4022e8 100644 --- a/src/core/cpu_code_cache.h +++ b/src/core/cpu_code_cache.h @@ -34,6 +34,8 @@ public: private: using BlockMap = std::unordered_map; + void LogCurrentState(); + CodeBlockKey GetNextBlockKey() const; const CodeBlock* LookupBlock(CodeBlockKey key); bool CompileBlock(CodeBlock* block); diff --git a/src/core/cpu_core.cpp b/src/core/cpu_core.cpp index 5416edd48..c2d8d5941 100644 --- a/src/core/cpu_core.cpp +++ b/src/core/cpu_core.cpp @@ -365,7 +365,7 @@ void Core::FlushPipeline() m_regs.r[static_cast(m_load_delay_reg)] = m_load_delay_value; m_load_delay_reg = Reg::count; } - + // 
not in a branch delay slot m_branch_was_taken = false; m_next_instruction_is_branch_delay_slot = false; diff --git a/src/core/cpu_core.h b/src/core/cpu_core.h index cd84b7ce6..8641220f2 100644 --- a/src/core/cpu_core.h +++ b/src/core/cpu_core.h @@ -92,7 +92,7 @@ private: bool InKernelMode() const { return !m_cop0_regs.sr.KUc; } // timing - void AddTicks(TickCount ticks) + ALWAYS_INLINE void AddTicks(TickCount ticks) { m_pending_ticks += ticks; m_downcount -= ticks; diff --git a/src/core/cpu_recompiler_code_generator.cpp b/src/core/cpu_recompiler_code_generator.cpp index 8a23cce81..fc84d8e34 100644 --- a/src/core/cpu_recompiler_code_generator.cpp +++ b/src/core/cpu_recompiler_code_generator.cpp @@ -76,6 +76,20 @@ bool CodeGenerator::CompileInstruction(const CodeBlockInstruction& cbi) result = Compile_BitwiseImmediate(cbi); break; + case InstructionOp::lb: + case InstructionOp::lbu: + case InstructionOp::lh: + case InstructionOp::lhu: + case InstructionOp::lw: + result = Compile_Load(cbi); + break; + + case InstructionOp::sb: + case InstructionOp::sh: + case InstructionOp::sw: + result = Compile_Store(cbi); + break; + case InstructionOp::lui: result = Compile_lui(cbi); break; @@ -497,6 +511,7 @@ void CodeGenerator::BlockPrologue() EmitStoreCPUStructField(offsetof(Core, m_exception_raised), Value::FromConstantU8(0)); // we don't know the state of the last block, so assume load delays might be in progress + // TODO: Pull load delay into register cache m_current_instruction_in_branch_delay_slot_dirty = true; m_branch_was_taken_dirty = true; m_current_instruction_was_branch_taken_dirty = false; @@ -517,7 +532,9 @@ void CodeGenerator::BlockEpilogue() m_emit.nop(); #endif - m_register_cache.FlushAllGuestRegisters(true, false); + m_register_cache.FlushAllGuestRegisters(true, true); + if (m_register_cache.HasLoadDelay()) + m_register_cache.WriteLoadDelayToCPU(true); // if the last instruction wasn't a fallback, we need to add its fetch if (m_delayed_pc_add > 0) @@ -604,6 
+621,8 @@ void CodeGenerator::InstructionPrologue(const CodeBlockInstruction& cbi, TickCou void CodeGenerator::InstructionEpilogue(const CodeBlockInstruction& cbi) { + m_register_cache.UpdateLoadDelay(); + // copy if the previous instruction was a load, reset the current value on the next instruction if (m_next_load_delay_dirty) { @@ -652,6 +671,11 @@ bool CodeGenerator::Compile_Fallback(const CodeBlockInstruction& cbi) // flush and invalidate all guest registers, since the fallback could change any of them m_register_cache.FlushAllGuestRegisters(true, true); + if (m_register_cache.HasLoadDelay()) + { + m_load_delay_dirty = true; + m_register_cache.WriteLoadDelayToCPU(true); + } EmitStoreCPUStructField(offsetof(Core, m_current_instruction.bits), Value::FromConstantU32(cbi.instruction.bits)); @@ -661,7 +685,7 @@ bool CodeGenerator::Compile_Fallback(const CodeBlockInstruction& cbi) // TODO: Use carry flag or something here too Value return_value = m_register_cache.AllocateScratch(RegSize_8); EmitFunctionCall(&return_value, &Thunks::InterpretInstruction, m_register_cache.GetCPUPtr()); - EmitBlockExitOnBool(return_value); + EmitExceptionExitOnBool(return_value); } else { @@ -777,6 +801,78 @@ bool CodeGenerator::Compile_ShiftVariable(const CodeBlockInstruction& cbi) return true; } +bool CodeGenerator::Compile_Load(const CodeBlockInstruction& cbi) +{ + InstructionPrologue(cbi, 1); + + // rt <- mem[rs + sext(imm)] + Value base = m_register_cache.ReadGuestRegister(cbi.instruction.i.rs); + Value offset = Value::FromConstantU32(cbi.instruction.i.imm_sext32()); + Value address = AddValues(base, offset); + + Value result; + switch (cbi.instruction.op) + { + case InstructionOp::lb: + case InstructionOp::lbu: + result = EmitLoadGuestMemory(address, RegSize_8); + ConvertValueSizeInPlace(&result, RegSize_32, (cbi.instruction.op == InstructionOp::lb)); + break; + + case InstructionOp::lh: + case InstructionOp::lhu: + result = EmitLoadGuestMemory(address, RegSize_16); + 
ConvertValueSizeInPlace(&result, RegSize_32, (cbi.instruction.op == InstructionOp::lh)); + break; + + case InstructionOp::lw: + result = EmitLoadGuestMemory(address, RegSize_32); + break; + + default: + UnreachableCode(); + break; + } + + m_register_cache.WriteGuestRegisterDelayed(cbi.instruction.i.rt, std::move(result)); + + InstructionEpilogue(cbi); + return true; +} + +bool CodeGenerator::Compile_Store(const CodeBlockInstruction& cbi) +{ + InstructionPrologue(cbi, 1); + + // mem[rs + sext(imm)] <- rt + Value base = m_register_cache.ReadGuestRegister(cbi.instruction.i.rs); + Value offset = Value::FromConstantU32(cbi.instruction.i.imm_sext32()); + Value address = AddValues(base, offset); + Value value = m_register_cache.ReadGuestRegister(cbi.instruction.i.rt); + + switch (cbi.instruction.op) + { + case InstructionOp::sb: + EmitStoreGuestMemory(address, value.ViewAsSize(RegSize_8)); + break; + + case InstructionOp::sh: + EmitStoreGuestMemory(address, value.ViewAsSize(RegSize_16)); + break; + + case InstructionOp::sw: + EmitStoreGuestMemory(address, value); + break; + + default: + UnreachableCode(); + break; + } + + InstructionEpilogue(cbi); + return true; +} + bool CodeGenerator::Compile_lui(const CodeBlockInstruction& cbi) { InstructionPrologue(cbi, 1); diff --git a/src/core/cpu_recompiler_code_generator.h b/src/core/cpu_recompiler_code_generator.h index d66a18d89..3363be1ab 100644 --- a/src/core/cpu_recompiler_code_generator.h +++ b/src/core/cpu_recompiler_code_generator.h @@ -43,7 +43,8 @@ public: ////////////////////////////////////////////////////////////////////////// void EmitBeginBlock(); void EmitEndBlock(); - void EmitBlockExitOnBool(const Value& value); + void EmitExceptionExit(); + void EmitExceptionExitOnBool(const Value& value); void FinalizeBlock(CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size); void EmitSignExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size); @@ -65,10 +66,15 @@ public: void 
EmitLoadGuestRegister(HostReg host_reg, Reg guest_reg); void EmitStoreGuestRegister(Reg guest_reg, const Value& value); + void EmitStoreLoadDelay(Reg reg, const Value& value); void EmitLoadCPUStructField(HostReg host_reg, RegSize size, u32 offset); void EmitStoreCPUStructField(u32 offset, const Value& value); void EmitAddCPUStructField(u32 offset, const Value& value); + // Automatically generates an exception handler. + Value EmitLoadGuestMemory(const Value& address, RegSize size); + void EmitStoreGuestMemory(const Value& address, const Value& value); + u32 PrepareStackForCall(); void RestoreStackAfterCall(u32 adjust_size); @@ -161,6 +167,8 @@ private: bool Compile_BitwiseImmediate(const CodeBlockInstruction& cbi); bool Compile_ShiftImmediate(const CodeBlockInstruction& cbi); bool Compile_ShiftVariable(const CodeBlockInstruction& cbi); + bool Compile_Load(const CodeBlockInstruction& cbi); + bool Compile_Store(const CodeBlockInstruction& cbi); bool Compile_lui(const CodeBlockInstruction& cbi); bool Compile_addiu(const CodeBlockInstruction& cbi); diff --git a/src/core/cpu_recompiler_code_generator_generic.cpp b/src/core/cpu_recompiler_code_generator_generic.cpp index a7628b12f..d9db3007f 100644 --- a/src/core/cpu_recompiler_code_generator_generic.cpp +++ b/src/core/cpu_recompiler_code_generator_generic.cpp @@ -17,5 +17,26 @@ void CodeGenerator::EmitStoreGuestRegister(Reg guest_reg, const Value& value) EmitStoreCPUStructField(CalculateRegisterOffset(guest_reg), value); } +void CodeGenerator::EmitStoreLoadDelay(Reg reg, const Value& value) +{ + DebugAssert(value.size == RegSize_32 && value.IsInHostRegister()); + EmitStoreCPUStructField(offsetof(Core, m_load_delay_reg), Value::FromConstantU8(static_cast(reg))); + EmitStoreCPUStructField(offsetof(Core, m_load_delay_value), value); + + // We don't want to allocate a register since this could be in a block exit, so re-use the value. 
+ if (m_register_cache.IsGuestRegisterCached(reg)) + { + EmitStoreCPUStructField(offsetof(Core, m_load_delay_old_value), m_register_cache.ReadGuestRegister(reg)); + } + else + { + EmitPushHostReg(value.host_reg); + EmitLoadCPUStructField(value.host_reg, RegSize_32, CalculateRegisterOffset(reg)); + EmitStoreCPUStructField(offsetof(Core, m_load_delay_old_value), value); + EmitPopHostReg(value.host_reg); + } + + m_load_delay_dirty = true; +} } // namespace CPU::Recompiler \ No newline at end of file diff --git a/src/core/cpu_recompiler_code_generator_x64.cpp b/src/core/cpu_recompiler_code_generator_x64.cpp index 749c86c9d..5f5b6310b 100644 --- a/src/core/cpu_recompiler_code_generator_x64.cpp +++ b/src/core/cpu_recompiler_code_generator_x64.cpp @@ -157,7 +157,20 @@ void CodeGenerator::EmitEndBlock() m_emit.ret(); } -void CodeGenerator::EmitBlockExitOnBool(const Value& value) +void CodeGenerator::EmitExceptionExit() +{ + // ensure all unflushed registers are written back + m_register_cache.FlushAllGuestRegisters(false, false); + + // the interpreter load delay might have its own value, but we'll overwrite it here anyway + // technically RaiseException() and FlushPipeline() have already been called, but that should be okay + m_register_cache.FlushLoadDelayForException(); + + m_register_cache.PopCalleeSavedRegisters(false); + m_emit.ret(); +} + +void CodeGenerator::EmitExceptionExitOnBool(const Value& value) { Assert(!value.IsConstant() && value.IsInHostRegister()); @@ -165,10 +178,7 @@ void CodeGenerator::EmitBlockExitOnBool(const Value& value) m_emit.test(GetHostReg8(value), GetHostReg8(value)); m_emit.jz(continue_label); - // flush current state and return - m_register_cache.FlushAllGuestRegisters(false, false); - m_register_cache.PopCalleeSavedRegisters(false); - m_emit.ret(); + EmitExceptionExit(); m_emit.L(continue_label); } @@ -1303,6 +1313,100 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value) } } +Value 
CodeGenerator::EmitLoadGuestMemory(const Value& address, RegSize size) +{ + // We need to use the full 64 bits here since we test the sign bit result. + Value result = m_register_cache.AllocateScratch(RegSize_64); + + // NOTE: This can leave junk in the upper bits + switch (size) + { + case RegSize_8: + EmitFunctionCall(&result, &Thunks::ReadMemoryByte, m_register_cache.GetCPUPtr(), address); + break; + + case RegSize_16: + EmitFunctionCall(&result, &Thunks::ReadMemoryHalfWord, m_register_cache.GetCPUPtr(), address); + break; + + case RegSize_32: + EmitFunctionCall(&result, &Thunks::ReadMemoryWord, m_register_cache.GetCPUPtr(), address); + break; + + default: + UnreachableCode(); + break; + } + + Xbyak::Label load_okay; + + m_emit.test(GetHostReg64(result.host_reg), GetHostReg64(result.host_reg)); + + // force a 32-bit offset, this will be far code eventually... + m_emit.jns(load_okay, Xbyak::CodeGenerator::T_NEAR); + + // load exception path + EmitExceptionExit(); + + m_emit.L(load_okay); + + // Downcast to ignore upper 56/48/32 bits. This should be a noop. 
+ switch (size) + { + case RegSize_8: + ConvertValueSizeInPlace(&result, RegSize_8, false); + break; + + case RegSize_16: + ConvertValueSizeInPlace(&result, RegSize_16, false); + break; + + case RegSize_32: + ConvertValueSizeInPlace(&result, RegSize_32, false); + break; + + default: + UnreachableCode(); + break; + } + + return result; +} + +void CodeGenerator::EmitStoreGuestMemory(const Value& address, const Value& value) +{ + Value result = m_register_cache.AllocateScratch(RegSize_8); + + switch (value.size) + { + case RegSize_8: + EmitFunctionCall(&result, &Thunks::WriteMemoryByte, m_register_cache.GetCPUPtr(), address, value); + break; + + case RegSize_16: + EmitFunctionCall(&result, &Thunks::WriteMemoryHalfWord, m_register_cache.GetCPUPtr(), address, value); + break; + + case RegSize_32: + EmitFunctionCall(&result, &Thunks::WriteMemoryWord, m_register_cache.GetCPUPtr(), address, value); + break; + + default: + UnreachableCode(); + break; + } + + Xbyak::Label store_okay; + + m_emit.test(GetHostReg8(result), GetHostReg8(result)); + m_emit.jnz(store_okay); + + // store exception path + EmitExceptionExit(); + + m_emit.L(store_okay); +} + void CodeGenerator::EmitDelaySlotUpdate(bool skip_check_for_delay, bool skip_check_old_value, bool move_next) { Value reg = m_register_cache.AllocateScratch(RegSize_8); diff --git a/src/core/cpu_recompiler_register_cache.cpp b/src/core/cpu_recompiler_register_cache.cpp index e540c4b77..a3ae22481 100644 --- a/src/core/cpu_recompiler_register_cache.cpp +++ b/src/core/cpu_recompiler_register_cache.cpp @@ -248,7 +248,7 @@ void RegisterCache::EnsureHostRegFree(HostReg reg) for (u8 i = 0; i < static_cast(Reg::count); i++) { if (m_guest_reg_cache[i].IsInHostRegister() && m_guest_reg_cache[i].GetHostRegister() == reg) - FlushGuestRegister(m_guest_reg_cache[i], static_cast(i), true, true); + FlushGuestRegister(static_cast(i), true, true); } } @@ -331,18 +331,12 @@ u32 RegisterCache::PopCalleeSavedRegisters(bool commit) Value 
RegisterCache::ReadGuestRegister(Reg guest_reg, bool cache /* = true */, bool force_host_register /* = false */, HostReg forced_host_reg /* = HostReg_Invalid */) -{ - return ReadGuestRegister(m_guest_reg_cache[static_cast(guest_reg)], guest_reg, cache, force_host_register, - forced_host_reg); -} - -Value RegisterCache::ReadGuestRegister(Value& cache_value, Reg guest_reg, bool cache, bool force_host_register, - HostReg forced_host_reg) { // register zero is always zero if (guest_reg == Reg::zero) return Value::FromConstantU32(0); + Value& cache_value = m_guest_reg_cache[static_cast(guest_reg)]; if (cache_value.IsValid()) { if (cache_value.IsInHostRegister()) @@ -427,17 +421,21 @@ Value RegisterCache::ReadGuestRegister(Value& cache_value, Reg guest_reg, bool c } Value RegisterCache::WriteGuestRegister(Reg guest_reg, Value&& value) -{ - return WriteGuestRegister(m_guest_reg_cache[static_cast(guest_reg)], guest_reg, std::move(value)); -} - -Value RegisterCache::WriteGuestRegister(Value& cache_value, Reg guest_reg, Value&& value) { // ignore writes to register zero + DebugAssert(value.size == RegSize_32); if (guest_reg == Reg::zero) return std::move(value); - DebugAssert(value.size == RegSize_32); + // cancel any pending load delay targeting this register + if (m_load_delay_register == guest_reg) + { + Log_DebugPrintf("Cancelling load delay of register %s because of non-delayed write", GetRegName(guest_reg)); + m_load_delay_register = Reg::count; + m_load_delay_value.ReleaseAndClear(); + } + + Value& cache_value = m_guest_reg_cache[static_cast(guest_reg)]; if (cache_value.IsInHostRegister() && value.IsInHostRegister() && cache_value.host_reg == value.host_reg) { // updating the register value. 
@@ -448,7 +446,7 @@ Value RegisterCache::WriteGuestRegister(Value& cache_value, Reg guest_reg, Value return cache_value; } - InvalidateGuestRegister(cache_value, guest_reg); + InvalidateGuestRegister(guest_reg); DebugAssert(!cache_value.IsValid()); if (value.IsConstant()) @@ -486,13 +484,96 @@ Value RegisterCache::WriteGuestRegister(Value& cache_value, Reg guest_reg, Value return Value::FromHostReg(this, cache_value.host_reg, RegSize_32); } -void RegisterCache::FlushGuestRegister(Reg guest_reg, bool invalidate, bool clear_dirty) +void RegisterCache::WriteGuestRegisterDelayed(Reg guest_reg, Value&& value) { - FlushGuestRegister(m_guest_reg_cache[static_cast(guest_reg)], guest_reg, invalidate, clear_dirty); + // ignore writes to register zero + DebugAssert(value.size == RegSize_32); + if (guest_reg == Reg::zero) + return; + + // two load delays in a row? cancel the first one. + if (guest_reg == m_load_delay_register) + { + Log_DebugPrintf("Cancelling load delay of register %s due to new load delay", GetRegName(guest_reg)); + m_load_delay_register = Reg::count; + m_load_delay_value.ReleaseAndClear(); + } + + // set up the load delay at the end of this instruction + Value& cache_value = m_next_load_delay_value; + Assert(m_next_load_delay_register == Reg::count); + m_next_load_delay_register = guest_reg; + + // If it's a temporary, we can bind that to the guest register. + if (value.IsScratch()) + { + Log_DebugPrintf("Binding scratch register %s to load-delayed guest register %s", + m_code_generator.GetHostRegName(value.host_reg, RegSize_32), GetRegName(guest_reg)); + + cache_value = std::move(value); + return; + } + + // Allocate host register, and copy value to it. 
+ cache_value = AllocateScratch(RegSize_32); + m_code_generator.EmitCopyValue(cache_value.host_reg, value); + + Log_DebugPrintf("Copying non-scratch register %s to %s to load-delayed guest register %s", + m_code_generator.GetHostRegName(value.host_reg, RegSize_32), + m_code_generator.GetHostRegName(cache_value.host_reg, RegSize_32), GetRegName(guest_reg)); } -void RegisterCache::FlushGuestRegister(Value& cache_value, Reg guest_reg, bool invalidate, bool clear_dirty) +void RegisterCache::UpdateLoadDelay() { + // flush current load delay + if (m_load_delay_register != Reg::count) + { + // have to clear first because otherwise it'll release the value + Reg reg = m_load_delay_register; + Value value = std::move(m_load_delay_value); + m_load_delay_register = Reg::count; + WriteGuestRegister(reg, std::move(value)); + } + + // next load delay -> load delay + if (m_next_load_delay_register != Reg::count) + { + m_load_delay_register = m_next_load_delay_register; + m_load_delay_value = std::move(m_next_load_delay_value); + m_next_load_delay_register = Reg::count; + } +} + +void RegisterCache::WriteLoadDelayToCPU(bool clear) +{ + // There shouldn't be a flush at the same time as there's a new load delay. + Assert(m_next_load_delay_register == Reg::count); + if (m_load_delay_register != Reg::count) + { + Log_DebugPrintf("Flushing pending load delay of %s", GetRegName(m_load_delay_register)); + m_code_generator.EmitStoreLoadDelay(m_load_delay_register, m_load_delay_value); + if (clear) + { + m_load_delay_register = Reg::count; + m_load_delay_value.ReleaseAndClear(); + } + } +} + +void RegisterCache::FlushLoadDelayForException() +{ + Assert(m_next_load_delay_register == Reg::count); + if (m_load_delay_register == Reg::count) + return; + + // if this is an exception exit, write the new value to the CPU register file, but keep it tracked for the next + // non-exception-raised path. 
TODO: push/pop whole state would avoid this issue + m_code_generator.EmitStoreGuestRegister(m_load_delay_register, m_load_delay_value); +} + +void RegisterCache::FlushGuestRegister(Reg guest_reg, bool invalidate, bool clear_dirty) +{ + Value& cache_value = m_guest_reg_cache[static_cast(guest_reg)]; if (cache_value.IsDirty()) { if (cache_value.IsInHostRegister()) @@ -511,16 +592,12 @@ void RegisterCache::FlushGuestRegister(Value& cache_value, Reg guest_reg, bool i } if (invalidate) - InvalidateGuestRegister(cache_value, guest_reg); + InvalidateGuestRegister(guest_reg); } void RegisterCache::InvalidateGuestRegister(Reg guest_reg) { - InvalidateGuestRegister(m_guest_reg_cache[static_cast(guest_reg)], guest_reg); -} - -void RegisterCache::InvalidateGuestRegister(Value& cache_value, Reg guest_reg) -{ + Value& cache_value = m_guest_reg_cache[static_cast(guest_reg)]; if (!cache_value.IsValid()) return; @@ -601,5 +678,4 @@ void RegisterCache::AppendRegisterToOrder(Reg reg) m_guest_register_order[0] = reg; m_guest_register_order_count++; } - } // namespace CPU::Recompiler diff --git a/src/core/cpu_recompiler_register_cache.h b/src/core/cpu_recompiler_register_cache.h index a090c733f..b30bf7082 100644 --- a/src/core/cpu_recompiler_register_cache.h +++ b/src/core/cpu_recompiler_register_cache.h @@ -107,6 +107,36 @@ struct Value void SetDirty() { flags |= ValueFlags::Dirty; } void ClearDirty() { flags &= ~ValueFlags::Dirty; } + /// Returns the same register viewed as a different size. 
+ Value ViewAsSize(RegSize view_size) const + { + if (view_size == size) + return *this; + + if (IsConstant()) + { + // truncate to size + switch (view_size) + { + case RegSize_8: + return Value::FromConstant(constant_value & UINT64_C(0xFF), RegSize_8); + case RegSize_16: + return Value::FromConstant(constant_value & UINT64_C(0xFFFF), RegSize_16); + case RegSize_32: + return Value::FromConstant(constant_value & UINT64_C(0xFFFFFFFF), RegSize_32); + case RegSize_64: + default: + return Value::FromConstant(constant_value, view_size); + } + } + + if (IsInHostRegister()) + return Value::FromHostReg(regcache, host_reg, view_size); + + // invalid? + return Value(); + } + static Value FromHostReg(RegisterCache* regcache, HostReg reg, RegSize size) { return Value(regcache, reg, size, ValueFlags::Valid | ValueFlags::InHostRegister); @@ -189,9 +219,10 @@ public: ////////////////////////////////////////////////////////////////////////// /// Returns true if the specified guest register is cached. - bool IsGuestRegisterInHostReg(Reg guest_reg) const + bool IsGuestRegisterCached(Reg guest_reg) const { - return m_guest_reg_cache[static_cast(guest_reg)].IsInHostRegister(); + const Value& cache_value = m_guest_reg_cache[static_cast(guest_reg)]; + return cache_value.IsConstant() || cache_value.IsInHostRegister(); } /// Returns the host register if the guest register is cached. @@ -202,12 +233,27 @@ public: return m_guest_reg_cache[static_cast(guest_reg)].GetHostRegister(); } + /// Returns true if there is a load delay which will be stored at the end of the instruction. + bool HasLoadDelay() const { return m_load_delay_register != Reg::count; } + Value ReadGuestRegister(Reg guest_reg, bool cache = true, bool force_host_register = false, HostReg forced_host_reg = HostReg_Invalid); /// Creates a copy of value, and stores it to guest_reg. Value WriteGuestRegister(Reg guest_reg, Value&& value); + /// Stores the specified value to the guest register after the next instruction (load delay). 
+ void WriteGuestRegisterDelayed(Reg guest_reg, Value&& value); + + /// Writes any previous load delay to the destination register, then moves the next load delay to the load delay. + void UpdateLoadDelay(); + + /// Writes the load delay to the CPU structure, so it is synced up with the interpreter. + void WriteLoadDelayToCPU(bool clear); + + /// Flushes the load delay, i.e. writes it to the destination register. + void FlushLoadDelayForException(); + void FlushGuestRegister(Reg guest_reg, bool invalidate, bool clear_dirty); void InvalidateGuestRegister(Reg guest_reg); @@ -215,11 +261,6 @@ public: bool EvictOneGuestRegister(); private: - Value ReadGuestRegister(Value& cache_value, Reg guest_reg, bool cache, bool force_host_register, - HostReg forced_host_reg); - Value WriteGuestRegister(Value& cache_value, Reg guest_reg, Value&& value); - void FlushGuestRegister(Value& cache_value, Reg guest_reg, bool invalidate, bool clear_dirty); - void InvalidateGuestRegister(Value& cache_value, Reg guest_reg); void ClearRegisterFromOrder(Reg reg); void PushRegisterToOrder(Reg reg); void AppendRegisterToOrder(Reg reg); @@ -238,6 +279,12 @@ private: std::array m_host_register_callee_saved_order{}; u32 m_host_register_callee_saved_order_count = 0; + + Reg m_load_delay_register = Reg::count; + Value m_load_delay_value{}; + + Reg m_next_load_delay_register = Reg::count; + Value m_next_load_delay_value{}; }; } // namespace CPU::Recompiler \ No newline at end of file diff --git a/src/core/cpu_recompiler_thunks.cpp b/src/core/cpu_recompiler_thunks.cpp index 9d968da97..c2eec38a2 100644 --- a/src/core/cpu_recompiler_thunks.cpp +++ b/src/core/cpu_recompiler_thunks.cpp @@ -4,19 +4,52 @@ namespace CPU::Recompiler { // TODO: Port thunks to "ASM routines", i.e. code in the jit buffer. 
-bool Thunks::ReadMemoryByte(Core* cpu, u32 address, u8* value) +u64 Thunks::ReadMemoryByte(Core* cpu, u32 address) { - return cpu->ReadMemoryByte(address, value); + u32 temp = 0; + const TickCount cycles = cpu->DoMemoryAccess(address, temp); + if (cycles < 0) + { + cpu->RaiseException(Exception::DBE); + return UINT64_C(0xFFFFFFFFFFFFFFFF); + } + + cpu->AddTicks(cycles - 1); + return ZeroExtend64(temp); } -bool Thunks::ReadMemoryHalfWord(Core* cpu, u32 address, u16* value) +u64 Thunks::ReadMemoryHalfWord(Core* cpu, u32 address) { - return cpu->ReadMemoryHalfWord(address, value); + if (!cpu->DoAlignmentCheck(address)) + return UINT64_C(0xFFFFFFFFFFFFFFFF); + + u32 temp = 0; + const TickCount cycles = cpu->DoMemoryAccess(address, temp); + if (cycles < 0) + { + cpu->RaiseException(Exception::DBE); + return UINT64_C(0xFFFFFFFFFFFFFFFF); + } + + cpu->AddTicks(cycles - 1); + return ZeroExtend64(temp); } -bool Thunks::ReadMemoryWord(Core* cpu, u32 address, u32* value) +u64 Thunks::ReadMemoryWord(Core* cpu, u32 address) { - return cpu->ReadMemoryWord(address, value); + if (!cpu->DoAlignmentCheck(address)) + return UINT64_C(0xFFFFFFFFFFFFFFFF); + + u32 temp = 0; + const TickCount cycles = cpu->DoMemoryAccess(address, temp); + if (cycles < 0) + { + cpu->RaiseException(Exception::DBE); + return UINT64_C(0xFFFFFFFFFFFFFFFF); + } + + cpu->AddTicks(cycles - 1); + return ZeroExtend64(temp); } bool Thunks::WriteMemoryByte(Core* cpu, u32 address, u8 value) diff --git a/src/core/cpu_recompiler_thunks.h b/src/core/cpu_recompiler_thunks.h index 3cfbfdb14..0523d050f 100644 --- a/src/core/cpu_recompiler_thunks.h +++ b/src/core/cpu_recompiler_thunks.h @@ -13,9 +13,9 @@ public: // Needed because we can't cast member functions to void*... 
// TODO: Abuse carry flag or something else for exception ////////////////////////////////////////////////////////////////////////// - static bool ReadMemoryByte(Core* cpu, u32 address, u8* value); - static bool ReadMemoryHalfWord(Core* cpu, u32 address, u16* value); - static bool ReadMemoryWord(Core* cpu, u32 address, u32* value); + static u64 ReadMemoryByte(Core* cpu, u32 address); + static u64 ReadMemoryHalfWord(Core* cpu, u32 address); + static u64 ReadMemoryWord(Core* cpu, u32 address); static bool WriteMemoryByte(Core* cpu, u32 address, u8 value); static bool WriteMemoryHalfWord(Core* cpu, u32 address, u16 value); static bool WriteMemoryWord(Core* cpu, u32 address, u32 value);