From 8f415a44e68d40a9544355cf8633f4da67a1c3a9 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Tue, 14 May 2024 13:57:35 +1000 Subject: [PATCH] CPU/NewRec: Fix register allocation crash on Linux --- src/core/cpu_code_cache.cpp | 37 ------------------------------- src/core/cpu_code_cache_private.h | 5 ----- src/core/cpu_newrec_compiler.cpp | 8 +++---- src/core/settings.h | 13 +++++------ 4 files changed, 9 insertions(+), 54 deletions(-) diff --git a/src/core/cpu_code_cache.cpp b/src/core/cpu_code_cache.cpp index 6bdeb4d74..8eade1184 100644 --- a/src/core/cpu_code_cache.cpp +++ b/src/core/cpu_code_cache.cpp @@ -89,8 +89,6 @@ static std::vector s_blocks; // for compiling - reuse to avoid allocations static BlockInstructionList s_block_instructions; -#ifdef ENABLE_RECOMPILER_SUPPORT - static void BacklinkBlocks(u32 pc, const void* dst); static void UnlinkBlockExits(Block* block); @@ -143,18 +141,12 @@ static JitCodeBuffer s_code_buffer; static u32 s_total_instructions_compiled = 0; static u32 s_total_host_instructions_emitted = 0; #endif - -#endif // ENABLE_RECOMPILER_SUPPORT } // namespace CPU::CodeCache bool CPU::CodeCache::IsUsingAnyRecompiler() { -#ifdef ENABLE_RECOMPILER_SUPPORT return (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler || g_settings.cpu_execution_mode == CPUExecutionMode::NewRec); -#else - return false; -#endif } bool CPU::CodeCache::IsUsingFastmem() @@ -166,7 +158,6 @@ bool CPU::CodeCache::ProcessStartup(Error* error) { AllocateLUTs(); -#ifdef ENABLE_RECOMPILER_SUPPORT #ifdef USE_STATIC_CODE_BUFFER const bool has_buffer = s_code_buffer.Initialize(s_code_storage, sizeof(s_code_storage), RECOMPILER_FAR_CODE_CACHE_SIZE, HOST_PAGE_SIZE); @@ -178,7 +169,6 @@ bool CPU::CodeCache::ProcessStartup(Error* error) Error::SetStringView(error, "Failed to initialize code space"); return false; } -#endif if (!PageFaultHandler::Install(error)) return false; @@ -188,10 +178,7 @@ bool CPU::CodeCache::ProcessStartup(Error* error) void 
CPU::CodeCache::ProcessShutdown() { -#ifdef ENABLE_RECOMPILER_SUPPORT s_code_buffer.Destroy(); -#endif - DeallocateLUTs(); } @@ -199,14 +186,12 @@ void CPU::CodeCache::Initialize() { Assert(s_blocks.empty()); -#ifdef ENABLE_RECOMPILER_SUPPORT if (IsUsingAnyRecompiler()) { s_code_buffer.Reset(); CompileASMFunctions(); ResetCodeLUT(); } -#endif Bus::UpdateFastmemViews(IsUsingAnyRecompiler() ? g_settings.cpu_fastmem_mode : CPUFastmemMode::Disabled); CPU::UpdateMemoryPointers(); @@ -215,10 +200,7 @@ void CPU::CodeCache::Initialize() void CPU::CodeCache::Shutdown() { ClearBlocks(); - -#ifdef ENABLE_RECOMPILER_SUPPORT ClearASMFunctions(); -#endif Bus::UpdateFastmemViews(CPUFastmemMode::Disabled); CPU::UpdateMemoryPointers(); @@ -228,7 +210,6 @@ void CPU::CodeCache::Reset() { ClearBlocks(); -#ifdef ENABLE_RECOMPILER_SUPPORT if (IsUsingAnyRecompiler()) { ClearASMFunctions(); @@ -236,12 +217,10 @@ void CPU::CodeCache::Reset() CompileASMFunctions(); ResetCodeLUT(); } -#endif } void CPU::CodeCache::Execute() { -#ifdef ENABLE_RECOMPILER_SUPPORT if (IsUsingAnyRecompiler()) { g_enter_recompiler(); @@ -251,9 +230,6 @@ void CPU::CodeCache::Execute() { ExecuteCachedInterpreter(); } -#else - ExecuteCachedInterpreter(); -#endif } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -677,13 +653,11 @@ CPU::CodeCache::PageProtectionMode CPU::CodeCache::GetProtectionModeForBlock(con void CPU::CodeCache::InvalidateBlock(Block* block, BlockState new_state) { -#ifdef ENABLE_RECOMPILER_SUPPORT if (block->state == BlockState::Valid) { SetCodeLUT(block->pc, g_compile_or_revalidate_block); BacklinkBlocks(block->pc, g_compile_or_revalidate_block); } -#endif block->state = new_state; } @@ -723,11 +697,9 @@ void CPU::CodeCache::ClearBlocks() ppi = {}; } -#ifdef ENABLE_RECOMPILER_SUPPORT s_fastmem_backpatch_info.clear(); s_fastmem_faulting_pcs.clear(); s_block_links.clear(); -#endif for (Block* block : s_blocks) { @@ -755,11 
+727,7 @@ PageFaultHandler::HandlerResult PageFaultHandler::HandlePageFault(void* exceptio return PageFaultHandler::HandlerResult::ContinueExecution; } -#ifdef ENABLE_RECOMPILER_SUPPORT return CPU::CodeCache::HandleFastmemException(exception_pc, fault_address, is_write); -#else - return PageFaultHandler::HandlerResult::ExecuteNextHandler; -#endif } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1303,8 +1271,6 @@ void CPU::CodeCache::FillBlockRegInfo(Block* block) // MARK: - Recompiler Glue //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#ifdef ENABLE_RECOMPILER_SUPPORT - void CPU::CodeCache::CompileOrRevalidateBlock(u32 start_pc) { // TODO: this doesn't currently handle when the cache overflows... @@ -1588,7 +1554,6 @@ void CPU::CodeCache::AddLoadStoreInfo(void* code_address, u32 code_size, u32 gue PageFaultHandler::HandlerResult CPU::CodeCache::HandleFastmemException(void* exception_pc, void* fault_address, bool is_write) { - // TODO: Catch general RAM writes, not just fastmem PhysicalMemoryAddress guest_address; #ifdef ENABLE_MMAP_FASTMEM @@ -1705,5 +1670,3 @@ void CPU::CodeCache::RemoveBackpatchInfoForRange(const void* host_code, u32 size // erase the whole range at once s_fastmem_backpatch_info.erase(start_iter, end_iter); } - -#endif // ENABLE_RECOMPILER_SUPPORT diff --git a/src/core/cpu_code_cache_private.h b/src/core/cpu_code_cache_private.h index cb4544bb0..2d10d5aa3 100644 --- a/src/core/cpu_code_cache_private.h +++ b/src/core/cpu_code_cache_private.h @@ -229,9 +229,6 @@ void InterpretUncachedBlock(); void LogCurrentState(); -#if defined(ENABLE_RECOMPILER) || defined(ENABLE_NEWREC) -#define ENABLE_RECOMPILER_SUPPORT 1 - #if defined(_DEBUG) || false // Enable disassembly of host assembly code. 
#define ENABLE_HOST_DISASSEMBLY 1 @@ -278,6 +275,4 @@ extern PerfScope MIPSPerfScope; #endif // ENABLE_RECOMPILER_PROFILING -#endif // ENABLE_RECOMPILER - } // namespace CPU::CodeCache diff --git a/src/core/cpu_newrec_compiler.cpp b/src/core/cpu_newrec_compiler.cpp index eb21f885d..84dd0e4ae 100644 --- a/src/core/cpu_newrec_compiler.cpp +++ b/src/core/cpu_newrec_compiler.cpp @@ -542,7 +542,7 @@ u32 CPU::NewRec::Compiler::GetFreeHostReg(u32 flags) // find register with lowest counter u32 lowest = NUM_HOST_REGS; - u16 lowest_count = std::numeric_limits<u16>::max(); + u32 lowest_count = std::numeric_limits<u32>::max(); for (u32 i = 0; i < NUM_HOST_REGS; i++) { const HostRegAlloc& ra = m_host_regs[i]; @@ -577,7 +577,7 @@ u32 CPU::NewRec::Compiler::GetFreeHostReg(u32 flags) if (iinfo->UsedTest(ra.reg) && flags & HR_CALLEE_SAVED) { u32 caller_saved_lowest = NUM_HOST_REGS; - u16 caller_saved_lowest_count = std::numeric_limits<u16>::max(); + u32 caller_saved_lowest_count = std::numeric_limits<u32>::max(); for (u32 i = 0; i < NUM_HOST_REGS; i++) { constexpr u32 caller_req_flags = HR_USABLE; @@ -1274,8 +1274,8 @@ void CPU::NewRec::Compiler::CompileInstruction() case InstructionOp::sb: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::Byte, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::Byte); break; case InstructionOp::sh: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::HalfWord, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::HalfWord); break; case InstructionOp::sw: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::Word, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::Word); break; - case InstructionOp::swl: CompileLoadStoreTemplate(&Compiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_swx(false); break; - case InstructionOp::swr: CompileLoadStoreTemplate(&Compiler::Compile_swx, 
MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_swx(true); break; + case InstructionOp::swl: CompileLoadStoreTemplate(&Compiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S /*| TF_READS_T*/); SpecExec_swx(false); break; + case InstructionOp::swr: CompileLoadStoreTemplate(&Compiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S /*| TF_READS_T*/); SpecExec_swx(true); break; case InstructionOp::cop0: { diff --git a/src/core/settings.h b/src/core/settings.h index 336fc5035..744e9f6e9 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -451,8 +451,12 @@ struct Settings static constexpr float DEFAULT_GPU_PGXP_DEPTH_THRESHOLD = 300.0f; static constexpr float GPU_PGXP_DEPTH_THRESHOLD_SCALE = 4096.0f; -#if defined(ENABLE_RECOMPILER) + // Prefer oldrec over newrec for now. Except on RISC-V, where there is no oldrec. +#if defined(CPU_ARCH_RISCV64) + static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::NewRec; +#else static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::Recompiler; +#endif // LUT still ends up faster on Apple Silicon for now, because of 16K pages. 
#if defined(ENABLE_MMAP_FASTMEM) && (!defined(__APPLE__) || !defined(__aarch64__)) @@ -460,13 +464,6 @@ struct Settings #else static constexpr CPUFastmemMode DEFAULT_CPU_FASTMEM_MODE = CPUFastmemMode::LUT; #endif -#elif defined(ENABLE_NEWREC) - static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::NewRec; - static constexpr CPUFastmemMode DEFAULT_CPU_FASTMEM_MODE = CPUFastmemMode::MMap; -#else - static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::CachedInterpreter; - static constexpr CPUFastmemMode DEFAULT_CPU_FASTMEM_MODE = CPUFastmemMode::Disabled; -#endif static constexpr DisplayDeinterlacingMode DEFAULT_DISPLAY_DEINTERLACING_MODE = DisplayDeinterlacingMode::Adaptive; static constexpr DisplayCropMode DEFAULT_DISPLAY_CROP_MODE = DisplayCropMode::Overscan;