diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 840798ce3..1fb188b85 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -38,6 +38,8 @@ add_library(common
minizip_helpers.cpp
minizip_helpers.h
path.h
+ perf_scope.cpp
+ perf_scope.h
progress_callback.cpp
progress_callback.h
rectangle.h
diff --git a/src/common/common.vcxproj b/src/common/common.vcxproj
index df252a05f..e8a58bd0c 100644
--- a/src/common/common.vcxproj
+++ b/src/common/common.vcxproj
@@ -28,6 +28,7 @@
+    <ClInclude Include="perf_scope.h" />
@@ -59,6 +60,7 @@
+    <ClCompile Include="perf_scope.cpp" />
diff --git a/src/common/common.vcxproj.filters b/src/common/common.vcxproj.filters
index 4a147b3a9..1fb457f4c 100644
--- a/src/common/common.vcxproj.filters
+++ b/src/common/common.vcxproj.filters
@@ -43,6 +43,7 @@
+    <ClInclude Include="perf_scope.h" />
@@ -69,6 +70,7 @@
+    <ClCompile Include="perf_scope.cpp" />
diff --git a/src/common/intrin.h b/src/common/intrin.h
index b75ea45ba..7d5f18968 100644
--- a/src/common/intrin.h
+++ b/src/common/intrin.h
@@ -28,7 +28,7 @@
#endif
template<typename T>
-static inline void MemsetPtrs(T* ptr, T value, u32 count)
+ALWAYS_INLINE_RELEASE static void MemsetPtrs(T* ptr, T value, u32 count)
{
static_assert(std::is_pointer_v<T>, "T is pointer type");
static_assert(sizeof(T) == sizeof(void*), "T isn't a fat pointer");
diff --git a/src/common/perf_scope.cpp b/src/common/perf_scope.cpp
new file mode 100644
index 000000000..ad679dabd
--- /dev/null
+++ b/src/common/perf_scope.cpp
@@ -0,0 +1,198 @@
+
+// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>, PCSX2 Team
+// SPDX-License-Identifier: GPL-3.0
+
+#include "perf_scope.h"
+#include "assert.h"
+#include "string_util.h"
+
+#include <cstdio>
+#include <cstring>
+
+#ifdef __linux__
+#include <cinttypes>
+#include <ctime>
+#include <elf.h>
+#include <mutex>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#endif
+
+// #define ProfileWithPerf
+// #define ProfileWithPerfJitDump
+
+// Perf is only supported on linux
+#if defined(__linux__) && defined(ProfileWithPerf)
+
+static std::FILE* s_map_file = nullptr;
+static bool s_map_file_opened = false;
+static std::mutex s_mutex;
+static void RegisterMethod(const void* ptr, size_t size, const char* symbol)
+{
+ std::unique_lock lock(s_mutex);
+
+ if (!s_map_file)
+ {
+ if (s_map_file_opened)
+ return;
+
+ char file[256];
+ snprintf(file, std::size(file), "/tmp/perf-%d.map", getpid());
+ s_map_file = std::fopen(file, "wb");
+ s_map_file_opened = true;
+ if (!s_map_file)
+ return;
+ }
+
+ std::fprintf(s_map_file, "%" PRIx64 " %zx %s\n", static_cast<u64>(reinterpret_cast<uintptr_t>(ptr)), size, symbol);
+ std::fflush(s_map_file);
+}
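+
+// Usage sketch (assuming stock Linux perf tooling; not part of this change): perf
+// resolves samples landing in anonymous executable mappings against
+// /tmp/perf-<pid>.map, one "START SIZE symbol" line (hex start/size) per method,
+// which is exactly what RegisterMethod() appends above. A plain `perf record`
+// followed by `perf report` is enough; no inject step is needed for the map file.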
+
+#elif defined(__linux__) && defined(ProfileWithPerfJitDump)
+enum : u32
+{
+ JIT_CODE_LOAD = 0,
+ JIT_CODE_MOVE = 1,
+ JIT_CODE_DEBUG_INFO = 2,
+ JIT_CODE_CLOSE = 3,
+ JIT_CODE_UNWINDING_INFO = 4
+};
+
+#pragma pack(push, 1)
+struct JITDUMP_HEADER
+{
+ u32 magic = 0x4A695444; // JiTD
+ u32 version = 1;
+ u32 header_size = sizeof(JITDUMP_HEADER);
+ u32 elf_mach;
+ u32 pad1 = 0;
+ u32 pid;
+ u64 timestamp;
+ u64 flags = 0;
+};
+struct JITDUMP_RECORD_HEADER
+{
+ u32 id;
+ u32 total_size;
+ u64 timestamp;
+};
+struct JITDUMP_CODE_LOAD
+{
+ JITDUMP_RECORD_HEADER header;
+ u32 pid;
+ u32 tid;
+ u64 vma;
+ u64 code_addr;
+ u64 code_size;
+ u64 code_index;
+ // name
+};
+#pragma pack(pop)
+
+static u64 JitDumpTimestamp()
+{
+ struct timespec ts = {};
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ return (static_cast(ts.tv_sec) * 1000000000ULL) + static_cast(ts.tv_nsec);
+}
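+
+// Usage sketch for the jitdump variant (assuming a perf build with jitdump support,
+// per the kernel's jitdump-specification.txt): record with a clock matching
+// JitDumpTimestamp()'s CLOCK_MONOTONIC, then merge the jit-<pid>.dump records in:
+//   perf record -k mono -- ./duckstation ...        (binary name illustrative)
+//   perf inject --jit -i perf.data -o perf.data.jitted
+//   perf report -i perf.data.jitted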
+
+static FILE* s_jitdump_file = nullptr;
+static bool s_jitdump_file_opened = false;
+static std::mutex s_jitdump_mutex;
+static u32 s_jitdump_record_id;
+
+static void RegisterMethod(const void* ptr, size_t size, const char* symbol)
+{
+ const u32 namelen = static_cast<u32>(std::strlen(symbol)) + 1;
+
+ std::unique_lock lock(s_jitdump_mutex);
+ if (!s_jitdump_file)
+ {
+ if (!s_jitdump_file_opened)
+ {
+ char file[256];
+ snprintf(file, std::size(file), "jit-%d.dump", getpid());
+ s_jitdump_file = fopen(file, "w+b");
+ s_jitdump_file_opened = true;
+ if (!s_jitdump_file)
+ return;
+ }
+
+ void* perf_marker = mmap(nullptr, 4096, PROT_READ | PROT_EXEC, MAP_PRIVATE, fileno(s_jitdump_file), 0);
+ AssertMsg(perf_marker != MAP_FAILED, "Map perf marker");
+
+ JITDUMP_HEADER jh = {};
+#if defined(__aarch64__)
+ jh.elf_mach = EM_AARCH64;
+#else
+ jh.elf_mach = EM_X86_64;
+#endif
+ jh.pid = getpid();
+ jh.timestamp = JitDumpTimestamp();
+ std::fwrite(&jh, sizeof(jh), 1, s_jitdump_file);
+ }
+
+ JITDUMP_CODE_LOAD cl = {};
+ cl.header.id = JIT_CODE_LOAD;
+ cl.header.total_size = sizeof(cl) + namelen + static_cast<u32>(size);
+ cl.header.timestamp = JitDumpTimestamp();
+ cl.pid = getpid();
+ cl.tid = syscall(SYS_gettid);
+ cl.vma = 0;
+ cl.code_addr = static_cast<u64>(reinterpret_cast<uintptr_t>(ptr));
+ cl.code_size = static_cast<u64>(size);
+ cl.code_index = s_jitdump_record_id++;
+ std::fwrite(&cl, sizeof(cl), 1, s_jitdump_file);
+ std::fwrite(symbol, namelen, 1, s_jitdump_file);
+ std::fwrite(ptr, size, 1, s_jitdump_file);
+ std::fflush(s_jitdump_file);
+}
+
+#endif
+
+#if defined(__linux__) && (defined(ProfileWithPerf) || defined(ProfileWithPerfJitDump))
+
+void PerfScope::Register(const void* ptr, size_t size, const char* symbol)
+{
+ char full_symbol[128];
+ if (HasPrefix())
+ std::snprintf(full_symbol, std::size(full_symbol), "%s_%s", m_prefix, symbol);
+ else
+ StringUtil::Strlcpy(full_symbol, symbol, std::size(full_symbol));
+ RegisterMethod(ptr, size, full_symbol);
+}
+
+void PerfScope::RegisterPC(const void* ptr, size_t size, u32 pc)
+{
+ char full_symbol[128];
+ if (HasPrefix())
+ std::snprintf(full_symbol, std::size(full_symbol), "%s_%08X", m_prefix, pc);
+ else
+ std::snprintf(full_symbol, std::size(full_symbol), "%08X", pc);
+ RegisterMethod(ptr, size, full_symbol);
+}
+
+void PerfScope::RegisterKey(const void* ptr, size_t size, const char* prefix, u64 key)
+{
+ char full_symbol[128];
+ if (HasPrefix())
+ std::snprintf(full_symbol, std::size(full_symbol), "%s_%s%016" PRIX64, m_prefix, prefix, key);
+ else
+ std::snprintf(full_symbol, std::size(full_symbol), "%s%016" PRIX64, prefix, key);
+ RegisterMethod(ptr, size, full_symbol);
+}
+
+#else
+
+void PerfScope::Register(const void* ptr, size_t size, const char* symbol)
+{
+}
+void PerfScope::RegisterPC(const void* ptr, size_t size, u32 pc)
+{
+}
+void PerfScope::RegisterKey(const void* ptr, size_t size, const char* prefix, u64 key)
+{
+}
+
+#endif
diff --git a/src/common/perf_scope.h b/src/common/perf_scope.h
new file mode 100644
index 000000000..803b62297
--- /dev/null
+++ b/src/common/perf_scope.h
@@ -0,0 +1,20 @@
+// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>, PCSX2 Team
+// SPDX-License-Identifier: GPL-3.0
+
+#pragma once
+
+#include "types.h"
+
+class PerfScope
+{
+public:
+ constexpr PerfScope(const char* prefix) : m_prefix(prefix) {}
+ bool HasPrefix() const { return (m_prefix && m_prefix[0]); }
+
+ void Register(const void* ptr, size_t size, const char* symbol);
+ void RegisterPC(const void* ptr, size_t size, u32 pc);
+ void RegisterKey(const void* ptr, size_t size, const char* prefix, u64 key);
+
+private:
+ const char* m_prefix;
+};
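+
+// Usage sketch (illustrative names; mirrors how the recompiler's MIPSPerfScope is
+// used elsewhere in this change):
+//   static PerfScope s_perf_scope("MIPS");
+//   // after emitting a block at host_code (code_size bytes) for guest address pc:
+//   s_perf_scope.RegisterPC(host_code, code_size, pc); // appears as "MIPS_<pc>"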
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index fcd06ca9f..d2e070ddb 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -19,6 +19,7 @@ add_library(core
controller.h
cpu_code_cache.cpp
cpu_code_cache.h
+ cpu_code_cache_private.h
cpu_core.cpp
cpu_core.h
cpu_core_private.h
diff --git a/src/core/bus.h b/src/core/bus.h
index 88c4b38ce..59a8c06ae 100644
--- a/src/core/bus.h
+++ b/src/core/bus.h
@@ -85,8 +85,8 @@ enum : TickCount
enum : u32
{
- RAM_2MB_CODE_PAGE_COUNT = (RAM_2MB_SIZE + (HOST_PAGE_SIZE + 1)) / HOST_PAGE_SIZE,
- RAM_8MB_CODE_PAGE_COUNT = (RAM_8MB_SIZE + (HOST_PAGE_SIZE + 1)) / HOST_PAGE_SIZE,
+ RAM_2MB_CODE_PAGE_COUNT = (RAM_2MB_SIZE + (HOST_PAGE_SIZE - 1)) / HOST_PAGE_SIZE,
+ RAM_8MB_CODE_PAGE_COUNT = (RAM_8MB_SIZE + (HOST_PAGE_SIZE - 1)) / HOST_PAGE_SIZE,
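+ // The old rounding over-counted by one page: with a 4 KiB host page,
+ // (RAM_2MB_SIZE + (HOST_PAGE_SIZE + 1)) / HOST_PAGE_SIZE = 513, whereas the
+ // ceiling division above yields the correct 2 MiB / 4 KiB = 512 pages.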
MEMORY_LUT_PAGE_SIZE = 4096,
MEMORY_LUT_PAGE_SHIFT = 12,
diff --git a/src/core/core.vcxproj b/src/core/core.vcxproj
index 1b9275f2f..6366658d7 100644
--- a/src/core/core.vcxproj
+++ b/src/core/core.vcxproj
@@ -85,6 +85,7 @@
+    <ClInclude Include="cpu_code_cache_private.h" />
@@ -176,6 +177,9 @@
      <Project>{73ee0c55-6ffe-44e7-9c12-baa52434a797}</Project>
+
+      <Project>{c51a346a-86b2-46df-9bb3-d0aa7e5d8699}</Project>
+    </ProjectReference>
      <Project>{075ced82-6a20-46df-94c7-9624ac9ddbeb}</Project>
diff --git a/src/core/core.vcxproj.filters b/src/core/core.vcxproj.filters
index fb7699b44..f0bd545d4 100644
--- a/src/core/core.vcxproj.filters
+++ b/src/core/core.vcxproj.filters
@@ -124,5 +124,6 @@
+    <ClInclude Include="cpu_code_cache_private.h" />
\ No newline at end of file
diff --git a/src/core/cpu_code_cache.cpp b/src/core/cpu_code_cache.cpp
index 3af3a84a8..45ba18bc7 100644
--- a/src/core/cpu_code_cache.cpp
+++ b/src/core/cpu_code_cache.cpp
@@ -1,10 +1,8 @@
-// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
+// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
-#include "cpu_code_cache.h"
#include "bus.h"
-#include "common/assert.h"
-#include "common/log.h"
+#include "cpu_code_cache_private.h"
#include "cpu_core.h"
#include "cpu_core_private.h"
#include "cpu_disasm.h"
@@ -12,27 +10,110 @@
#include "settings.h"
#include "system.h"
#include "timing_event.h"
+
+#include "common/assert.h"
+#include "common/intrin.h"
+#include "common/log.h"
+
Log_SetChannel(CPU::CodeCache);
#ifdef ENABLE_RECOMPILER
#include "cpu_recompiler_code_generator.h"
#endif
+#include <algorithm>
#include <zlib.h>
namespace CPU::CodeCache {
-static constexpr bool USE_BLOCK_LINKING = true;
+using LUTRangeList = std::array<std::pair<VirtualMemoryAddress, VirtualMemoryAddress>, 9>;
+using PageProtectionArray = std::array<PageProtectionInfo, Bus::RAM_8MB_CODE_PAGE_COUNT>;
+using BlockInstructionInfoPair = std::pair<Instruction, InstructionInfo>;
+using BlockInstructionList = std::vector<BlockInstructionInfoPair>;
-// Fall blocks back to interpreter if we recompile more than 20 times within 100 frames.
-static constexpr u32 RECOMPILE_FRAMES_TO_FALL_BACK_TO_INTERPRETER = 100;
-static constexpr u32 RECOMPILE_COUNT_TO_FALL_BACK_TO_INTERPRETER = 20;
-static constexpr u32 INVALIDATE_THRESHOLD_TO_DISABLE_LINKING = 10;
+// Switch to manual protection if we invalidate more than 4 times within 20 frames.
+// Fall a block back to the interpreter if we recompile it more than 3 times within 15 frames.
+// The interpreter fallback is checked before the manual protection switch, so that if it's just a single block
+// which is constantly getting mutated, we won't hurt the performance of the rest of the page.
+static constexpr u32 RECOMPILE_COUNT_FOR_INTERPRETER_FALLBACK = 3;
+static constexpr u32 RECOMPILE_FRAMES_FOR_INTERPRETER_FALLBACK = 15;
+static constexpr u32 INVALIDATE_COUNT_FOR_MANUAL_PROTECTION = 4;
+static constexpr u32 INVALIDATE_FRAMES_FOR_MANUAL_PROTECTION = 20;
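+// Worked example of the thresholds: once a page has been invalidated five times
+// inside a 20-frame window it switches to ManualCheck, while a block that reaches
+// three compiles inside a 15-frame window has already fallen back to the
+// interpreter on its own, leaving the rest of the page write-protected.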
-#ifdef ENABLE_RECOMPILER
+static CodeLUT DecodeCodeLUTPointer(u32 slot, CodeLUT ptr);
+static CodeLUT EncodeCodeLUTPointer(u32 slot, CodeLUT ptr);
+static CodeLUT OffsetCodeLUTPointer(CodeLUT fake_ptr, u32 pc);
-// Currently remapping the code buffer doesn't work in macOS or Haiku.
-#if !defined(__HAIKU__) && !defined(__APPLE__)
+static void AllocateLUTs();
+static void DeallocateLUTs();
+static void ResetCodeLUT();
+static void SetCodeLUT(u32 pc, const void* function);
+static void InvalidateBlock(Block* block, BlockState new_state);
+static void ClearBlocks();
+
+static Block* LookupBlock(u32 pc);
+static Block* CreateBlock(u32 pc, const BlockInstructionList& instructions, const BlockMetadata& metadata);
+static bool IsBlockCodeCurrent(const Block* block);
+static bool RevalidateBlock(Block* block);
+PageProtectionMode GetProtectionModeForPC(u32 pc);
+PageProtectionMode GetProtectionModeForBlock(const Block* block);
+static bool ReadBlockInstructions(u32 start_pc, BlockInstructionList* instructions, BlockMetadata* metadata);
+static void FillBlockRegInfo(Block* block);
+static void CopyRegInfo(InstructionInfo* dst, const InstructionInfo* src);
+static void SetRegAccess(InstructionInfo* inst, Reg reg, bool write);
+static void AddBlockToPageList(Block* block);
+
+static Common::PageFaultHandler::HandlerResult ExceptionHandler(void* exception_pc, void* fault_address, bool is_write);
+
+static Block* CreateCachedInterpreterBlock(u32 pc);
+[[noreturn]] static void ExecuteCachedInterpreter();
+template<PGXPMode pgxp_mode>
+[[noreturn]] static void ExecuteCachedInterpreterImpl();
+
+// The fast map provides a lookup from PC to host code function.
+// Function pointers are stored pre-offset so the dispatcher doesn't need to subtract the table base.
+CodeLUTArray g_code_lut;
+static BlockLUTArray s_block_lut;
+static std::unique_ptr<const void*[]> s_lut_code_pointers;
+static std::unique_ptr<Block*[]> s_lut_block_pointers;
+static PageProtectionArray s_page_protection = {};
+static std::vector<Block*> s_blocks;
+
+// for compiling - reuse to avoid allocations
+static BlockInstructionList s_block_instructions;
+
+#ifdef ENABLE_RECOMPILER_SUPPORT
+
+static void BacklinkBlocks(u32 pc, const void* dst);
+static void UnlinkBlockExits(Block* block);
+
+static void ClearASMFunctions();
+static void CompileASMFunctions();
+static bool CompileBlock(Block* block);
+static Common::PageFaultHandler::HandlerResult HandleFastmemException(void* exception_pc, void* fault_address,
+ bool is_write);
+static void BackpatchLoadStore(void* host_pc, const LoadstoreBackpatchInfo& info);
+
+static BlockLinkMap s_block_links;
+static std::unordered_map<const void*, LoadstoreBackpatchInfo> s_fastmem_backpatch_info;
+static std::unordered_set<u32> s_fastmem_faulting_pcs;
+
+NORETURN_FUNCTION_POINTER void (*g_enter_recompiler)();
+const void* g_compile_or_revalidate_block;
+const void* g_check_events_and_dispatch;
+const void* g_run_events_and_dispatch;
+const void* g_dispatcher;
+const void* g_interpret_block;
+const void* g_discard_and_recompile_block;
+
+#ifdef ENABLE_RECOMPILER_PROFILING
+
+PerfScope MIPSPerfScope("MIPS");
+
+#endif
+
+// Currently remapping the code buffer doesn't work on macOS. TODO: Make it dynamic instead...
+#ifndef __APPLE__
#define USE_STATIC_CODE_BUFFER 1
#endif
@@ -44,75 +125,137 @@ static constexpr u32 RECOMPILER_FAR_CODE_CACHE_SIZE = 8 * 1024 * 1024;
static constexpr u32 RECOMPILER_CODE_CACHE_SIZE = 32 * 1024 * 1024;
static constexpr u32 RECOMPILER_FAR_CODE_CACHE_SIZE = 16 * 1024 * 1024;
#endif
-static constexpr u32 CODE_WRITE_FAULT_THRESHOLD_FOR_SLOWMEM = 10;
#ifdef USE_STATIC_CODE_BUFFER
static constexpr u32 RECOMPILER_GUARD_SIZE = 4096;
-alignas(Recompiler::CODE_STORAGE_ALIGNMENT) static u8
- s_code_storage[RECOMPILER_CODE_CACHE_SIZE + RECOMPILER_FAR_CODE_CACHE_SIZE];
+alignas(HOST_PAGE_SIZE) static u8 s_code_storage[RECOMPILER_CODE_CACHE_SIZE + RECOMPILER_FAR_CODE_CACHE_SIZE];
#endif
static JitCodeBuffer s_code_buffer;
+#ifdef _DEBUG
+static u32 s_total_instructions_compiled = 0;
+static u32 s_total_host_instructions_emitted = 0;
#endif
-#ifdef ENABLE_RECOMPILER
-static FastMapTable s_fast_map[FAST_MAP_TABLE_COUNT];
-static std::unique_ptr<CodeBlock::HostCodePointer[]> s_fast_map_pointers;
+#endif // ENABLE_RECOMPILER_SUPPORT
+} // namespace CPU::CodeCache
-DispatcherFunction s_asm_dispatcher;
-SingleBlockDispatcherFunction s_single_block_asm_dispatcher;
-
-static FastMapTable DecodeFastMapPointer(u32 slot, FastMapTable ptr)
+bool CPU::CodeCache::IsUsingAnyRecompiler()
{
- if constexpr (sizeof(void*) == 8)
- return reinterpret_cast<FastMapTable>(reinterpret_cast<u8*>(ptr) + (static_cast<u64>(slot) << 17));
- else
- return reinterpret_cast<FastMapTable>(reinterpret_cast<u8*>(ptr) + (slot << 16));
+#ifdef ENABLE_RECOMPILER_SUPPORT
+ return g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler;
+#else
+ return false;
+#endif
}
-static FastMapTable EncodeFastMapPointer(u32 slot, FastMapTable ptr)
+bool CPU::CodeCache::IsUsingFastmem()
{
- if constexpr (sizeof(void*) == 8)
- return reinterpret_cast<FastMapTable>(reinterpret_cast<u8*>(ptr) - (static_cast<u64>(slot) << 17));
- else
- return reinterpret_cast<FastMapTable>(reinterpret_cast<u8*>(ptr) - (slot << 16));
+ return IsUsingAnyRecompiler() && g_settings.cpu_fastmem_mode != CPUFastmemMode::Disabled;
}
-static CodeBlock::HostCodePointer* OffsetFastMapPointer(FastMapTable fake_ptr, u32 pc)
+void CPU::CodeCache::ProcessStartup()
{
- u8* fake_byte_ptr = reinterpret_cast<u8*>(fake_ptr);
- if constexpr (sizeof(void*) == 8)
- return reinterpret_cast<CodeBlock::HostCodePointer*>(fake_byte_ptr + (static_cast<u64>(pc) << 1));
- else
- return reinterpret_cast<CodeBlock::HostCodePointer*>(fake_byte_ptr + pc);
-}
+ AllocateLUTs();
-static void CompileDispatcher();
-static void FastCompileBlockFunction();
-static void InvalidCodeFunction();
-
-static constexpr u32 GetTableCount(u32 start, u32 end)
-{
- return ((end >> FAST_MAP_TABLE_SHIFT) - (start >> FAST_MAP_TABLE_SHIFT)) + 1;
-}
-
-static void AllocateFastMapTables(u32 start, u32 end, FastMapTable& table_ptr)
-{
- const u32 start_slot = start >> FAST_MAP_TABLE_SHIFT;
- const u32 count = GetTableCount(start, end);
- for (u32 i = 0; i < count; i++)
+#ifdef ENABLE_RECOMPILER_SUPPORT
+#ifdef USE_STATIC_CODE_BUFFER
+ const bool has_buffer = s_code_buffer.Initialize(s_code_storage, sizeof(s_code_storage),
+ RECOMPILER_FAR_CODE_CACHE_SIZE, RECOMPILER_GUARD_SIZE);
+#else
+ const bool has_buffer = false;
+#endif
+ if (!has_buffer && !s_code_buffer.Allocate(RECOMPILER_CODE_CACHE_SIZE, RECOMPILER_FAR_CODE_CACHE_SIZE))
{
- const u32 slot = start_slot + i;
-
- s_fast_map[slot] = EncodeFastMapPointer(slot, table_ptr);
- table_ptr += FAST_MAP_TABLE_SIZE;
+ Panic("Failed to initialize code space");
}
+#endif
+
+ if (!Common::PageFaultHandler::InstallHandler(&s_block_lut, &ExceptionHandler))
+ Panic("Failed to install page fault handler");
}
-static void AllocateFastMap()
+void CPU::CodeCache::ProcessShutdown()
{
- static constexpr VirtualMemoryAddress ranges[][2] = {
+ Common::PageFaultHandler::RemoveHandler(&s_block_lut);
+
+#ifdef ENABLE_RECOMPILER_SUPPORT
+ s_code_buffer.Destroy();
+#endif
+
+ DeallocateLUTs();
+}
+
+void CPU::CodeCache::Initialize()
+{
+ Assert(s_blocks.empty());
+
+#ifdef ENABLE_RECOMPILER_SUPPORT
+ if (IsUsingAnyRecompiler())
+ {
+ s_code_buffer.Reset();
+ CompileASMFunctions();
+ ResetCodeLUT();
+ }
+#endif
+
+ Bus::UpdateFastmemViews(IsUsingAnyRecompiler() ? g_settings.cpu_fastmem_mode : CPUFastmemMode::Disabled);
+ CPU::UpdateMemoryPointers();
+}
+
+void CPU::CodeCache::Shutdown()
+{
+ ClearBlocks();
+
+#ifdef ENABLE_RECOMPILER_SUPPORT
+ ClearASMFunctions();
+#endif
+
+ Bus::UpdateFastmemViews(CPUFastmemMode::Disabled);
+ CPU::UpdateMemoryPointers();
+}
+
+void CPU::CodeCache::Reset()
+{
+ ClearBlocks();
+
+#ifdef ENABLE_RECOMPILER_SUPPORT
+ if (IsUsingAnyRecompiler())
+ {
+ ClearASMFunctions();
+ s_code_buffer.Reset();
+ CompileASMFunctions();
+ ResetCodeLUT();
+ }
+#endif
+}
+
+void CPU::CodeCache::Execute()
+{
+#ifdef ENABLE_RECOMPILER_SUPPORT
+ if (IsUsingAnyRecompiler())
+ g_enter_recompiler();
+ else
+ ExecuteCachedInterpreter();
+#else
+ ExecuteCachedInterpreter();
+#endif
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// MARK: - Block Management
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+namespace CPU::CodeCache {
+static constexpr u32 GetLUTTableCount(u32 start, u32 end)
+{
+ return ((end >> LUT_TABLE_SHIFT) - (start >> LUT_TABLE_SHIFT)) + 1;
+}
+
+static constexpr LUTRangeList GetLUTRanges()
+{
+ const LUTRangeList ranges = {{
{0x00000000, 0x00800000}, // RAM
{0x1F000000, 0x1F800000}, // EXP1
{0x1FC00000, 0x1FC80000}, // BIOS
@@ -124,418 +267,524 @@ static void AllocateFastMap()
{0xA0000000, 0xA0800000}, // RAM
{0xBF000000, 0xBF800000}, // EXP1
{0xBFC00000, 0xBFC80000} // BIOS
- };
+ }};
+ return ranges;
+}
- u32 num_tables = 1; // unreachable table
- for (u32 i = 0; i < countof(ranges); i++)
- num_tables += GetTableCount(ranges[i][0], ranges[i][1]);
+static constexpr u32 GetLUTSlotCount(bool include_unreachable)
+{
+ u32 tables = include_unreachable ? 1 : 0; // unreachable table
+ for (const auto& [start, end] : GetLUTRanges())
+ tables += GetLUTTableCount(start, end);
- const u32 num_slots = FAST_MAP_TABLE_SIZE * num_tables;
- if (!s_fast_map_pointers)
- s_fast_map_pointers = std::make_unique<CodeBlock::HostCodePointer[]>(num_slots);
+ return tables * LUT_TABLE_SIZE;
+}
+} // namespace CPU::CodeCache
- FastMapTable table_ptr = s_fast_map_pointers.get();
- FastMapTable table_ptr_end = table_ptr + num_slots;
+CPU::CodeCache::CodeLUT CPU::CodeCache::DecodeCodeLUTPointer(u32 slot, CodeLUT ptr)
+{
+ if constexpr (sizeof(void*) == 8)
+ return reinterpret_cast<CodeLUT>(reinterpret_cast<u8*>(ptr) + (static_cast<u64>(slot) << 17));
+ else
+ return reinterpret_cast<CodeLUT>(reinterpret_cast<u8*>(ptr) + (slot << 16));
+}
- // Fill the first table with invalid/unreachable.
- for (u32 i = 0; i < FAST_MAP_TABLE_SIZE; i++)
- table_ptr[i] = InvalidCodeFunction;
+CPU::CodeCache::CodeLUT CPU::CodeCache::EncodeCodeLUTPointer(u32 slot, CodeLUT ptr)
+{
+ if constexpr (sizeof(void*) == 8)
+ return reinterpret_cast<CodeLUT>(reinterpret_cast<u8*>(ptr) - (static_cast<u64>(slot) << 17));
+ else
+ return reinterpret_cast<CodeLUT>(reinterpret_cast<u8*>(ptr) - (slot << 16));
+}
- // And the remaining with block compile pointers.
- for (u32 i = FAST_MAP_TABLE_SIZE; i < num_slots; i++)
- table_ptr[i] = FastCompileBlockFunction;
+CPU::CodeCache::CodeLUT CPU::CodeCache::OffsetCodeLUTPointer(CodeLUT fake_ptr, u32 pc)
+{
+ u8* fake_byte_ptr = reinterpret_cast<u8*>(fake_ptr);
+ if constexpr (sizeof(void*) == 8)
+ return reinterpret_cast<CodeLUT>(fake_byte_ptr + (static_cast<u64>(pc) << 1));
+ else
+ return reinterpret_cast<CodeLUT>(fake_byte_ptr + pc);
+}
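+
+// Why the encode/offset pair works (64-bit sketch): LUT entries are 8-byte pointers
+// and PCs are 4-byte aligned, so &table[(pc & 0xFFFF) >> 2] == table + ((pc & 0xFFFF) << 1).
+// EncodeCodeLUTPointer pre-subtracts (slot << 17) == ((slot << 16) << 1), letting
+// OffsetCodeLUTPointer add the full (pc << 1): the high bits cancel, and a lookup
+// is one shift and one add, with no masking of the PC.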
+
+void CPU::CodeCache::AllocateLUTs()
+{
+ constexpr u32 num_code_slots = GetLUTSlotCount(true);
+ constexpr u32 num_block_slots = GetLUTSlotCount(false);
+
+ Assert(!s_lut_code_pointers && !s_lut_block_pointers);
+ s_lut_code_pointers = std::make_unique<const void*[]>(num_code_slots);
+ s_lut_block_pointers = std::make_unique<Block*[]>(num_block_slots);
+ std::memset(s_lut_block_pointers.get(), 0, sizeof(Block*) * num_block_slots);
+
+ CodeLUT code_table_ptr = s_lut_code_pointers.get();
+ Block** block_table_ptr = s_lut_block_pointers.get();
+ CodeLUT const code_table_ptr_end = code_table_ptr + num_code_slots;
+ Block** const block_table_ptr_end = block_table_ptr + num_block_slots;
+
+ // Make the unreachable table jump to the invalid code callback.
+ MemsetPtrs(code_table_ptr, static_cast<const void*>(nullptr), LUT_TABLE_COUNT);
// Mark everything as unreachable to begin with.
- for (u32 i = 0; i < FAST_MAP_TABLE_COUNT; i++)
- s_fast_map[i] = EncodeFastMapPointer(i, table_ptr);
- table_ptr += FAST_MAP_TABLE_SIZE;
+ for (u32 i = 0; i < LUT_TABLE_COUNT; i++)
+ {
+ g_code_lut[i] = EncodeCodeLUTPointer(i, code_table_ptr);
+ s_block_lut[i] = nullptr;
+ }
+ code_table_ptr += LUT_TABLE_SIZE;
// Allocate ranges.
- for (u32 i = 0; i < countof(ranges); i++)
- AllocateFastMapTables(ranges[i][0], ranges[i][1], table_ptr);
-
- Assert(table_ptr == table_ptr_end);
-}
-
-static void ResetFastMap()
-{
- if (!s_fast_map_pointers)
- return;
-
- for (u32 i = 0; i < FAST_MAP_TABLE_COUNT; i++)
+ for (const auto& [start, end] : GetLUTRanges())
{
- FastMapTable ptr = DecodeFastMapPointer(i, s_fast_map[i]);
- if (ptr == s_fast_map_pointers.get())
- continue;
-
- for (u32 j = 0; j < FAST_MAP_TABLE_SIZE; j++)
- ptr[j] = FastCompileBlockFunction;
- }
-}
-
-static void FreeFastMap()
-{
- std::memset(s_fast_map, 0, sizeof(s_fast_map));
- s_fast_map_pointers.reset();
-}
-
-static void SetFastMap(u32 pc, CodeBlock::HostCodePointer function)
-{
- if (!s_fast_map_pointers)
- return;
-
- const u32 slot = pc >> FAST_MAP_TABLE_SHIFT;
- FastMapTable encoded_ptr = s_fast_map[slot];
-
- const FastMapTable table_ptr = DecodeFastMapPointer(slot, encoded_ptr);
- Assert(table_ptr != nullptr && table_ptr != s_fast_map_pointers.get());
-
- CodeBlock::HostCodePointer* ptr = OffsetFastMapPointer(encoded_ptr, pc);
- *ptr = function;
-}
-
-#endif
-
-using BlockMap = std::unordered_map<u32, CodeBlock*>;
-using HostCodeMap = std::map<CodeBlock::HostCodePointer, CodeBlock*>;
-
-void LogCurrentState();
-
-/// Returns the block key for the current execution state.
-static CodeBlockKey GetNextBlockKey();
-
-/// Looks up the block in the cache if it's already been compiled.
-static CodeBlock* LookupBlock(CodeBlockKey key, bool allow_flush);
-
-/// Can the current block execute? This will re-validate the block if necessary.
-/// The block can also be flushed if recompilation failed, so ignore the pointer if false is returned.
-static bool RevalidateBlock(CodeBlock* block, bool allow_flush);
-
-static bool CompileBlock(CodeBlock* block, bool allow_flush);
-static void RemoveReferencesToBlock(CodeBlock* block);
-static void AddBlockToPageMap(CodeBlock* block);
-static void RemoveBlockFromPageMap(CodeBlock* block);
-
-/// Link block from to to. Returns the successor index.
-static void LinkBlock(CodeBlock* from, CodeBlock* to, void* host_pc, void* host_resolve_pc, u32 host_pc_size);
-
-/// Unlink all blocks which point to this block, and any that this block links to.
-static void UnlinkBlock(CodeBlock* block);
-
-static void ClearState();
-
-static BlockMap s_blocks;
-static std::array<std::vector<CodeBlock*>, Bus::RAM_8MB_CODE_PAGE_COUNT> m_ram_block_map;
-
-#ifdef ENABLE_RECOMPILER
-static HostCodeMap s_host_code_map;
-
-static void AddBlockToHostCodeMap(CodeBlock* block);
-static void RemoveBlockFromHostCodeMap(CodeBlock* block);
-
-static bool InitializeFastmem();
-static void ShutdownFastmem();
-static Common::PageFaultHandler::HandlerResult LUTPageFaultHandler(void* exception_pc, void* fault_address,
- bool is_write);
-#ifdef ENABLE_MMAP_FASTMEM
-static Common::PageFaultHandler::HandlerResult MMapPageFaultHandler(void* exception_pc, void* fault_address,
- bool is_write);
-#endif
-#endif // ENABLE_RECOMPILER
-
-void Initialize()
-{
- Assert(s_blocks.empty());
-
-#ifdef ENABLE_RECOMPILER
- if (g_settings.IsUsingRecompiler())
- {
-#ifdef USE_STATIC_CODE_BUFFER
- const bool has_buffer = s_code_buffer.Initialize(s_code_storage, sizeof(s_code_storage),
- RECOMPILER_FAR_CODE_CACHE_SIZE, RECOMPILER_GUARD_SIZE);
-#else
- const bool has_buffer = false;
-#endif
- if (!has_buffer && !s_code_buffer.Allocate(RECOMPILER_CODE_CACHE_SIZE, RECOMPILER_FAR_CODE_CACHE_SIZE))
+ const u32 start_slot = start >> LUT_TABLE_SHIFT;
+ const u32 count = GetLUTTableCount(start, end);
+ for (u32 i = 0; i < count; i++)
{
- Panic("Failed to initialize code space");
+ const u32 slot = start_slot + i;
+
+ g_code_lut[slot] = EncodeCodeLUTPointer(slot, code_table_ptr);
+ code_table_ptr += LUT_TABLE_SIZE;
+
+ s_block_lut[slot] = block_table_ptr;
+ block_table_ptr += LUT_TABLE_SIZE;
}
}
+
+ Assert(code_table_ptr == code_table_ptr_end);
+ Assert(block_table_ptr == block_table_ptr_end);
+}
+
+void CPU::CodeCache::DeallocateLUTs()
+{
+ s_lut_block_pointers.reset();
+ s_lut_code_pointers.reset();
+}
+
+void CPU::CodeCache::ResetCodeLUT()
+{
+ if (!s_lut_code_pointers)
+ return;
+
+ // Make the unreachable table jump to the invalid code callback.
+ MemsetPtrs(s_lut_code_pointers.get(), g_interpret_block, LUT_TABLE_COUNT);
+
+ for (u32 i = 0; i < LUT_TABLE_COUNT; i++)
+ {
+ CodeLUT ptr = DecodeCodeLUTPointer(i, g_code_lut[i]);
+ if (ptr == s_lut_code_pointers.get())
+ continue;
+
+ MemsetPtrs(ptr, g_compile_or_revalidate_block, LUT_TABLE_SIZE);
+ }
+}
+
+void CPU::CodeCache::SetCodeLUT(u32 pc, const void* function)
+{
+ if (!s_lut_code_pointers)
+ return;
+
+ const u32 table = pc >> LUT_TABLE_SHIFT;
+ CodeLUT encoded_ptr = g_code_lut[table];
+
+#ifdef _DEBUG
+ const CodeLUT table_ptr = DecodeCodeLUTPointer(table, encoded_ptr);
+ DebugAssert(table_ptr != nullptr && table_ptr != s_lut_code_pointers.get());
#endif
- AllocateFastMap();
+ *OffsetCodeLUTPointer(encoded_ptr, pc) = function;
+}
-#ifdef ENABLE_RECOMPILER
- if (g_settings.IsUsingRecompiler())
+CPU::CodeCache::Block* CPU::CodeCache::LookupBlock(u32 pc)
+{
+ const u32 table = pc >> LUT_TABLE_SHIFT;
+ if (!s_block_lut[table])
+ return nullptr;
+
+ const u32 idx = (pc & 0xFFFF) >> 2;
+ return s_block_lut[table][idx];
+}
+
+CPU::CodeCache::Block* CPU::CodeCache::CreateBlock(u32 pc, const BlockInstructionList& instructions,
+ const BlockMetadata& metadata)
+{
+ const u32 size = static_cast<u32>(instructions.size());
+ const u32 table = pc >> LUT_TABLE_SHIFT;
+ Assert(s_block_lut[table]);
+
+ // retain from old block
+ const u32 frame_number = System::GetFrameNumber();
+ u32 recompile_frame = System::GetFrameNumber();
+ u8 recompile_count = 0;
+
+ const u32 idx = (pc & 0xFFFF) >> 2;
+ Block* block = s_block_lut[table][idx];
+ if (block)
{
- if (g_settings.IsUsingFastmem() && !InitializeFastmem())
- Panic("Failed to initialize fastmem");
+ // shouldn't be in the page list.. since we should come here after invalidating
+ Assert(!block->next_block_in_page);
- AllocateFastMap();
- CompileDispatcher();
- ResetFastMap();
+ // keep recompile stats before resetting, that way we actually count recompiles
+ recompile_frame = block->compile_frame;
+ recompile_count = block->compile_count;
+
+ // if it has the same number of instructions, we can reuse it
+ if (block->size != size)
+ {
+ // this sucks.. hopefully won't happen very often
+ // TODO: allocate max size, allow shrink but not grow
+ auto it = std::find(s_blocks.begin(), s_blocks.end(), block);
+ Assert(it != s_blocks.end());
+ s_blocks.erase(it);
+
+ std::free(block);
+ block = nullptr;
+ }
+ }
+
+ if (!block)
+ {
+ block =
+ static_cast<Block*>(std::malloc(sizeof(Block) + (sizeof(Instruction) * size) + (sizeof(InstructionInfo) * size)));
+ Assert(block);
+ s_blocks.push_back(block);
+ }
+
+ block->pc = pc;
+ block->size = size;
+ block->host_code = nullptr;
+ block->next_block_in_page = nullptr;
+ block->num_exit_links = 0;
+ block->state = BlockState::Valid;
+ block->flags = metadata.flags;
+ block->protection = GetProtectionModeForBlock(block);
+ block->uncached_fetch_ticks = metadata.uncached_fetch_ticks;
+ block->icache_line_count = metadata.icache_line_count;
+ block->compile_frame = recompile_frame;
+ block->compile_count = recompile_count + 1;
+
+ // copy instructions/info
+ {
+ const std::pair<Instruction, InstructionInfo>* ip = instructions.data();
+ Instruction* dsti = block->Instructions();
+ InstructionInfo* dstii = block->InstructionsInfo();
+
+ for (u32 i = 0; i < size; i++, ip++, dsti++, dstii++)
+ {
+ dsti->bits = ip->first.bits;
+ *dstii = ip->second;
+ }
+ }
+
+ s_block_lut[table][idx] = block;
+
+ // if the block is being recompiled too often, leave it in the list, but don't compile it.
+ const u32 frame_delta = frame_number - recompile_frame;
+ if (frame_delta >= RECOMPILE_FRAMES_FOR_INTERPRETER_FALLBACK)
+ {
+ block->compile_frame = frame_number;
+ block->compile_count = 1;
+ }
+ else if (block->compile_count >= RECOMPILE_COUNT_FOR_INTERPRETER_FALLBACK)
+ {
+ Log_DevFmt("{} recompiles in {} frames to block 0x{:08X}, not caching.", block->compile_count, frame_delta,
+ block->pc);
+ block->size = 0;
+ }
+
+ // cached interpreter creates empty blocks when falling back
+ if (block->size == 0)
+ {
+ block->state = BlockState::FallbackToInterpreter;
+ block->protection = PageProtectionMode::Unprotected;
+ return block;
+ }
+
+ // TODO: Only used by NewRec for now, don't waste time filling it.
+ if constexpr (false)
+ FillBlockRegInfo(block);
+
+ // add it to the tracking list for its page
+ AddBlockToPageList(block);
+
+ return block;
+}
+
+bool CPU::CodeCache::IsBlockCodeCurrent(const Block* block)
+{
+ // blocks shouldn't be wrapping..
+ const PhysicalMemoryAddress phys_addr = VirtualAddressToPhysical(block->pc);
+ DebugAssert((phys_addr + (sizeof(Instruction) * block->size)) <= Bus::g_ram_size);
+
+ // can just do a straight memcmp..
+ return (std::memcmp(Bus::g_ram + phys_addr, block->Instructions(), sizeof(Instruction) * block->size) == 0);
+}
+
+bool CPU::CodeCache::RevalidateBlock(Block* block)
+{
+ DebugAssert(block->state != BlockState::Valid);
+ DebugAssert(AddressInRAM(block->pc));
+
+ if (block->state >= BlockState::NeedsRecompile)
+ return false;
+
+ // Protection may have changed if we didn't execute before it got invalidated again. e.g. THPS2.
+ if (block->protection != GetProtectionModeForBlock(block))
+ return false;
+
+ if (!IsBlockCodeCurrent(block))
+ {
+ // changed, needs recompiling
+ Log_DebugPrintf("Block at PC %08X has changed and needs recompiling", block->pc);
+ return false;
+ }
+
+ block->state = BlockState::Valid;
+ AddBlockToPageList(block);
+ return true;
+}
+
+void CPU::CodeCache::AddBlockToPageList(Block* block)
+{
+ DebugAssert(block->size > 0);
+ if (!AddressInRAM(block->pc) || block->protection != PageProtectionMode::WriteProtected)
+ return;
+
+ const u32 page_idx = block->StartPageIndex();
+ PageProtectionInfo& entry = s_page_protection[page_idx];
+ Bus::SetRAMCodePage(page_idx);
+
+ if (entry.last_block_in_page)
+ {
+ entry.last_block_in_page->next_block_in_page = block;
+ entry.last_block_in_page = block;
+ }
+ else
+ {
+ entry.first_block_in_page = block;
+ entry.last_block_in_page = block;
+ }
+}
+
+void CPU::CodeCache::InvalidateBlocksWithPageIndex(u32 index)
+{
+ DebugAssert(index < Bus::RAM_8MB_CODE_PAGE_COUNT);
+ Bus::ClearRAMCodePage(index);
+
+ BlockState new_block_state = BlockState::Invalidated;
+ PageProtectionInfo& ppi = s_page_protection[index];
+
+ const u32 frame_number = System::GetFrameNumber();
+ const u32 frame_delta = frame_number - ppi.invalidate_frame;
+ ppi.invalidate_count++;
+
+ if (frame_delta >= INVALIDATE_FRAMES_FOR_MANUAL_PROTECTION)
+ {
+ ppi.invalidate_count = 1;
+ ppi.invalidate_frame = frame_number;
+ }
+ else if (ppi.invalidate_count > INVALIDATE_COUNT_FOR_MANUAL_PROTECTION)
+ {
+ Log_DevFmt("{} invalidations in {} frames to page {} [0x{:08X} -> 0x{:08X}], switching to manual protection",
+ ppi.invalidate_count, frame_delta, index, (index * HOST_PAGE_SIZE), ((index + 1) * HOST_PAGE_SIZE));
+ ppi.mode = PageProtectionMode::ManualCheck;
+ new_block_state = BlockState::NeedsRecompile;
+ }
+
+ Block* block = ppi.first_block_in_page;
+ while (block)
+ {
+ InvalidateBlock(block, new_block_state);
+ block = std::exchange(block->next_block_in_page, nullptr);
+ }
+
+ ppi.first_block_in_page = nullptr;
+ ppi.last_block_in_page = nullptr;
+}
+
+CPU::CodeCache::PageProtectionMode CPU::CodeCache::GetProtectionModeForPC(u32 pc)
+{
+ if (!AddressInRAM(pc))
+ return PageProtectionMode::Unprotected;
+
+ const u32 page_idx = Bus::GetRAMCodePageIndex(pc);
+ return s_page_protection[page_idx].mode;
+}
+
+CPU::CodeCache::PageProtectionMode CPU::CodeCache::GetProtectionModeForBlock(const Block* block)
+{
+ // if the block has a branch delay slot crossing a page, we must use manual protection.
+ // no other way about it.
+ if (block->HasFlag(BlockFlags::BranchDelaySpansPages))
+ return PageProtectionMode::ManualCheck;
+
+ return GetProtectionModeForPC(block->pc);
+}
+
+void CPU::CodeCache::InvalidateBlock(Block* block, BlockState new_state)
+{
+#ifdef ENABLE_RECOMPILER_SUPPORT
+ if (block->state == BlockState::Valid)
+ {
+ SetCodeLUT(block->pc, g_compile_or_revalidate_block);
+ BacklinkBlocks(block->pc, g_compile_or_revalidate_block);
}
#endif
+
+ block->state = new_state;
}
-void ClearState()
+void CPU::CodeCache::InvalidateAllRAMBlocks()
{
+ // TODO: maybe combine the backlink into one big instruction flush cache?
+
+ for (Block* block : s_blocks)
+ {
+ if (AddressInRAM(block->pc))
+ InvalidateBlock(block, BlockState::Invalidated);
+ }
+
Bus::ClearRAMCodePageFlags();
- for (auto& it : m_ram_block_map)
- it.clear();
+}
- for (const auto& it : s_blocks)
- delete it.second;
+void CPU::CodeCache::ClearBlocks()
+{
+ for (u32 i = 0; i < Bus::RAM_8MB_CODE_PAGE_COUNT; i++)
+ {
+ PageProtectionInfo& ppi = s_page_protection[i];
+ if (ppi.mode == PageProtectionMode::WriteProtected && ppi.first_block_in_page)
+ Bus::ClearRAMCodePage(i);
+ ppi = {};
+ }
+
+#ifdef ENABLE_RECOMPILER_SUPPORT
+ s_fastmem_backpatch_info.clear();
+ s_fastmem_faulting_pcs.clear();
+ s_block_links.clear();
+#endif
+
+ for (Block* block : s_blocks)
+ std::free(block);
s_blocks.clear();
-#ifdef ENABLE_RECOMPILER
- s_host_code_map.clear();
- s_code_buffer.Reset();
- ResetFastMap();
+
+ std::memset(s_lut_block_pointers.get(), 0, sizeof(Block*) * GetLUTSlotCount(false));
+}
+
+Common::PageFaultHandler::HandlerResult CPU::CodeCache::ExceptionHandler(void* exception_pc, void* fault_address,
+ bool is_write)
+{
+ // TODO: Catch general RAM writes, not just fastmem
+#ifdef ENABLE_RECOMPILER_SUPPORT
+ return HandleFastmemException(exception_pc, fault_address, is_write);
+#else
+ return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler;
#endif
}
-void Shutdown()
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// MARK: - Cached Interpreter
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+CPU::CodeCache::Block* CPU::CodeCache::CreateCachedInterpreterBlock(u32 pc)
{
- ClearState();
-#ifdef ENABLE_RECOMPILER
- ShutdownFastmem();
- FreeFastMap();
- s_code_buffer.Destroy();
-#endif
+ BlockMetadata metadata = {};
+ ReadBlockInstructions(pc, &s_block_instructions, &metadata);
+ return CreateBlock(pc, s_block_instructions, metadata);
}
template<PGXPMode pgxp_mode>
-[[noreturn]] static void ExecuteImpl()
+[[noreturn]] void CPU::CodeCache::ExecuteCachedInterpreterImpl()
{
- CodeBlockKey next_block_key;
+#define CHECK_DOWNCOUNT() \
+ if (g_state.pending_ticks >= g_state.downcount) \
+ break;
for (;;)
{
TimingEvents::RunEvents();
- next_block_key = GetNextBlockKey();
while (g_state.pending_ticks < g_state.downcount)
{
- CodeBlock* block = LookupBlock(next_block_key, true);
- if (!block)
- {
- InterpretUncachedBlock<pgxp_mode>();
- next_block_key = GetNextBlockKey();
- continue;
- }
-
- reexecute_block:
- Assert(!(HasPendingInterrupt()));
-
-#if 0
- const u32 tick = TimingEvents::GetGlobalTickCounter() + CPU::GetPendingTicks();
- if (tick == 4188233674)
- __debugbreak();
-#endif
-
#if 0
LogCurrentState();
#endif
+#if 0
+ if ((g_state.pending_ticks + TimingEvents::GetGlobalTickCounter()) == 3301006214)
+ __debugbreak();
+#endif
+ // Manually done because we don't want to compile blocks without a LUT.
+ const u32 pc = g_state.pc;
+ const u32 table = pc >> LUT_TABLE_SHIFT;
+ Block* block;
+ if (s_block_lut[table])
+ {
+ const u32 idx = (pc & 0xFFFF) >> 2;
+ block = s_block_lut[table][idx];
+ }
+ else
+ {
+ // Likely invalid code...
+ goto interpret_block;
+ }
+
+ reexecute_block:
+ if (!block)
+ {
+ if ((block = CreateCachedInterpreterBlock(pc))->size == 0) [[unlikely]]
+ goto interpret_block;
+ }
+ else
+ {
+ if (block->state == BlockState::FallbackToInterpreter) [[unlikely]]
+ goto interpret_block;
+
+ if ((block->state != BlockState::Valid && !RevalidateBlock(block)) ||
+ (block->protection == PageProtectionMode::ManualCheck && !IsBlockCodeCurrent(block)))
+ {
+ if ((block = CreateCachedInterpreterBlock(pc))->size == 0) [[unlikely]]
+ goto interpret_block;
+ }
+ }
+
+ // TODO: make DebugAssert
+ Assert(!(HasPendingInterrupt()));
if (g_settings.cpu_recompiler_icache)
CheckAndUpdateICacheTags(block->icache_line_count, block->uncached_fetch_ticks);
- InterpretCachedBlock<pgxp_mode>(*block);
+ InterpretCachedBlock<pgxp_mode>(block);
- if (g_state.pending_ticks >= g_state.downcount)
- break;
- else if (!USE_BLOCK_LINKING)
+ CHECK_DOWNCOUNT();
+
+ // Handle self-looping blocks
+ if (g_state.pc == block->pc)
+ goto reexecute_block;
+ else
continue;
- next_block_key = GetNextBlockKey();
- if (next_block_key.bits == block->key.bits)
- {
- // we can jump straight to it if there's no pending interrupts
- // ensure it's not a self-modifying block
- if (!block->invalidated || RevalidateBlock(block, true))
- goto reexecute_block;
- }
- else if (!block->invalidated)
- {
- // Try to find an already-linked block.
- // TODO: Don't need to dereference the block, just store a pointer to the code.
- for (const CodeBlock::LinkInfo& li : block->link_successors)
- {
- CodeBlock* linked_block = li.block;
- if (linked_block->key.bits == next_block_key.bits)
- {
- if (linked_block->invalidated && !RevalidateBlock(linked_block, true))
- {
- // CanExecuteBlock can result in a block flush, so stop iterating here.
- break;
- }
-
- // Execute the linked block
- block = linked_block;
- goto reexecute_block;
- }
- }
-
- // No acceptable blocks found in the successor list, try a new one.
- CodeBlock* next_block = LookupBlock(next_block_key, false);
- if (next_block)
- {
- // Link the previous block to this new block if we find a new block.
- LinkBlock(block, next_block, nullptr, nullptr, 0);
- block = next_block;
- goto reexecute_block;
- }
- }
+ interpret_block:
+ InterpretUncachedBlock<pgxp_mode>();
+ CHECK_DOWNCOUNT();
+ continue;
}
}
-
- // in case we switch to interpreter...
- g_state.npc = g_state.pc;
}
-#ifdef ENABLE_RECOMPILER
-
-void CompileDispatcher()
+[[noreturn]] void CPU::CodeCache::ExecuteCachedInterpreter()
{
- s_code_buffer.WriteProtect(false);
-
+ if (g_settings.gpu_pgxp_enable)
{
- Recompiler::CodeGenerator cg(&s_code_buffer);
- s_asm_dispatcher = cg.CompileDispatcher();
+ if (g_settings.gpu_pgxp_cpu)
+ ExecuteCachedInterpreterImpl<PGXPMode::CPU>();
+ else
+ ExecuteCachedInterpreterImpl<PGXPMode::Memory>();
}
+ else
{
- Recompiler::CodeGenerator cg(&s_code_buffer);
- s_single_block_asm_dispatcher = cg.CompileSingleBlockDispatcher();
- }
-
- s_code_buffer.WriteProtect(true);
-}
-
-FastMapTable* GetFastMapPointer()
-{
- return s_fast_map;
-}
-
-[[noreturn]] static void ExecuteRecompiler()
-{
-#if 0
- for (;;)
- {
- if (HasPendingInterrupt())
- DispatchInterrupt();
-
- TimingEvents::RunEvents();
-
- while (g_state.pending_ticks < g_state.downcount)
- {
-#if 0
- LogCurrentState();
-#endif
-
- const u32 pc = g_state.pc;
- s_single_block_asm_dispatcher(s_fast_map[pc >> 16][pc >> 2]);
- }
- }
-#else
- s_asm_dispatcher();
-#endif
- UnreachableCode();
-}
-
-#endif
-
-[[noreturn]] void Execute()
-{
- switch (g_settings.cpu_execution_mode)
- {
-#ifdef ENABLE_RECOMPILER
- case CPUExecutionMode::Recompiler:
- ExecuteRecompiler();
- break;
-#endif
-
- default:
- {
- if (g_settings.gpu_pgxp_enable)
- {
- if (g_settings.gpu_pgxp_cpu)
- ExecuteImpl<PGXPMode::CPU>();
- else
- ExecuteImpl<PGXPMode::Memory>();
- }
- else
- {
- ExecuteImpl<PGXPMode::Disabled>();
- }
- }
- break;
+ ExecuteCachedInterpreterImpl<PGXPMode::Disabled>();
}
}
-#if defined(ENABLE_RECOMPILER)
-
-JitCodeBuffer& GetCodeBuffer()
-{
- return s_code_buffer;
-}
-
-#endif
-
-void Reinitialize()
-{
- ClearState();
-
-#ifdef ENABLE_RECOMPILER
- ShutdownFastmem();
-#endif
-
-#if defined(ENABLE_RECOMPILER)
- s_code_buffer.Destroy();
-
- if (g_settings.IsUsingRecompiler())
- {
-#ifdef USE_STATIC_CODE_BUFFER
- if (!s_code_buffer.Initialize(s_code_storage, sizeof(s_code_storage), RECOMPILER_FAR_CODE_CACHE_SIZE,
- RECOMPILER_GUARD_SIZE))
-#else
- if (!s_code_buffer.Allocate(RECOMPILER_CODE_CACHE_SIZE, RECOMPILER_FAR_CODE_CACHE_SIZE))
-#endif
- {
- Panic("Failed to initialize code space");
- }
- }
-#endif
-
-#ifdef ENABLE_RECOMPILER
- if (g_settings.IsUsingRecompiler())
- {
- if (g_settings.IsUsingFastmem() && !InitializeFastmem())
- Panic("Failed to initialize fastmem");
-
- AllocateFastMap();
- CompileDispatcher();
- ResetFastMap();
- }
-#endif
-}
-
-void Flush()
-{
- ClearState();
-#ifdef ENABLE_RECOMPILER
- if (g_settings.IsUsingRecompiler())
- CompileDispatcher();
-#endif
-}
-
-#ifndef _MSC_VER
-void __debugbreak()
-{
-}
-#endif
-
-void LogCurrentState()
+void CPU::CodeCache::LogCurrentState()
{
#if 0
if ((TimingEvents::GetGlobalTickCounter() + GetPendingTicks()) == 2546728915)
@@ -561,148 +810,16 @@ void LogCurrentState()
g_state.cop0_regs.sr.bits, static_cast<u32>(crc32(0, (const Bytef*)&g_state.gte_regs, sizeof(g_state.gte_regs))));
}
-CodeBlockKey GetNextBlockKey()
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// MARK: - Block Compilation: Shared Code
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+bool CPU::CodeCache::ReadBlockInstructions(u32 start_pc, BlockInstructionList* instructions, BlockMetadata* metadata)
{
- CodeBlockKey key;
- key.bits = 0;
- key.SetPC(g_state.pc);
- key.user_mode = InUserMode();
- return key;
-}
+ // TODO: Jump to other block if it exists at this pc?
-// assumes it has already been unlinked
-static void FallbackExistingBlockToInterpreter(CodeBlock* block)
-{
- // Replace with null so we don't try to compile it again.
- s_blocks.emplace(block->key.bits, nullptr);
- delete block;
-}
-
-CodeBlock* LookupBlock(CodeBlockKey key, bool allow_flush)
-{
- BlockMap::iterator iter = s_blocks.find(key.bits);
- if (iter != s_blocks.end())
- {
- // ensure it hasn't been invalidated
- CodeBlock* existing_block = iter->second;
- if (!existing_block || !existing_block->invalidated)
- return existing_block;
-
- // if compilation fails or we're forced back to the interpreter, bail out
- if (RevalidateBlock(existing_block, allow_flush))
- return existing_block;
- else
- return nullptr;
- }
-
- CodeBlock* block = new CodeBlock(key);
- block->recompile_frame_number = System::GetFrameNumber();
-
- if (CompileBlock(block, allow_flush))
- {
- // add it to the page map if it's in ram
- AddBlockToPageMap(block);
-
-#ifdef ENABLE_RECOMPILER
- SetFastMap(block->GetPC(), block->host_code);
- AddBlockToHostCodeMap(block);
-#endif
- }
- else
- {
- Log_ErrorPrintf("Failed to compile block at PC=0x%08X", key.GetPC());
- delete block;
- block = nullptr;
- }
-
- if (block || allow_flush)
- s_blocks.emplace(key.bits, block);
-
- return block;
-}
-
-bool RevalidateBlock(CodeBlock* block, bool allow_flush)
-{
- for (const CodeBlockInstruction& cbi : block->instructions)
- {
- u32 new_code = 0;
- SafeReadInstruction(cbi.pc, &new_code);
- if (cbi.instruction.bits != new_code)
- {
- Log_DebugPrintf("Block 0x%08X changed at PC 0x%08X - %08X to %08X - recompiling.", block->GetPC(), cbi.pc,
- cbi.instruction.bits, new_code);
- goto recompile;
- }
- }
-
- // re-add it to the page map since it's still up-to-date
- block->invalidated = false;
- AddBlockToPageMap(block);
-#ifdef ENABLE_RECOMPILER
- SetFastMap(block->GetPC(), block->host_code);
-#endif
- return true;
-
-recompile:
- // remove any references to the block from the lookup table.
- // this is an edge case where compiling causes a flush-all due to no space,
- // and we don't want to nuke the block we're compiling...
- RemoveReferencesToBlock(block);
-
-#ifdef ENABLE_RECOMPILER
- RemoveBlockFromHostCodeMap(block);
-#endif
-
- const u32 frame_number = System::GetFrameNumber();
- const u32 frame_diff = frame_number - block->recompile_frame_number;
- if (frame_diff <= RECOMPILE_FRAMES_TO_FALL_BACK_TO_INTERPRETER)
- {
- block->recompile_count++;
-
- if (block->recompile_count >= RECOMPILE_COUNT_TO_FALL_BACK_TO_INTERPRETER)
- {
- Log_PerfPrintf("Block 0x%08X has been recompiled %u times in %u frames, falling back to interpreter",
- block->GetPC(), block->recompile_count, frame_diff);
-
- FallbackExistingBlockToInterpreter(block);
- return false;
- }
- }
- else
- {
- // It's been a while since this block was modified, so it's all good.
- block->recompile_frame_number = frame_number;
- block->recompile_count = 0;
- }
-
- block->instructions.clear();
-
- if (!CompileBlock(block, allow_flush))
- {
- Log_PerfPrintf("Failed to recompile block 0x%08X, falling back to interpreter.", block->GetPC());
- FallbackExistingBlockToInterpreter(block);
- return false;
- }
-
- AddBlockToPageMap(block);
-
-#ifdef ENABLE_RECOMPILER
- // re-add to page map again
- SetFastMap(block->GetPC(), block->host_code);
- AddBlockToHostCodeMap(block);
-#endif
-
- // block is valid again
- block->invalidated = false;
-
- // re-insert into the block map since we removed it earlier.
- s_blocks.emplace(block->key.bits, block);
- return true;
-}
-
-bool CompileBlock(CodeBlock* block, bool allow_flush)
-{
- u32 pc = block->GetPC();
+ const PageProtectionMode protection = GetProtectionModeForPC(start_pc);
+ u32 pc = start_pc;
bool is_branch_delay_slot = false;
bool is_load_delay_slot = false;
@@ -711,590 +828,759 @@ bool CompileBlock(CodeBlock* block, bool allow_flush)
__debugbreak();
#endif
- block->icache_line_count = 0;
- block->uncached_fetch_ticks = 0;
- block->contains_double_branches = false;
- block->contains_loadstore_instructions = false;
+ instructions->clear();
+ metadata->icache_line_count = 0;
+ metadata->uncached_fetch_ticks = 0;
+ metadata->flags = BlockFlags::None;
u32 last_cache_line = ICACHE_LINES;
+ u32 last_page = (protection == PageProtectionMode::WriteProtected) ? Bus::GetRAMCodePageIndex(start_pc) : 0;
for (;;)
{
- CodeBlockInstruction cbi = {};
- if (!SafeReadInstruction(pc, &cbi.instruction.bits) || !IsInvalidInstruction(cbi.instruction))
+ if (protection == PageProtectionMode::WriteProtected)
+ {
+ const u32 this_page = Bus::GetRAMCodePageIndex(pc);
+ if (this_page != last_page)
+ {
+ // if we're just crossing the page and not in a branch delay slot, jump directly to the next block
+ if (!is_branch_delay_slot)
+ {
+ Log_DevFmt("Breaking block 0x{:08X} at 0x{:08X} due to page crossing", start_pc, pc);
+ metadata->flags |= BlockFlags::SpansPages;
+ break;
+ }
+ else
+ {
+ // otherwise, we need to use manual protection in case the delay slot changes.
+ // may as well keep going then, since we're doing manual check anyways.
+ Log_DevFmt("Block 0x{:08X} has branch delay slot crossing page at 0x{:08X}, forcing manual protection",
+ start_pc, pc);
+ metadata->flags |= BlockFlags::BranchDelaySpansPages;
+ }
+ }
+ }
+
+ Instruction instruction;
+ if (!SafeReadInstruction(pc, &instruction.bits) || !IsInvalidInstruction(instruction))
break;
- cbi.pc = pc;
- cbi.is_branch_delay_slot = is_branch_delay_slot;
- cbi.is_load_delay_slot = is_load_delay_slot;
- cbi.is_branch_instruction = IsBranchInstruction(cbi.instruction);
- cbi.is_direct_branch_instruction = IsDirectBranchInstruction(cbi.instruction);
- cbi.is_unconditional_branch_instruction = IsUnconditionalBranchInstruction(cbi.instruction);
- cbi.is_load_instruction = IsMemoryLoadInstruction(cbi.instruction);
- cbi.is_store_instruction = IsMemoryStoreInstruction(cbi.instruction);
- cbi.has_load_delay = InstructionHasLoadDelay(cbi.instruction);
- cbi.can_trap = CanInstructionTrap(cbi.instruction, InUserMode());
- cbi.is_direct_branch_instruction = IsDirectBranchInstruction(cbi.instruction);
+ InstructionInfo info;
+ std::memset(&info, 0, sizeof(info));
+
+ info.pc = pc;
+ info.is_branch_delay_slot = is_branch_delay_slot;
+ info.is_load_delay_slot = is_load_delay_slot;
+ info.is_branch_instruction = IsBranchInstruction(instruction);
+ info.is_direct_branch_instruction = IsDirectBranchInstruction(instruction);
+ info.is_unconditional_branch_instruction = IsUnconditionalBranchInstruction(instruction);
+ info.is_load_instruction = IsMemoryLoadInstruction(instruction);
+ info.is_store_instruction = IsMemoryStoreInstruction(instruction);
+ info.has_load_delay = InstructionHasLoadDelay(instruction);
+ info.can_trap = CanInstructionTrap(instruction, false /*InUserMode()*/);
+ info.is_direct_branch_instruction = IsDirectBranchInstruction(instruction);
if (g_settings.cpu_recompiler_icache)
{
const u32 icache_line = GetICacheLine(pc);
if (icache_line != last_cache_line)
{
- block->icache_line_count++;
+ metadata->icache_line_count++;
last_cache_line = icache_line;
}
}
- block->uncached_fetch_ticks += GetInstructionReadTicks(pc);
- block->contains_loadstore_instructions |= cbi.is_load_instruction;
- block->contains_loadstore_instructions |= cbi.is_store_instruction;
+ metadata->uncached_fetch_ticks += GetInstructionReadTicks(pc);
+ if (info.is_load_instruction || info.is_store_instruction)
+ metadata->flags |= BlockFlags::ContainsLoadStoreInstructions;
- pc += sizeof(cbi.instruction.bits);
+ pc += sizeof(Instruction);
- if (is_branch_delay_slot && cbi.is_branch_instruction)
+ if (is_branch_delay_slot && info.is_branch_instruction)
{
- const CodeBlockInstruction& prev_cbi = block->instructions.back();
- if (!prev_cbi.is_unconditional_branch_instruction || !prev_cbi.is_direct_branch_instruction)
+ const BlockInstructionInfoPair& prev = instructions->back();
+ if (!prev.second.is_unconditional_branch_instruction || !prev.second.is_direct_branch_instruction)
{
- Log_WarningPrintf("Conditional or indirect branch delay slot at %08X, skipping block", cbi.pc);
+ Log_WarningPrintf("Conditional or indirect branch delay slot at %08X, skipping block", info.pc);
return false;
}
- if (!IsDirectBranchInstruction(cbi.instruction))
+ if (!IsDirectBranchInstruction(instruction))
{
- Log_WarningPrintf("Indirect branch in delay slot at %08X, skipping block", cbi.pc);
+ Log_WarningPrintf("Indirect branch in delay slot at %08X, skipping block", info.pc);
return false;
}
// change the pc for the second branch's delay slot, it comes from the first branch
- pc = GetDirectBranchTarget(prev_cbi.instruction, prev_cbi.pc);
- Log_DevPrintf("Double branch at %08X, using delay slot from %08X -> %08X", cbi.pc, prev_cbi.pc, pc);
+ pc = GetDirectBranchTarget(prev.first, prev.second.pc);
+ Log_DevPrintf("Double branch at %08X, using delay slot from %08X -> %08X", info.pc, prev.second.pc, pc);
}
// instruction is decoded now
- block->instructions.push_back(cbi);
+ instructions->emplace_back(instruction, info);
// if we're in a branch delay slot, the block is now done
// except if this is a branch in a branch delay slot, then we grab the one after that, and so on...
- if (is_branch_delay_slot && !cbi.is_branch_instruction)
+ if (is_branch_delay_slot && !info.is_branch_instruction)
break;
// if this is a branch, we grab the next instruction (delay slot), and then exit
- is_branch_delay_slot = cbi.is_branch_instruction;
+ is_branch_delay_slot = info.is_branch_instruction;
// same for load delay
- is_load_delay_slot = cbi.has_load_delay;
+ is_load_delay_slot = info.has_load_delay;
// is this a non-branchy exit? (e.g. syscall)
- if (IsExitBlockInstruction(cbi.instruction))
+ if (IsExitBlockInstruction(instruction))
break;
}
- if (!block->instructions.empty())
+ if (instructions->empty())
{
- block->instructions.back().is_last_instruction = true;
+ Log_WarningFmt("Empty block compiled at 0x{:08X}", start_pc);
+ return false;
+ }
+
+ instructions->back().second.is_last_instruction = true;
#ifdef _DEBUG
- SmallString disasm;
- Log_DebugPrintf("Block at 0x%08X", block->GetPC());
- for (const CodeBlockInstruction& cbi : block->instructions)
- {
- CPU::DisassembleInstruction(&disasm, cbi.pc, cbi.instruction.bits);
- Log_DebugPrintf("[%s %s 0x%08X] %08X %s", cbi.is_branch_delay_slot ? "BD" : " ",
- cbi.is_load_delay_slot ? "LD" : " ", cbi.pc, cbi.instruction.bits, disasm.c_str());
- }
-#endif
- }
- else
+ SmallString disasm;
+ Log_DebugPrintf("Block at 0x%08X", start_pc);
+ for (const auto& cbi : *instructions)
{
- Log_WarningPrintf("Empty block compiled at 0x%08X", block->key.GetPC());
- return false;
- }
-
-#ifdef ENABLE_RECOMPILER
- if (g_settings.IsUsingRecompiler())
- {
- // Ensure we're not going to run out of space while compiling this block.
- if (s_code_buffer.GetFreeCodeSpace() <
- (block->instructions.size() * Recompiler::MAX_NEAR_HOST_BYTES_PER_INSTRUCTION) ||
- s_code_buffer.GetFreeFarCodeSpace() <
- (block->instructions.size() * Recompiler::MAX_FAR_HOST_BYTES_PER_INSTRUCTION))
- {
- if (allow_flush)
- {
- Log_WarningPrintf("Out of code space, flushing all blocks.");
- Flush();
- }
- else
- {
- Log_ErrorPrintf("Out of code space and cannot flush while compiling %08X.", block->GetPC());
- return false;
- }
- }
-
- s_code_buffer.WriteProtect(false);
- Recompiler::CodeGenerator codegen(&s_code_buffer);
- const bool compile_result = codegen.CompileBlock(block, &block->host_code, &block->host_code_size);
- s_code_buffer.WriteProtect(true);
-
- if (!compile_result)
- {
- Log_ErrorPrintf("Failed to compile host code for block at 0x%08X", block->key.GetPC());
- return false;
- }
+ CPU::DisassembleInstruction(&disasm, cbi.second.pc, cbi.first.bits);
+ Log_DebugPrintf("[%s %s 0x%08X] %08X %s", cbi.second.is_branch_delay_slot ? "BD" : " ",
+ cbi.second.is_load_delay_slot ? "LD" : " ", cbi.second.pc, cbi.first.bits, disasm.c_str());
}
#endif
return true;
}
-#ifdef ENABLE_RECOMPILER
-
-void FastCompileBlockFunction()
+void CPU::CodeCache::CopyRegInfo(InstructionInfo* dst, const InstructionInfo* src)
{
- CodeBlock* block = LookupBlock(GetNextBlockKey(), true);
- if (block)
- {
- s_single_block_asm_dispatcher(block->host_code);
- return;
- }
+ std::memcpy(dst->reg_flags, src->reg_flags, sizeof(dst->reg_flags));
+ std::memcpy(dst->read_reg, src->read_reg, sizeof(dst->read_reg));
+}
- if (g_settings.gpu_pgxp_enable)
+void CPU::CodeCache::SetRegAccess(InstructionInfo* inst, Reg reg, bool write)
+{
+ if (reg == Reg::zero)
+ return;
+
+ if (!write)
{
- if (g_settings.gpu_pgxp_cpu)
- InterpretUncachedBlock<PGXPMode::CPU>();
- else
- InterpretUncachedBlock<PGXPMode::Memory>();
+ for (u32 i = 0; i < std::size(inst->read_reg); i++)
+ {
+ if (inst->read_reg[i] == Reg::zero)
+ {
+ inst->read_reg[i] = reg;
+ break;
+ }
+ }
}
else
{
- InterpretUncachedBlock<PGXPMode::Disabled>();
- }
-}
-
-void InvalidCodeFunction()
-{
- Log_ErrorPrintf("Trying to execute invalid code at 0x%08X", g_state.pc);
- if (g_settings.gpu_pgxp_enable)
- {
- if (g_settings.gpu_pgxp_cpu)
- InterpretUncachedBlock<PGXPMode::CPU>();
- else
- InterpretUncachedBlock<PGXPMode::Memory>();
- }
- else
- {
- InterpretUncachedBlock<PGXPMode::Disabled>();
- }
-}
-
-#endif
-
-static void InvalidateBlock(CodeBlock* block, bool allow_frame_invalidation)
-{
- // Invalidate forces the block to be checked again.
- Log_DebugPrintf("Invalidating block at 0x%08X", block->GetPC());
- block->invalidated = true;
-
- if (block->can_link)
- {
- const u32 frame_number = System::GetFrameNumber();
- if (allow_frame_invalidation)
+#if 0
+ for (u32 i = 0; i < std::size(inst->write_reg); i++)
{
- const u32 frame_diff = frame_number - block->invalidate_frame_number;
- if (frame_diff <= INVALIDATE_THRESHOLD_TO_DISABLE_LINKING)
+ if (inst->write_reg[i] == Reg::zero)
{
- Log_DevPrintf("Block 0x%08X has been invalidated in %u frames, disabling linking", block->GetPC(), frame_diff);
- block->can_link = false;
- }
- else
- {
- // It's been a while since this block was modified, so it's all good.
- block->invalidate_frame_number = frame_number;
+ inst->write_reg[i] = reg;
+ break;
}
}
- else
+#endif
+ }
+}
+
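+// These macros drive the backwards liveness walk in FillBlockRegInfo(): a read
+// keeps the register marked RI_LIVE | RI_USED in the preceding instruction, a
+// write kills it there, and RI_LASTUSE flags the final reference within the
+// block.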
+#define BackpropSetReads(reg) \
+ do \
+ { \
+    if (!(inst->reg_flags[static_cast<u8>(reg)] & RI_USED)) \
+      inst->reg_flags[static_cast<u8>(reg)] |= RI_LASTUSE; \
+    prev->reg_flags[static_cast<u8>(reg)] |= RI_LIVE | RI_USED; \
+    inst->reg_flags[static_cast<u8>(reg)] |= RI_USED; \
+ SetRegAccess(inst, reg, false); \
+ } while (0)
+
+#define BackpropSetWrites(reg) \
+ do \
+ { \
+    prev->reg_flags[static_cast<u8>(reg)] &= ~(RI_LIVE | RI_USED); \
+    if (!(inst->reg_flags[static_cast<u8>(reg)] & RI_USED)) \
+      inst->reg_flags[static_cast<u8>(reg)] |= RI_LASTUSE; \
+    inst->reg_flags[static_cast<u8>(reg)] |= RI_USED; \
+ SetRegAccess(inst, reg, true); \
+ } while (0)
+
+// TODO: memory loads should be delayed one instruction because of stupid load delays.
+#define BackpropSetWritesDelayed(reg) BackpropSetWrites(reg)
+
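+// Walk the block from its last instruction to its first, seeding the final
+// instruction as "all registers live" and propagating read/write information
+// backwards through the Backprop* macros above.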
+void CPU::CodeCache::FillBlockRegInfo(Block* block)
+{
+ const Instruction* iinst = block->Instructions() + (block->size - 1);
+ InstructionInfo* const start = block->InstructionsInfo();
+ InstructionInfo* inst = start + (block->size - 1);
+ std::memset(inst->reg_flags, RI_LIVE, sizeof(inst->reg_flags));
+ std::memset(inst->read_reg, 0, sizeof(inst->read_reg));
+ // std::memset(inst->write_reg, 0, sizeof(inst->write_reg));
+
+ while (inst != start)
+ {
+ InstructionInfo* prev = inst - 1;
+ CopyRegInfo(prev, inst);
+
+ const Reg rs = iinst->r.rs;
+ const Reg rt = iinst->r.rt;
+
+ switch (iinst->op)
{
- // don't trigger frame number based invalidation for this block (e.g. memory save states)
- block->invalidate_frame_number = frame_number - INVALIDATE_THRESHOLD_TO_DISABLE_LINKING - 1;
- }
- }
-
- UnlinkBlock(block);
-
-#ifdef ENABLE_RECOMPILER
- SetFastMap(block->GetPC(), FastCompileBlockFunction);
-#endif
-}
-
-void InvalidateBlocksWithPageIndex(u32 page_index)
-{
- DebugAssert(page_index < Bus::RAM_8MB_CODE_PAGE_COUNT);
- auto& blocks = m_ram_block_map[page_index];
- for (CodeBlock* block : blocks)
- InvalidateBlock(block, true);
-
- // Block will be re-added next execution.
- blocks.clear();
- Bus::ClearRAMCodePage(page_index);
-}
-
-void InvalidateAll()
-{
- for (auto& it : s_blocks)
- {
- CodeBlock* block = it.second;
- if (block && !block->invalidated)
- InvalidateBlock(block, false);
- }
-
- Bus::ClearRAMCodePageFlags();
- for (auto& it : m_ram_block_map)
- it.clear();
-}
-
-void RemoveReferencesToBlock(CodeBlock* block)
-{
- BlockMap::iterator iter = s_blocks.find(block->key.GetPC());
- Assert(iter != s_blocks.end() && iter->second == block);
-
-#ifdef ENABLE_RECOMPILER
- SetFastMap(block->GetPC(), FastCompileBlockFunction);
-#endif
-
- // if it's been invalidated it won't be in the page map
- if (!block->invalidated)
- RemoveBlockFromPageMap(block);
-
- UnlinkBlock(block);
-#ifdef ENABLE_RECOMPILER
- if (!block->invalidated)
- RemoveBlockFromHostCodeMap(block);
-#endif
-
- s_blocks.erase(iter);
-}
-
-void AddBlockToPageMap(CodeBlock* block)
-{
- if (!block->IsInRAM())
- return;
-
- const u32 start_page = block->GetStartPageIndex();
- const u32 end_page = block->GetEndPageIndex();
- for (u32 page = start_page; page <= end_page; page++)
- {
- m_ram_block_map[page].push_back(block);
- Bus::SetRAMCodePage(page);
- }
-}
-
-void RemoveBlockFromPageMap(CodeBlock* block)
-{
- if (!block->IsInRAM())
- return;
-
- const u32 start_page = block->GetStartPageIndex();
- const u32 end_page = block->GetEndPageIndex();
- for (u32 page = start_page; page <= end_page; page++)
- {
- auto& page_blocks = m_ram_block_map[page];
- auto page_block_iter = std::find(page_blocks.begin(), page_blocks.end(), block);
- Assert(page_block_iter != page_blocks.end());
- page_blocks.erase(page_block_iter);
- }
-}
-
-void LinkBlock(CodeBlock* from, CodeBlock* to, void* host_pc, void* host_resolve_pc, u32 host_pc_size)
-{
- Log_DebugPrintf("Linking block %p(%08x) to %p(%08x)", from, from->GetPC(), to, to->GetPC());
-
- CodeBlock::LinkInfo li;
- li.block = to;
- li.host_pc = host_pc;
- li.host_resolve_pc = host_resolve_pc;
- li.host_pc_size = host_pc_size;
- from->link_successors.push_back(li);
-
- li.block = from;
- to->link_predecessors.push_back(li);
-
-#ifdef ENABLE_RECOMPILER
- // apply in code
- if (host_pc)
- {
- Log_ProfilePrintf("Backpatching %p(%08x) to jump to block %p (%08x)", host_pc, from->GetPC(), to, to->GetPC());
- s_code_buffer.WriteProtect(false);
-    Recompiler::CodeGenerator::BackpatchBranch(host_pc, host_pc_size, reinterpret_cast<void*>(to->host_code));
- s_code_buffer.WriteProtect(true);
- }
-#endif
-}
-
-void UnlinkBlock(CodeBlock* block)
-{
- if (block->link_predecessors.empty() && block->link_successors.empty())
- return;
-
-#ifdef ENABLE_RECOMPILER
- if (g_settings.IsUsingRecompiler() && g_settings.cpu_recompiler_block_linking)
- s_code_buffer.WriteProtect(false);
-#endif
-
- for (CodeBlock::LinkInfo& li : block->link_predecessors)
- {
- auto iter = std::find_if(li.block->link_successors.begin(), li.block->link_successors.end(),
- [block](const CodeBlock::LinkInfo& li) { return li.block == block; });
- Assert(iter != li.block->link_successors.end());
-
-#ifdef ENABLE_RECOMPILER
- // Restore blocks linked to this block back to the resolver
- if (li.host_pc)
- {
- Log_ProfilePrintf("Backpatching %p(%08x) [predecessor] to jump to resolver", li.host_pc, li.block->GetPC());
- Recompiler::CodeGenerator::BackpatchBranch(li.host_pc, li.host_pc_size, li.host_resolve_pc);
- }
-#endif
-
- li.block->link_successors.erase(iter);
- }
- block->link_predecessors.clear();
-
- for (CodeBlock::LinkInfo& li : block->link_successors)
- {
- auto iter = std::find_if(li.block->link_predecessors.begin(), li.block->link_predecessors.end(),
- [block](const CodeBlock::LinkInfo& li) { return li.block == block; });
- Assert(iter != li.block->link_predecessors.end());
-
-#ifdef ENABLE_RECOMPILER
- // Restore blocks we're linking to back to the resolver, since the successor won't be linked to us to backpatch if
- // it changes.
- if (li.host_pc)
- {
- Log_ProfilePrintf("Backpatching %p(%08x) [successor] to jump to resolver", li.host_pc, li.block->GetPC());
- Recompiler::CodeGenerator::BackpatchBranch(li.host_pc, li.host_pc_size, li.host_resolve_pc);
- }
-#endif
-
- // Don't have to do anything special for successors - just let the successor know it's no longer linked.
- li.block->link_predecessors.erase(iter);
- }
- block->link_successors.clear();
-
-#ifdef ENABLE_RECOMPILER
- if (g_settings.IsUsingRecompiler() && g_settings.cpu_recompiler_block_linking)
- s_code_buffer.WriteProtect(true);
-#endif
-}
-
-#ifdef ENABLE_RECOMPILER
-
-void AddBlockToHostCodeMap(CodeBlock* block)
-{
- if (!g_settings.IsUsingRecompiler())
- return;
-
- auto ir = s_host_code_map.emplace(block->host_code, block);
- Assert(ir.second);
-}
-
-void RemoveBlockFromHostCodeMap(CodeBlock* block)
-{
- if (!g_settings.IsUsingRecompiler())
- return;
-
- HostCodeMap::iterator hc_iter = s_host_code_map.find(block->host_code);
- Assert(hc_iter != s_host_code_map.end());
- s_host_code_map.erase(hc_iter);
-}
-
-bool InitializeFastmem()
-{
- const CPUFastmemMode mode = g_settings.cpu_fastmem_mode;
- Assert(mode != CPUFastmemMode::Disabled);
-
-#ifdef ENABLE_MMAP_FASTMEM
- const auto handler = (mode == CPUFastmemMode::MMap) ? MMapPageFaultHandler : LUTPageFaultHandler;
-#else
- const auto handler = LUTPageFaultHandler;
- Assert(mode != CPUFastmemMode::MMap);
-#endif
-
- if (!Common::PageFaultHandler::InstallHandler(&s_host_code_map, s_code_buffer.GetCodePointer(),
- s_code_buffer.GetTotalSize(), handler))
- {
- Log_ErrorPrintf("Failed to install page fault handler");
- return false;
- }
-
- Bus::UpdateFastmemViews(mode);
- CPU::UpdateMemoryPointers();
- return true;
-}
-
-void ShutdownFastmem()
-{
- Common::PageFaultHandler::RemoveHandler(&s_host_code_map);
- Bus::UpdateFastmemViews(CPUFastmemMode::Disabled);
- CPU::UpdateMemoryPointers();
-}
-
-#ifdef ENABLE_MMAP_FASTMEM
-
-Common::PageFaultHandler::HandlerResult MMapPageFaultHandler(void* exception_pc, void* fault_address, bool is_write)
-{
-  if (static_cast<u8*>(fault_address) < static_cast<u8*>(g_state.fastmem_base) ||
-      (static_cast<u8*>(fault_address) - static_cast<u8*>(g_state.fastmem_base)) >=
-        static_cast<ptrdiff_t>(Bus::FASTMEM_ARENA_SIZE))
- {
- return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler;
- }
-
-  const PhysicalMemoryAddress fastmem_address = static_cast<PhysicalMemoryAddress>(
-    static_cast<ptrdiff_t>(static_cast<u8*>(fault_address) - static_cast<u8*>(g_state.fastmem_base)));
-
- Log_DevPrintf("Page fault handler invoked at PC=%p Address=%p %s, fastmem offset 0x%08X", exception_pc, fault_address,
- is_write ? "(write)" : "(read)", fastmem_address);
-
- // use upper_bound to find the next block after the pc
-  HostCodeMap::iterator upper_iter =
-    s_host_code_map.upper_bound(reinterpret_cast<CodeBlock::HostCodePointer>(exception_pc));
- if (upper_iter == s_host_code_map.begin())
- return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler;
-
- // then decrement it by one to (hopefully) get the block we want
- upper_iter--;
-
- // find the loadstore info in the code block
- CodeBlock* block = upper_iter->second;
- for (auto bpi_iter = block->loadstore_backpatch_info.begin(); bpi_iter != block->loadstore_backpatch_info.end();
- ++bpi_iter)
- {
- Recompiler::LoadStoreBackpatchInfo& lbi = *bpi_iter;
- if (lbi.host_pc == exception_pc)
- {
- if (is_write && !g_state.cop0_regs.sr.Isc && Bus::IsRAMAddress(fastmem_address))
+ case InstructionOp::funct:
{
- // this is probably a code page, since we aren't going to fault due to requiring fastmem on RAM.
- const u32 code_page_index = Bus::GetRAMCodePageIndex(fastmem_address);
- if (Bus::IsRAMCodePage(code_page_index))
+ const Reg rd = iinst->r.rd;
+
+ switch (iinst->r.funct)
{
- if (++lbi.fault_count < CODE_WRITE_FAULT_THRESHOLD_FOR_SLOWMEM)
- {
- InvalidateBlocksWithPageIndex(code_page_index);
- return Common::PageFaultHandler::HandlerResult::ContinueExecution;
- }
- else
- {
- Log_DevPrintf("Backpatching code write at %p (%08X) address %p (%08X) to slowmem after threshold",
- exception_pc, lbi.guest_pc, fault_address, fastmem_address);
- }
+ case InstructionFunct::sll:
+ case InstructionFunct::srl:
+ case InstructionFunct::sra:
+ BackpropSetWrites(rd);
+ BackpropSetReads(rt);
+ break;
+
+ case InstructionFunct::sllv:
+ case InstructionFunct::srlv:
+ case InstructionFunct::srav:
+ case InstructionFunct::add:
+ case InstructionFunct::addu:
+ case InstructionFunct::sub:
+ case InstructionFunct::subu:
+ case InstructionFunct::and_:
+ case InstructionFunct::or_:
+ case InstructionFunct::xor_:
+ case InstructionFunct::nor:
+ case InstructionFunct::slt:
+ case InstructionFunct::sltu:
+ BackpropSetWrites(rd);
+ BackpropSetReads(rt);
+ BackpropSetReads(rs);
+ break;
+
+ case InstructionFunct::jr:
+ BackpropSetReads(rs);
+ break;
+
+ case InstructionFunct::jalr:
+ BackpropSetReads(rs);
+ BackpropSetWrites(rd);
+ break;
+
+ case InstructionFunct::mfhi:
+ BackpropSetWrites(rd);
+ BackpropSetReads(Reg::hi);
+ break;
+
+ case InstructionFunct::mflo:
+ BackpropSetWrites(rd);
+ BackpropSetReads(Reg::lo);
+ break;
+
+ case InstructionFunct::mthi:
+ BackpropSetWrites(Reg::hi);
+ BackpropSetReads(rs);
+ break;
+
+ case InstructionFunct::mtlo:
+ BackpropSetWrites(Reg::lo);
+ BackpropSetReads(rs);
+ break;
+
+ case InstructionFunct::mult:
+ case InstructionFunct::multu:
+ case InstructionFunct::div:
+ case InstructionFunct::divu:
+ BackpropSetWrites(Reg::hi);
+ BackpropSetWrites(Reg::lo);
+ BackpropSetReads(rs);
+ BackpropSetReads(rt);
+ break;
+
+ case InstructionFunct::syscall:
+ case InstructionFunct::break_:
+ break;
+
+ default:
+ Log_ErrorPrintf("Unknown funct %u", static_cast(iinst->r.funct.GetValue()));
+ break;
}
}
+ break;
- // found it, do fixup
- s_code_buffer.WriteProtect(false);
- const bool backpatch_result = Recompiler::CodeGenerator::BackpatchLoadStore(lbi);
- s_code_buffer.WriteProtect(true);
- if (backpatch_result)
+ case InstructionOp::b:
{
- // remove the backpatch entry since we won't be coming back to this one
- block->loadstore_backpatch_info.erase(bpi_iter);
- return Common::PageFaultHandler::HandlerResult::ContinueExecution;
+      if ((static_cast<u8>(iinst->i.rt.GetValue()) & u8(0x1E)) == u8(0x10))
+ BackpropSetWrites(Reg::ra);
+ BackpropSetReads(rs);
}
- else
- {
- Log_ErrorPrintf("Failed to backpatch %p in block 0x%08X", exception_pc, block->GetPC());
- return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler;
- }
- }
- }
+ break;
- // we didn't find the pc in our list..
- Log_ErrorPrintf("Loadstore PC not found for %p in block 0x%08X", exception_pc, block->GetPC());
- return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler;
+ case InstructionOp::j:
+ break;
+
+ case InstructionOp::jal:
+ BackpropSetWrites(Reg::ra);
+ break;
+
+ case InstructionOp::beq:
+ case InstructionOp::bne:
+ BackpropSetReads(rs);
+ BackpropSetReads(rt);
+ break;
+
+ case InstructionOp::blez:
+ case InstructionOp::bgtz:
+ BackpropSetReads(rs);
+ break;
+
+ case InstructionOp::addi:
+ case InstructionOp::addiu:
+ case InstructionOp::slti:
+ case InstructionOp::sltiu:
+ case InstructionOp::andi:
+ case InstructionOp::ori:
+ case InstructionOp::xori:
+ BackpropSetWrites(rt);
+ BackpropSetReads(rs);
+ break;
+
+ case InstructionOp::lui:
+ BackpropSetWrites(rt);
+ break;
+
+ case InstructionOp::lb:
+ case InstructionOp::lh:
+ case InstructionOp::lw:
+ case InstructionOp::lbu:
+ case InstructionOp::lhu:
+ BackpropSetWritesDelayed(rt);
+ BackpropSetReads(rs);
+ break;
+
+ case InstructionOp::lwl:
+ case InstructionOp::lwr:
+ BackpropSetWritesDelayed(rt);
+ BackpropSetReads(rs);
+ BackpropSetReads(rt);
+ break;
+
+ case InstructionOp::sb:
+ case InstructionOp::sh:
+ case InstructionOp::swl:
+ case InstructionOp::sw:
+ case InstructionOp::swr:
+ BackpropSetReads(rt);
+ BackpropSetReads(rs);
+ break;
+
+ case InstructionOp::cop0:
+ case InstructionOp::cop2:
+ {
+ if (iinst->cop.IsCommonInstruction())
+ {
+ switch (iinst->cop.CommonOp())
+ {
+ case CopCommonInstruction::mfcn:
+ case CopCommonInstruction::cfcn:
+ BackpropSetWritesDelayed(rt);
+ break;
+
+ case CopCommonInstruction::mtcn:
+ case CopCommonInstruction::ctcn:
+ BackpropSetReads(rt);
+ break;
+ }
+ }
+ break;
+
+ case InstructionOp::lwc2:
+ case InstructionOp::swc2:
+ BackpropSetReads(rs);
+ BackpropSetReads(rt);
+ break;
+
+ default:
+ Log_ErrorPrintf("Unknown op %u", static_cast(iinst->r.funct.GetValue()));
+ break;
+ }
+ } // end switch
+
+ inst--;
+ iinst--;
+ } // end while
}
-#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// MARK: - Recompiler Glue
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-Common::PageFaultHandler::HandlerResult LUTPageFaultHandler(void* exception_pc, void* fault_address, bool is_write)
+#ifdef ENABLE_RECOMPILER_SUPPORT
+
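+// Dispatcher entry point for PCs whose LUT slot holds the compile stub:
+// revalidate the existing block when possible, otherwise (re)read and compile
+// it, falling back to the uncached interpreter on failure.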
+void CPU::CodeCache::CompileOrRevalidateBlock(u32 start_pc)
{
- // use upper_bound to find the next block after the pc
-  HostCodeMap::iterator upper_iter =
-    s_host_code_map.upper_bound(reinterpret_cast<CodeBlock::HostCodePointer>(exception_pc));
- if (upper_iter == s_host_code_map.begin())
- return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler;
+ // TODO: this doesn't currently handle when the cache overflows...
+ DebugAssert(IsUsingAnyRecompiler());
- // then decrement it by one to (hopefully) get the block we want
- upper_iter--;
-
- // find the loadstore info in the code block
- CodeBlock* block = upper_iter->second;
- for (auto bpi_iter = block->loadstore_backpatch_info.begin(); bpi_iter != block->loadstore_backpatch_info.end();
- ++bpi_iter)
+ Block* block = LookupBlock(start_pc);
+ if (block)
{
- Recompiler::LoadStoreBackpatchInfo& lbi = *bpi_iter;
- if (lbi.host_pc == exception_pc)
+ // we should only be here if the block got invalidated
+ DebugAssert(block->state != BlockState::Valid);
+ if (RevalidateBlock(block))
{
- // found it, do fixup
- s_code_buffer.WriteProtect(false);
- const bool backpatch_result = Recompiler::CodeGenerator::BackpatchLoadStore(lbi);
- s_code_buffer.WriteProtect(true);
- if (backpatch_result)
- {
- // remove the backpatch entry since we won't be coming back to this one
- block->loadstore_backpatch_info.erase(bpi_iter);
- return Common::PageFaultHandler::HandlerResult::ContinueExecution;
- }
- else
- {
- Log_ErrorPrintf("Failed to backpatch %p in block 0x%08X", exception_pc, block->GetPC());
- return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler;
- }
+ DebugAssert(block->host_code);
+ SetCodeLUT(start_pc, block->host_code);
+ BacklinkBlocks(start_pc, block->host_code);
+ return;
}
+
+ // remove outward links from this block, since we're recompiling it
+ UnlinkBlockExits(block);
}
- // we didn't find the pc in our list..
- Log_ErrorPrintf("Loadstore PC not found for %p in block 0x%08X", exception_pc, block->GetPC());
- return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler;
+ BlockMetadata metadata = {};
+ if (!ReadBlockInstructions(start_pc, &s_block_instructions, &metadata))
+ {
+ Log_ErrorFmt("Failed to read block at 0x{:08X}, falling back to uncached interpreter", start_pc);
+ SetCodeLUT(start_pc, g_interpret_block);
+ BacklinkBlocks(start_pc, g_interpret_block);
+ return;
+ }
+
+ // Ensure we're not going to run out of space while compiling this block.
+ // We could definitely do better here... TODO: far code is no longer needed for newrec
+  const u32 block_size = static_cast<u32>(s_block_instructions.size());
+ if (s_code_buffer.GetFreeCodeSpace() < (block_size * Recompiler::MAX_NEAR_HOST_BYTES_PER_INSTRUCTION) ||
+ s_code_buffer.GetFreeFarCodeSpace() < (block_size * Recompiler::MAX_FAR_HOST_BYTES_PER_INSTRUCTION))
+ {
+ Log_ErrorFmt("Out of code space while compiling {:08X}. Resetting code cache.", start_pc);
+ CodeCache::Reset();
+ }
+
+ if ((block = CreateBlock(start_pc, s_block_instructions, metadata)) == nullptr || block->size == 0 ||
+ !CompileBlock(block))
+ {
+ Log_ErrorFmt("Failed to compile block at 0x{:08X}, falling back to uncached interpreter", start_pc);
+ SetCodeLUT(start_pc, g_interpret_block);
+ BacklinkBlocks(start_pc, g_interpret_block);
+ return;
+ }
+
+ SetCodeLUT(start_pc, block->host_code);
+ BacklinkBlocks(start_pc, block->host_code);
}
-#endif // ENABLE_RECOMPILER
-
-} // namespace CPU::CodeCache
-
-#ifdef ENABLE_RECOMPILER
-
-void CPU::Recompiler::Thunks::ResolveBranch(CodeBlock* block, void* host_pc, void* host_resolve_pc, u32 host_pc_size)
+void CPU::CodeCache::DiscardAndRecompileBlock(u32 start_pc)
{
- using namespace CPU::CodeCache;
+ Log_DevPrintf("Discard block %08X with manual protection", start_pc);
+ Block* block = LookupBlock(start_pc);
+ DebugAssert(block && block->state == BlockState::Valid);
+ InvalidateBlock(block, BlockState::NeedsRecompile);
+ CompileOrRevalidateBlock(start_pc);
+}
- CodeBlockKey key = GetNextBlockKey();
- CodeBlock* successor_block = LookupBlock(key, false);
- if (!successor_block || (successor_block->invalidated && !RevalidateBlock(successor_block, false)) ||
- !block->can_link || !successor_block->can_link)
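+// Pick the jump target for a direct block link: valid blocks link straight to
+// their host code, everything else routes through the interpreter or the
+// compile/revalidate stub; the link site is recorded for later backpatching.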
+const void* CPU::CodeCache::CreateBlockLink(Block* block, void* code, u32 newpc)
+{
+ // self-linking should be handled by the caller
+ DebugAssert(newpc != block->pc);
+
+ const void* dst = g_dispatcher;
+ if (g_settings.cpu_recompiler_block_linking)
{
- // just turn it into a return to the dispatcher instead.
- s_code_buffer.WriteProtect(false);
- CodeGenerator::BackpatchReturn(host_pc, host_pc_size);
- s_code_buffer.WriteProtect(true);
+ const Block* next_block = LookupBlock(newpc);
+ if (next_block)
+ {
+ dst = (next_block->state == BlockState::Valid) ?
+ next_block->host_code :
+ ((next_block->state == BlockState::FallbackToInterpreter) ? g_interpret_block :
+ g_compile_or_revalidate_block);
+ DebugAssert(dst);
+ }
+ else
+ {
+ dst = g_compile_or_revalidate_block;
+ }
+
+ BlockLinkMap::iterator iter = s_block_links.emplace(newpc, code);
+ DebugAssert(block->num_exit_links < MAX_BLOCK_EXIT_LINKS);
+ block->exit_links[block->num_exit_links++] = iter;
+ }
+
+ Log_DebugPrintf("Linking %p with dst pc %08X to %p%s", code, newpc, dst,
+ (dst == g_compile_or_revalidate_block) ? "[compiler]" : "");
+ return dst;
+}
+
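+// Repoint every recorded link site targeting this PC, so callers jump to the
+// new destination (fresh host code, or a stub) without going via the
+// dispatcher.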
+void CPU::CodeCache::BacklinkBlocks(u32 pc, const void* dst)
+{
+ if (!g_settings.cpu_recompiler_block_linking)
+ return;
+
+ const auto link_range = s_block_links.equal_range(pc);
+ for (auto it = link_range.first; it != link_range.second; ++it)
+ {
+ Log_DebugPrintf("Backlinking %p with dst pc %08X to %p%s", it->second, pc, dst,
+ (dst == g_compile_or_revalidate_block) ? "[compiler]" : "");
+ EmitJump(it->second, dst, true);
+ }
+}
+
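+// Drop this block's outgoing link records, e.g. before it gets recompiled.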
+void CPU::CodeCache::UnlinkBlockExits(Block* block)
+{
+ const u32 num_exit_links = block->num_exit_links;
+ for (u32 i = 0; i < num_exit_links; i++)
+ s_block_links.erase(block->exit_links[i]);
+ block->num_exit_links = 0;
+}
+
+JitCodeBuffer& CPU::CodeCache::GetCodeBuffer()
+{
+ return s_code_buffer;
+}
+
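+// Select the uncached-interpreter entry point matching the current PGXP
+// settings; used as the fallback when a block cannot be compiled.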
+const void* CPU::CodeCache::GetInterpretUncachedBlockFunction()
+{
+ if (g_settings.gpu_pgxp_enable)
+ {
+ if (g_settings.gpu_pgxp_cpu)
+      return reinterpret_cast<const void*>(InterpretUncachedBlock<PGXPMode::CPU>);
+ else
+      return reinterpret_cast<const void*>(InterpretUncachedBlock<PGXPMode::Memory>);
}
else
{
- // link blocks!
- LinkBlock(block, successor_block, host_pc, host_resolve_pc, host_pc_size);
+    return reinterpret_cast<const void*>(InterpretUncachedBlock<PGXPMode::Disabled>);
}
}
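+
+// Null out the cached ASM entry points; CompileASMFunctions() regenerates them.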
+void CPU::CodeCache::ClearASMFunctions()
+{
+ g_enter_recompiler = nullptr;
+ g_compile_or_revalidate_block = nullptr;
+ g_check_events_and_dispatch = nullptr;
+ g_run_events_and_dispatch = nullptr;
+ g_dispatcher = nullptr;
+ g_interpret_block = nullptr;
+ g_discard_and_recompile_block = nullptr;
+
+#ifdef _DEBUG
+ s_total_instructions_compiled = 0;
+ s_total_host_instructions_emitted = 0;
+#endif
+}
+
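+// Emit the dispatcher/trampoline ASM functions into the code buffer and, when
+// profiling is enabled, register them with the perf scope.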
+void CPU::CodeCache::CompileASMFunctions()
+{
+ s_code_buffer.WriteProtect(false);
+
+ const u32 asm_size = EmitASMFunctions(s_code_buffer.GetFreeCodePointer(), s_code_buffer.GetFreeCodeSpace());
+
+#ifdef ENABLE_RECOMPILER_PROFILING
+ MIPSPerfScope.Register(s_code_buffer.GetFreeCodePointer(), asm_size, "ASMFunctions");
+#endif
+
+ s_code_buffer.CommitCode(asm_size);
+ s_code_buffer.WriteProtect(true);
+}
+
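+// Generate host code for the block; on failure the block is marked
+// FallbackToInterpreter so it is not retried on every dispatch.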
+bool CPU::CodeCache::CompileBlock(Block* block)
+{
+ s_code_buffer.WriteProtect(false);
+
+ const void* host_code = nullptr;
+ u32 host_code_size = 0;
+ u32 host_far_code_size = 0;
+
+#ifdef ENABLE_RECOMPILER
+ if (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler)
+ {
+ Recompiler::CodeGenerator codegen(&s_code_buffer);
+ host_code = codegen.CompileBlock(block, &host_code_size, &host_far_code_size);
+ }
+#endif
+
+ s_code_buffer.WriteProtect(true);
+
+ block->host_code = host_code;
+
+ if (!host_code)
+ {
+ Log_ErrorFmt("Failed to compile host code for block at 0x{:08X}", block->pc);
+ block->state = BlockState::FallbackToInterpreter;
+ return false;
+ }
+
+#ifdef _DEBUG
+ const u32 host_instructions = GetHostInstructionCount(host_code, host_code_size);
+ s_total_instructions_compiled += block->size;
+ s_total_host_instructions_emitted += host_instructions;
+
+ Log_ProfileFmt("0x{:08X}: {}/{}b for {}b ({}i), blowup: {:.2f}x, cache: {:.2f}%/{:.2f}%, ipi: {:.2f}/{:.2f}",
+ block->pc, host_code_size, host_far_code_size, block->size * 4, block->size,
+ static_cast(host_code_size) / static_cast(block->size * 4), s_code_buffer.GetUsedPct(),
+ s_code_buffer.GetFarUsedPct(), static_cast(host_instructions) / static_cast(block->size),
+ static_cast(s_total_host_instructions_emitted) /
+ static_cast(s_total_instructions_compiled));
+#else
+ Log_ProfileFmt("0x{:08X}: {}/{}b for {}b ({} inst), blowup: {:.2f}x, cache: {:.2f}%/{:.2f}%", block->pc,
+ host_code_size, host_far_code_size, block->size * 4, block->size,
+ static_cast(host_code_size) / static_cast(block->size * 4), s_code_buffer.GetUsedPct(),
+ s_code_buffer.GetFarUsedPct());
+#endif
+
+#if 0
+ Log_DebugPrint("***HOST CODE**");
+ DisassembleAndLogHostCode(host_code, host_code_size);
+#endif
+
+#ifdef ENABLE_RECOMPILER_PROFILING
+ MIPSPerfScope.RegisterPC(host_code, host_code_size, block->pc);
+#endif
+
+ return true;
+}
+
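+// Record fastmem backpatch metadata keyed on the host instruction address, so
+// a later page fault at that address can be rewritten to the slow path.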
+void CPU::CodeCache::AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, const void* thunk_address)
+{
+  DebugAssert(code_size < std::numeric_limits<u8>::max());
+
+ auto iter = s_fastmem_backpatch_info.find(code_address);
+ if (iter != s_fastmem_backpatch_info.end())
+ s_fastmem_backpatch_info.erase(iter);
+
+ LoadstoreBackpatchInfo info;
+ info.thunk_address = thunk_address;
+ info.guest_pc = guest_pc;
+  info.code_size = static_cast<u8>(code_size);
+ s_fastmem_backpatch_info.emplace(code_address, info);
+}
+
+void CPU::CodeCache::AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, TickCount cycles,
+ u32 gpr_bitmask, u8 address_register, u8 data_register, MemoryAccessSize size,
+ bool is_signed, bool is_load)
+{
+  DebugAssert(code_size < std::numeric_limits<u8>::max());
+  DebugAssert(cycles >= 0 && cycles < std::numeric_limits<u16>::max());
+
+ auto iter = s_fastmem_backpatch_info.find(code_address);
+ if (iter != s_fastmem_backpatch_info.end())
+ s_fastmem_backpatch_info.erase(iter);
+
+ LoadstoreBackpatchInfo info;
+ info.thunk_address = nullptr;
+ info.guest_pc = guest_pc;
+ info.gpr_bitmask = gpr_bitmask;
+  info.cycles = static_cast<u16>(cycles);
+ info.address_register = address_register;
+ info.data_register = data_register;
+  info.size = static_cast<u16>(size);
+ info.is_signed = is_signed;
+ info.is_load = is_load;
+  info.code_size = static_cast<u8>(code_size);
+ s_fastmem_backpatch_info.emplace(code_address, info);
+}
+
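+// Page fault handler for fastmem accesses: resolve the guest address (only
+// possible in mmap mode), let RAM code writes invalidate blocks, and otherwise
+// backpatch the faulting load/store to its slow path.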
+Common::PageFaultHandler::HandlerResult CPU::CodeCache::HandleFastmemException(void* exception_pc, void* fault_address,
+ bool is_write)
+{
+ // TODO: Catch general RAM writes, not just fastmem
+ PhysicalMemoryAddress guest_address;
+
+#ifdef ENABLE_MMAP_FASTMEM
+ if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap)
+ {
+    if (static_cast<u8*>(fault_address) < static_cast<u8*>(g_state.fastmem_base) ||
+        (static_cast<u8*>(fault_address) - static_cast<u8*>(g_state.fastmem_base)) >=
+          static_cast<ptrdiff_t>(Bus::FASTMEM_ARENA_SIZE))
+ {
+ return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler;
+ }
+
+    guest_address = static_cast<PhysicalMemoryAddress>(
+      static_cast<ptrdiff_t>(static_cast<u8*>(fault_address) - static_cast<u8*>(g_state.fastmem_base)));
+ }
+ else
+#endif
+ {
+ // LUT fastmem - we can't compute the address.
+    guest_address = std::numeric_limits<PhysicalMemoryAddress>::max();
+ }
+
+ Log_DevFmt("Page fault handler invoked at PC={} Address={} {}, fastmem offset {:08X}", exception_pc, fault_address,
+ is_write ? "(write)" : "(read)", guest_address);
+
+ auto iter = s_fastmem_backpatch_info.find(exception_pc);
+ if (iter == s_fastmem_backpatch_info.end())
+ {
+ Log_ErrorFmt("No backpatch info found for {}", exception_pc);
+ return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler;
+ }
+
+ // if we're writing to ram, let it go through a few times, and use manual block protection to sort it out
+ // TODO: path for manual protection to return back to read-only pages
+ LoadstoreBackpatchInfo& info = iter->second;
+ if (is_write && !g_state.cop0_regs.sr.Isc && AddressInRAM(guest_address))
+ {
+ Log_DevFmt("Ignoring fault due to RAM write @ 0x{:08X}", guest_address);
+ InvalidateBlocksWithPageIndex(Bus::GetRAMCodePageIndex(guest_address));
+ return Common::PageFaultHandler::HandlerResult::ContinueExecution;
+ }
+
+ Log_DevFmt("Backpatching {} at {}[{}] (pc {:08X} addr {:08X}): Bitmask {:08X} Addr {} Data {} Size {} Signed {:02X}",
+ info.is_load ? "load" : "store", exception_pc, info.code_size, info.guest_pc, guest_address,
+             info.gpr_bitmask, static_cast<u32>(info.address_register), static_cast<u32>(info.data_register),
+             info.AccessSizeInBytes(), static_cast<u32>(info.is_signed));
+
+ BackpatchLoadStore(exception_pc, info);
+
+ // TODO: queue block for recompilation later
+
+ // and store the pc in the faulting list, so that we don't emit another fastmem loadstore
+ s_fastmem_faulting_pcs.insert(info.guest_pc);
+ s_fastmem_backpatch_info.erase(iter);
+ return Common::PageFaultHandler::HandlerResult::ContinueExecution;
+}
+
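+// Unprotect the code buffer and hand the faulting site to the active
+// recompiler backend for patching.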
+void CPU::CodeCache::BackpatchLoadStore(void* host_pc, const LoadstoreBackpatchInfo& info)
+{
+ s_code_buffer.WriteProtect(false);
+
+#ifdef ENABLE_RECOMPILER
+ if (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler)
+ Recompiler::CodeGenerator::BackpatchLoadStore(host_pc, info);
+#endif
+
+ s_code_buffer.WriteProtect(true);
+}
+
+#ifdef ENABLE_RECOMPILER
+
void CPU::Recompiler::Thunks::LogPC(u32 pc)
{
-#if 1
+#if 0
+ const u32 cyc = TimingEvents::GetGlobalTickCounter() + GetPendingTicks();
+ s_last_cyc = cyc;
+ if (s_last_cyc == 3302138733)
+ __debugbreak();
+#endif
+#if 0
CPU::CodeCache::LogCurrentState();
#endif
#if 0
- if (TimingEvents::GetGlobalTickCounter() + GetPendingTicks() == 382856482)
+ if (TimingEvents::GetGlobalTickCounter() + GetPendingTicks() == 181991709)
__debugbreak();
#endif
}
#endif // ENABLE_RECOMPILER
+
+#endif // ENABLE_RECOMPILER_SUPPORT
diff --git a/src/core/cpu_code_cache.h b/src/core/cpu_code_cache.h
index 6411253a0..126243afa 100644
--- a/src/core/cpu_code_cache.h
+++ b/src/core/cpu_code_cache.h
@@ -1,160 +1,42 @@
-// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin
+// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
+
#include "bus.h"
-#include "common/bitfield.h"
#include "cpu_types.h"
-#include "util/jit_code_buffer.h"
-#include "util/page_fault_handler.h"
-#include <array>
-#include <map>