CPU/Recompiler: Write exception exits to far code buffer

Keeps the hot path nice and clean.
This commit is contained in:
Connor McLaughlin 2019-11-22 17:57:02 +10:00
parent 7b0978119b
commit 11966e4caf
7 changed files with 345 additions and 286 deletions

View File

@ -7,10 +7,12 @@
#include <sys/mman.h> #include <sys/mman.h>
#endif #endif
/// Allocates one contiguous read/write/execute region holding the near code area (`size` bytes)
/// immediately followed by the far code area (`far_code_size` bytes).
/// Calls Panic() when no memory could be obtained.
JitCodeBuffer::JitCodeBuffer(size_t size /* = 64 * 1024 * 1024 */, size_t far_code_size /* = 0 */)
{
  m_total_size = size + far_code_size;

#if defined(Y_PLATFORM_WINDOWS)
  m_code_ptr = VirtualAlloc(nullptr, m_total_size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
#elif defined(Y_PLATFORM_LINUX) || defined(Y_PLATFORM_ANDROID)
  // Map the near AND far areas. Mapping only `size` bytes would leave the far area unmapped,
  // even though m_far_code_ptr points into it and the destructor unmaps m_total_size bytes.
  m_code_ptr = mmap(nullptr, m_total_size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

  // mmap() reports failure with MAP_FAILED rather than a null pointer; normalise it so the
  // check below catches it.
  if (m_code_ptr == MAP_FAILED)
    m_code_ptr = nullptr;
#else
  // No allocator for this platform; falls through to the Panic below.
  m_code_ptr = nullptr;
#endif

  // Check before doing any pointer arithmetic on the base address.
  if (!m_code_ptr)
    Panic("Failed to allocate code space.");

  m_free_code_ptr = m_code_ptr;
  m_code_size = size;
  m_code_used = 0;

  // The far area starts directly after the near area.
  m_far_code_ptr = static_cast<u8*>(m_code_ptr) + size;
  m_free_far_code_ptr = m_far_code_ptr;
  m_far_code_size = far_code_size;
  m_far_code_used = 0;
}
@ -27,34 +33,39 @@ JitCodeBuffer::JitCodeBuffer(size_t size)
/// Returns the code region (near and far areas together) to the operating system.
JitCodeBuffer::~JitCodeBuffer()
{
#if defined(Y_PLATFORM_WINDOWS)
  // With MEM_RELEASE the size argument must be 0; a non-zero size makes VirtualFree fail,
  // which would leak the whole region.
  VirtualFree(m_code_ptr, 0, MEM_RELEASE);
#elif defined(Y_PLATFORM_LINUX) || defined(Y_PLATFORM_ANDROID)
  munmap(m_code_ptr, m_total_size);
#endif
}
void JitCodeBuffer::CommitCode(size_t length) void JitCodeBuffer::CommitCode(size_t length)
{ {
// // Function alignment?
// size_t extra_bytes = ((length % 16) != 0) ? (16 - (length % 16)) : 0;
// for (size_t i = 0; i < extra_bytes; i++)
// reinterpret_cast<char*>(m_free_code_ptr)[i] = 0xCC;
Assert(length <= (m_code_size - m_code_used)); Assert(length <= (m_code_size - m_code_used));
m_free_code_ptr = reinterpret_cast<char*>(m_free_code_ptr) + length; m_free_code_ptr = reinterpret_cast<u8*>(m_free_code_ptr) + length;
m_code_used += length; m_code_used += length;
} }
/// Marks `length` bytes starting at the free far-code pointer as used and moves the pointer
/// past them. Asserts that the far area has at least `length` bytes left.
void JitCodeBuffer::CommitFarCode(size_t length)
{
  Assert(length <= (m_far_code_size - m_far_code_used));

  m_far_code_used += length;
  m_free_far_code_ptr = static_cast<u8*>(m_free_far_code_ptr) + length;
}
void JitCodeBuffer::Reset() void JitCodeBuffer::Reset()
{ {
#if defined(Y_PLATFORM_WINDOWS) #if defined(Y_PLATFORM_WINDOWS)
FlushInstructionCache(GetCurrentProcess(), m_code_ptr, m_code_size); FlushInstructionCache(GetCurrentProcess(), m_code_ptr, m_total_size);
#elif defined(Y_PLATFORM_LINUX) || defined(Y_PLATFORM_ANDROID) #elif defined(Y_PLATFORM_LINUX) || defined(Y_PLATFORM_ANDROID)
// TODO // TODO
#endif #endif
m_free_code_ptr = m_code_ptr; m_free_code_ptr = m_code_ptr;
m_code_used = 0; m_code_used = 0;
m_free_far_code_ptr = m_far_code_ptr;
m_far_code_used = 0;
} }
void JitCodeBuffer::Align(u32 alignment, u8 padding_value) void JitCodeBuffer::Align(u32 alignment, u8 padding_value)

View File

@ -4,13 +4,18 @@
class JitCodeBuffer class JitCodeBuffer
{ {
public: public:
JitCodeBuffer(size_t size = 64 * 1024 * 1024); JitCodeBuffer(size_t size = 64 * 1024 * 1024, size_t far_code_size = 0);
~JitCodeBuffer(); ~JitCodeBuffer();
void Reset();
void* GetFreeCodePointer() const { return m_free_code_ptr; } void* GetFreeCodePointer() const { return m_free_code_ptr; }
size_t GetFreeCodeSpace() const { return (m_code_size - m_code_used); } size_t GetFreeCodeSpace() const { return (m_code_size - m_code_used); }
void CommitCode(size_t length); void CommitCode(size_t length);
void Reset();
void* GetFreeFarCodePointer() const { return m_free_far_code_ptr; }
size_t GetFreeFarCodeSpace() const { return (m_far_code_size - m_far_code_used); }
void CommitFarCode(size_t length);
/// Adjusts the free code pointer to the specified alignment, padding with bytes. /// Adjusts the free code pointer to the specified alignment, padding with bytes.
/// Assumes alignment is a power-of-two. /// Assumes alignment is a power-of-two.
@ -21,5 +26,12 @@ private:
void* m_free_code_ptr; void* m_free_code_ptr;
size_t m_code_size; size_t m_code_size;
size_t m_code_used; size_t m_code_used;
void* m_far_code_ptr;
void* m_free_far_code_ptr;
size_t m_far_code_size;
size_t m_far_code_used;
size_t m_total_size;
}; };

View File

@ -9,8 +9,11 @@ Log_SetChannel(CPU::CodeCache);
namespace CPU { namespace CPU {
bool USE_CODE_CACHE = true; bool USE_CODE_CACHE = false;
bool USE_RECOMPILER = true; bool USE_RECOMPILER = false;
static constexpr size_t RECOMPILER_CODE_CACHE_SIZE = 32 * 1024 * 1024;
static constexpr size_t RECOMPILER_FAR_CODE_CACHE_SIZE = 32 * 1024 * 1024;
CodeCache::CodeCache() = default; CodeCache::CodeCache() = default;
@ -22,7 +25,7 @@ void CodeCache::Initialize(System* system, Core* core, Bus* bus)
m_core = core; m_core = core;
m_bus = bus; m_bus = bus;
m_code_buffer = std::make_unique<JitCodeBuffer>(); m_code_buffer = std::make_unique<JitCodeBuffer>(RECOMPILER_CODE_CACHE_SIZE, RECOMPILER_FAR_CODE_CACHE_SIZE);
m_asm_functions = std::make_unique<Recompiler::ASMFunctions>(); m_asm_functions = std::make_unique<Recompiler::ASMFunctions>();
m_asm_functions->Generate(m_code_buffer.get()); m_asm_functions->Generate(m_code_buffer.get());
} }
@ -50,7 +53,7 @@ void CodeCache::Execute()
#if 0 #if 0
const u32 tick = m_system->GetGlobalTickCounter() + m_core->GetPendingTicks(); const u32 tick = m_system->GetGlobalTickCounter() + m_core->GetPendingTicks();
if (tick == 8950812) if (tick == 58672386)
__debugbreak(); __debugbreak();
#endif #endif
@ -184,6 +187,11 @@ bool CodeCache::CompileBlock(CodeBlock* block)
bool is_branch_delay_slot = false; bool is_branch_delay_slot = false;
bool is_load_delay_slot = false; bool is_load_delay_slot = false;
#if 0
if (pc == 0x0005aa90)
__debugbreak();
#endif
for (;;) for (;;)
{ {
CodeBlockInstruction cbi = {}; CodeBlockInstruction cbi = {};
@ -247,7 +255,10 @@ bool CodeCache::CompileBlock(CodeBlock* block)
if (USE_RECOMPILER) if (USE_RECOMPILER)
{ {
// Ensure we're not going to run out of space while compiling this block. // Ensure we're not going to run out of space while compiling this block.
if (m_code_buffer->GetFreeCodeSpace() < (block->instructions.size() * Recompiler::MAX_HOST_BYTES_PER_INSTRUCTION)) if (m_code_buffer->GetFreeCodeSpace() <
(block->instructions.size() * Recompiler::MAX_NEAR_HOST_BYTES_PER_INSTRUCTION) ||
m_code_buffer->GetFreeFarCodeSpace() <
(block->instructions.size() * Recompiler::MAX_FAR_HOST_BYTES_PER_INSTRUCTION))
{ {
Log_WarningPrintf("Out of code space, flushing all blocks."); Log_WarningPrintf("Out of code space, flushing all blocks.");
Reset(); Reset();

View File

@ -7,7 +7,8 @@ namespace CPU::Recompiler {
/// Creates one emitter over the code buffer's free near space and one over its free far space.
/// m_emit, the emitter currently written through, starts out pointing at the near emitter.
CodeGenerator::CodeGenerator(Core* cpu, JitCodeBuffer* code_buffer, const ASMFunctions& asm_functions)
  : m_cpu(cpu),
    m_code_buffer(code_buffer),
    m_asm_functions(asm_functions),
    m_register_cache(*this),
    m_near_emitter(code_buffer->GetFreeCodeSpace(), code_buffer->GetFreeCodePointer()),
    m_far_emitter(code_buffer->GetFreeFarCodeSpace(), code_buffer->GetFreeFarCodePointer()),
    m_emit(&m_near_emitter)
{
  InitHostRegs();
}
@ -608,7 +609,7 @@ void CodeGenerator::BlockPrologue()
void CodeGenerator::BlockEpilogue() void CodeGenerator::BlockEpilogue()
{ {
#if defined(_DEBUG) && defined(Y_CPU_X64) #if defined(_DEBUG) && defined(Y_CPU_X64)
m_emit.nop(); m_emit->nop();
#endif #endif
m_register_cache.FlushAllGuestRegisters(true, true); m_register_cache.FlushAllGuestRegisters(true, true);
@ -632,7 +633,7 @@ void CodeGenerator::InstructionPrologue(const CodeBlockInstruction& cbi, TickCou
bool force_sync /* = false */) bool force_sync /* = false */)
{ {
#if defined(_DEBUG) && defined(Y_CPU_X64) #if defined(_DEBUG) && defined(Y_CPU_X64)
m_emit.nop(); m_emit->nop();
#endif #endif
// reset dirty flags // reset dirty flags

View File

@ -33,9 +33,6 @@ public:
static const char* GetHostRegName(HostReg reg, RegSize size = HostPointerSize); static const char* GetHostRegName(HostReg reg, RegSize size = HostPointerSize);
static void AlignCodeBuffer(JitCodeBuffer* code_buffer); static void AlignCodeBuffer(JitCodeBuffer* code_buffer);
RegisterCache& GetRegisterCache() { return m_register_cache; }
CodeEmitter& GetCodeEmitter() { return m_emit; }
bool CompileBlock(const CodeBlock* block, CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size); bool CompileBlock(const CodeBlock* block, CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size);
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
@ -146,14 +143,18 @@ private:
Value ConvertValueSize(const Value& value, RegSize size, bool sign_extend); Value ConvertValueSize(const Value& value, RegSize size, bool sign_extend);
void ConvertValueSizeInPlace(Value* value, RegSize size, bool sign_extend); void ConvertValueSizeInPlace(Value* value, RegSize size, bool sign_extend);
void SwitchToFarCode();
void SwitchToNearCode();
void* GetCurrentNearCodePointer() const;
void* GetCurrentFarCodePointer() const;
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// Code Generation Helpers // Code Generation Helpers
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// branch target, memory address, etc // branch target, memory address, etc
void BlockPrologue(); void BlockPrologue();
void BlockEpilogue(); void BlockEpilogue();
void InstructionPrologue(const CodeBlockInstruction& cbi, TickCount cycles, void InstructionPrologue(const CodeBlockInstruction& cbi, TickCount cycles, bool force_sync = false);
bool force_sync = false);
void InstructionEpilogue(const CodeBlockInstruction& cbi); void InstructionEpilogue(const CodeBlockInstruction& cbi);
void SyncCurrentInstructionPC(); void SyncCurrentInstructionPC();
void SyncPC(); void SyncPC();
@ -182,7 +183,9 @@ private:
const CodeBlockInstruction* m_block_start = nullptr; const CodeBlockInstruction* m_block_start = nullptr;
const CodeBlockInstruction* m_block_end = nullptr; const CodeBlockInstruction* m_block_end = nullptr;
RegisterCache m_register_cache; RegisterCache m_register_cache;
CodeEmitter m_emit; CodeEmitter m_near_emitter;
CodeEmitter m_far_emitter;
CodeEmitter* m_emit;
u32 m_delayed_pc_add = 0; u32 m_delayed_pc_add = 0;
TickCount m_delayed_cycles_add = 0; TickCount m_delayed_cycles_add = 0;
@ -197,4 +200,4 @@ private:
bool m_next_load_delay_dirty = false; bool m_next_load_delay_dirty = false;
}; };
} // namespace CPU_X86::Recompiler } // namespace CPU::Recompiler

File diff suppressed because it is too large Load Diff

View File

@ -35,7 +35,8 @@ constexpr HostReg HostReg_Invalid = static_cast<HostReg>(HostReg_Count);
constexpr RegSize HostPointerSize = RegSize_64; constexpr RegSize HostPointerSize = RegSize_64;
// A reasonable "maximum" number of bytes per instruction. // A reasonable "maximum" number of bytes per instruction.
constexpr u32 MAX_HOST_BYTES_PER_INSTRUCTION = 128; constexpr u32 MAX_NEAR_HOST_BYTES_PER_INSTRUCTION = 64;
constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128;
// Are shifts implicitly masked to 0..31? // Are shifts implicitly masked to 0..31?
constexpr bool SHIFTS_ARE_IMPLICITLY_MASKED = true; constexpr bool SHIFTS_ARE_IMPLICITLY_MASKED = true;