From 9d40164f84b15338f6df9a6c64a4a2cd973d071b Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sat, 7 Oct 2023 17:31:19 +1000 Subject: [PATCH] CPU/Recompiler: Offset LUT fastmem pointers --- src/core/bus.cpp | 19 +- src/core/bus.h | 5 +- src/core/cpu_core.cpp | 2 +- .../cpu_recompiler_code_generator_aarch32.cpp | 7 +- .../cpu_recompiler_code_generator_aarch64.cpp | 199 +++----- .../cpu_recompiler_code_generator_x64.cpp | 480 ++++++------------ 6 files changed, 248 insertions(+), 464 deletions(-) diff --git a/src/core/bus.cpp b/src/core/bus.cpp index 5ecefec9b..25d52cb3b 100644 --- a/src/core/bus.cpp +++ b/src/core/bus.cpp @@ -141,6 +141,8 @@ static void SetRAMSize(bool enable_8mb_ram); static std::tuple CalculateMemoryTiming(MEMDELAY mem_delay, COMDELAY common_delay); static void RecalculateMemoryTimings(); +static u8* GetLUTFastmemPointer(u32 address, u8* ram_ptr); + static void SetRAMPageWritable(u32 page_index, bool writable); static void SetHandlers(); @@ -460,18 +462,23 @@ CPUFastmemMode Bus::GetFastmemMode() return s_fastmem_mode; } -void* Bus::GetFastmemBase() +void* Bus::GetFastmemBase(bool isc) { #ifdef ENABLE_MMAP_FASTMEM if (s_fastmem_mode == CPUFastmemMode::MMap) - return s_fastmem_arena.BasePointer(); + return isc ? nullptr : s_fastmem_arena.BasePointer(); #endif if (s_fastmem_mode == CPUFastmemMode::LUT) - return reinterpret_cast(s_fastmem_lut); + return reinterpret_cast(s_fastmem_lut + (isc ? (FASTMEM_LUT_SIZE * sizeof(void*)) : 0)); return nullptr; } +u8* Bus::GetLUTFastmemPointer(u32 address, u8* ram_ptr) +{ + return ram_ptr - address; +} + void Bus::UpdateFastmemViews(CPUFastmemMode mode) { #ifndef ENABLE_MMAP_FASTMEM @@ -530,20 +537,20 @@ void Bus::UpdateFastmemViews(CPUFastmemMode mode) if (!s_fastmem_lut) { - s_fastmem_lut = static_cast(std::malloc(sizeof(u8*) * FASTMEM_LUT_SIZE)); + s_fastmem_lut = static_cast(std::malloc(sizeof(u8*) * FASTMEM_LUT_SLOTS)); Assert(s_fastmem_lut); Log_InfoPrintf("Fastmem base (software): %p", s_fastmem_lut); } - std::memset(s_fastmem_lut, 0, sizeof(u8*) * FASTMEM_LUT_SIZE); + std::memset(s_fastmem_lut, 0, sizeof(u8*) * FASTMEM_LUT_SLOTS); auto MapRAM = [](u32 base_address) { u8* ram_ptr = g_ram + (base_address & g_ram_mask); for (u32 address = 0; address < g_ram_size; address += FASTMEM_LUT_PAGE_SIZE) { const u32 lut_index = (base_address + address) >> FASTMEM_LUT_PAGE_SHIFT; - s_fastmem_lut[lut_index] = ram_ptr; + s_fastmem_lut[lut_index] = GetLUTFastmemPointer(base_address + address, ram_ptr); ram_ptr += FASTMEM_LUT_PAGE_SIZE; } }; diff --git a/src/core/bus.h b/src/core/bus.h index ee609f4c6..1d87eb53e 100644 --- a/src/core/bus.h +++ b/src/core/bus.h @@ -96,7 +96,8 @@ enum : u32 FASTMEM_LUT_PAGE_SIZE = 4096, FASTMEM_LUT_PAGE_MASK = FASTMEM_LUT_PAGE_SIZE - 1, FASTMEM_LUT_PAGE_SHIFT = 12, - FASTMEM_LUT_SIZE = 0x100000, // 0x100000000 >> 12 + FASTMEM_LUT_SIZE = 0x100000, // 0x100000000 >> 12 + FASTMEM_LUT_SLOTS = FASTMEM_LUT_SIZE * 2, // [isc] }; #ifdef ENABLE_MMAP_FASTMEM @@ -125,7 +126,7 @@ ALWAYS_INLINE_RELEASE static FP* OffsetHandlerArray(void** handlers, MemoryAcces } CPUFastmemMode GetFastmemMode(); -void* GetFastmemBase(); +void* GetFastmemBase(bool isc); void UpdateFastmemViews(CPUFastmemMode mode); bool CanUseFastmemForAddress(VirtualMemoryAddress address); diff --git a/src/core/cpu_core.cpp b/src/core/cpu_core.cpp index 8eacdf163..531453bde 100644 --- a/src/core/cpu_core.cpp +++ b/src/core/cpu_core.cpp @@ -2396,7 +2396,7 @@ ALWAYS_INLINE_RELEASE Bus::MemoryWriteHandler CPU::GetMemoryWriteHandler(Virtual void CPU::UpdateMemoryPointers() { g_state.memory_handlers = Bus::GetMemoryHandlers(g_state.cop0_regs.sr.Isc, g_state.cop0_regs.sr.Swc); - g_state.fastmem_base = g_state.cop0_regs.sr.Isc ? nullptr : Bus::GetFastmemBase(); + g_state.fastmem_base = Bus::GetFastmemBase(g_state.cop0_regs.sr.Isc); } void CPU::ExecutionModeChanged() diff --git a/src/core/cpu_recompiler_code_generator_aarch32.cpp b/src/core/cpu_recompiler_code_generator_aarch32.cpp index e1fb940ab..45f61c470 100644 --- a/src/core/cpu_recompiler_code_generator_aarch32.cpp +++ b/src/core/cpu_recompiler_code_generator_aarch32.cpp @@ -1367,22 +1367,21 @@ void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size, } m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT); - m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_MASK); m_emit->ldr(GetHostReg32(RARG1), a32::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RARG1), a32::LSL, 2)); // pointer load switch (size) { case RegSize_8: - m_emit->ldrb(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2))); + m_emit->ldrb(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg))); break; case RegSize_16: - m_emit->ldrh(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2))); + m_emit->ldrh(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg))); break; case RegSize_32: - m_emit->ldr(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2))); + m_emit->ldr(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg))); break; default: diff --git a/src/core/cpu_recompiler_code_generator_aarch64.cpp b/src/core/cpu_recompiler_code_generator_aarch64.cpp index 887bad787..19672d106 100644 --- a/src/core/cpu_recompiler_code_generator_aarch64.cpp +++ b/src/core/cpu_recompiler_code_generator_aarch64.cpp @@ -1734,60 +1734,38 @@ void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size, address_reg = address.host_reg; } - if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap) - { - switch (size) - { - case RegSize_8: - m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg))); - break; - - case RegSize_16: - m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg))); - break; - - case RegSize_32: - m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg))); - break; - - default: - UnreachableCode(); - break; - } - } - else + if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) { m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT); - m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_MASK); - m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RARG1), a64::LSL, 3)); + m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg64(RARG1), a64::LSL, 3)); + } - switch (size) - { - case RegSize_8: - m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2))); - break; + const a64::XRegister membase = + (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg(); - case RegSize_16: - m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2))); - break; + switch (size) + { + case RegSize_8: + m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg))); + break; - case RegSize_32: - m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2))); - break; + case RegSize_16: + m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg))); + break; - default: - UnreachableCode(); - break; - } + case RegSize_32: + m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg))); + break; + + default: + UnreachableCode(); + break; } } void CodeGenerator::EmitLoadGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info, const Value& address, RegSize size, Value& result) { - // fastmem - void* host_pc = GetCurrentNearCodePointer(); - HostReg address_reg; if (address.IsConstant()) { @@ -1799,57 +1777,36 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(Instruction instruction, const Co address_reg = address.host_reg; } - m_register_cache.InhibitAllocation(); - - if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap) - { - host_pc = GetCurrentNearCodePointer(); - - switch (size) - { - case RegSize_8: - m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg))); - break; - - case RegSize_16: - m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg))); - break; - - case RegSize_32: - m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg))); - break; - - default: - UnreachableCode(); - break; - } - } - else + if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) { m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT); - m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_MASK); - m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RARG1), a64::LSL, 3)); + m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg64(RARG1), a64::LSL, 3)); + } - host_pc = GetCurrentNearCodePointer(); + const a64::XRegister membase = + (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg(); - switch (size) - { - case RegSize_8: - m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2))); - break; + m_register_cache.InhibitAllocation(); - case RegSize_16: - m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2))); - break; + void* host_pc = GetCurrentNearCodePointer(); - case RegSize_32: - m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2))); - break; + switch (size) + { + case RegSize_8: + m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg))); + break; - default: - UnreachableCode(); - break; - } + case RegSize_16: + m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg))); + break; + + case RegSize_32: + m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg))); + break; + + default: + UnreachableCode(); + break; } const u32 host_code_size = @@ -1957,9 +1914,6 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(Instruction instruction, const C { Value value_in_hr = GetValueInHostRegister(value); - // fastmem - void* host_pc = GetCurrentNearCodePointer(); - HostReg address_reg; if (address.IsConstant()) { @@ -1971,56 +1925,37 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(Instruction instruction, const C address_reg = address.host_reg; } - m_register_cache.InhibitAllocation(); - if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap) - { - host_pc = GetCurrentNearCodePointer(); - - switch (size) - { - case RegSize_8: - m_emit->strb(GetHostReg32(value_in_hr), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg))); - break; - - case RegSize_16: - m_emit->strh(GetHostReg32(value_in_hr), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg))); - break; - - case RegSize_32: - m_emit->str(GetHostReg32(value_in_hr), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg))); - break; - - default: - UnreachableCode(); - break; - } - } - else + if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) { m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT); - m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_MASK); - m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetHostReg64(RARG3), GetHostReg32(RARG1), a64::LSL, 3)); + m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg64(RARG1), a64::LSL, 3)); + } - host_pc = GetCurrentNearCodePointer(); + const a64::XRegister membase = + (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg(); - switch (size) - { - case RegSize_8: - m_emit->strb(GetHostReg32(value_in_hr.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2))); - break; + // fastmem + void* host_pc = GetCurrentNearCodePointer(); - case RegSize_16: - m_emit->strh(GetHostReg32(value_in_hr.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2))); - break; + m_register_cache.InhibitAllocation(); - case RegSize_32: - m_emit->str(GetHostReg32(value_in_hr.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2))); - break; + switch (size) + { + case RegSize_8: + m_emit->strb(GetHostReg32(value_in_hr), a64::MemOperand(membase, GetHostReg32(address_reg))); + break; - default: - UnreachableCode(); - break; - } + case RegSize_16: + m_emit->strh(GetHostReg32(value_in_hr), a64::MemOperand(membase, GetHostReg32(address_reg))); + break; + + case RegSize_32: + m_emit->str(GetHostReg32(value_in_hr), a64::MemOperand(membase, GetHostReg32(address_reg))); + break; + + default: + UnreachableCode(); + break; } const u32 host_code_size = diff --git a/src/core/cpu_recompiler_code_generator_x64.cpp b/src/core/cpu_recompiler_code_generator_x64.cpp index d1efeabad..b5114237a 100644 --- a/src/core/cpu_recompiler_code_generator_x64.cpp +++ b/src/core/cpu_recompiler_code_generator_x64.cpp @@ -2094,200 +2094,117 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value) void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result) { - if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap) + if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) { - // can't store displacements > 0x80000000 in-line - const Value* actual_address = &address; - if (address.IsConstant() && address.constant_value >= 0x80000000) - { - actual_address = &result; - m_emit->mov(GetHostReg32(result.host_reg), address.constant_value); - } - - // TODO: movsx/zx inline here - switch (size) - { - case RegSize_8: - { - if (actual_address->IsConstant()) - { - m_emit->mov(GetHostReg8(result.host_reg), - m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value]); - } - else - { - m_emit->mov(GetHostReg8(result.host_reg), - m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]); - } - } - break; - - case RegSize_16: - { - if (actual_address->IsConstant()) - { - m_emit->mov(GetHostReg16(result.host_reg), - m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value]); - } - else - { - m_emit->mov(GetHostReg16(result.host_reg), - m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]); - } - } - break; - - case RegSize_32: - { - if (actual_address->IsConstant()) - { - m_emit->mov(GetHostReg32(result.host_reg), - m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value]); - } - else - { - m_emit->mov(GetHostReg32(result.host_reg), - m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]); - } - } - break; - - default: - UnreachableCode(); - break; - } - } - else - { - // TODO: We could mask the LSBs here for unaligned protection. EmitCopyValue(RARG1, address); - m_emit->mov(GetHostReg32(RARG2), GetHostReg32(RARG1)); - m_emit->shr(GetHostReg32(RARG1), Bus::FASTMEM_LUT_PAGE_SHIFT); - m_emit->and_(GetHostReg32(RARG2), Bus::FASTMEM_LUT_PAGE_MASK); + m_emit->shr(GetHostReg64(RARG1), Bus::FASTMEM_LUT_PAGE_SHIFT); m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]); + } - switch (size) + const Xbyak::Reg64 membase = + (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg(); + + // can't store displacements > 0x80000000 in-line + const Value* actual_address = &address; + if (address.IsConstant() && address.constant_value >= 0x80000000) + { + actual_address = &result; + m_emit->mov(GetHostReg32(result.host_reg), address.constant_value); + } + + // TODO: movsx/zx inline here + switch (size) + { + case RegSize_8: { - case RegSize_8: - m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[GetHostReg64(RARG1) + GetHostReg64(RARG2)]); - break; - - case RegSize_16: - m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[GetHostReg64(RARG1) + GetHostReg64(RARG2)]); - break; - - case RegSize_32: - m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[GetHostReg64(RARG1) + GetHostReg64(RARG2)]); - break; - - default: - UnreachableCode(); - break; + if (actual_address->IsConstant()) + m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[membase + actual_address->constant_value]); + else + m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[membase + GetHostReg64(actual_address->host_reg)]); } + break; + + case RegSize_16: + { + if (actual_address->IsConstant()) + m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[membase + actual_address->constant_value]); + else + m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[membase + GetHostReg64(actual_address->host_reg)]); + } + break; + + case RegSize_32: + { + if (actual_address->IsConstant()) + m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[membase + actual_address->constant_value]); + else + m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[membase + GetHostReg64(actual_address->host_reg)]); + } + break; + + default: + UnreachableCode(); + break; } } void CodeGenerator::EmitLoadGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info, const Value& address, RegSize size, Value& result) { - // fastmem + if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) + { + EmitCopyValue(RARG1, address); + m_emit->shr(GetHostReg64(RARG1), Bus::FASTMEM_LUT_PAGE_SHIFT); + m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]); + } + + const Xbyak::Reg64 membase = + (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg(); + + // can't store displacements > 0x80000000 in-line + const Value* actual_address = &address; + if (address.IsConstant() && address.constant_value >= 0x80000000) + { + actual_address = &result; + m_emit->mov(GetHostReg32(result.host_reg), address.constant_value); + } + void* host_pc = GetCurrentNearCodePointer(); - if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap) + m_register_cache.InhibitAllocation(); + + switch (size) { - // can't store displacements > 0x80000000 in-line - const Value* actual_address = &address; - if (address.IsConstant() && address.constant_value >= 0x80000000) + case RegSize_8: { - actual_address = &result; - m_emit->mov(GetHostReg32(result.host_reg), address.constant_value); - host_pc = GetCurrentNearCodePointer(); + if (actual_address->IsConstant()) + m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[membase + actual_address->constant_value]); + else + m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[membase + GetHostReg64(actual_address->host_reg)]); } + break; - m_register_cache.InhibitAllocation(); - - switch (size) + case RegSize_16: { - case RegSize_8: - { - if (actual_address->IsConstant()) - { - m_emit->mov(GetHostReg8(result.host_reg), - m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value]); - } - else - { - m_emit->mov(GetHostReg8(result.host_reg), - m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]); - } - } - break; - - case RegSize_16: - { - if (actual_address->IsConstant()) - { - m_emit->mov(GetHostReg16(result.host_reg), - m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value]); - } - else - { - m_emit->mov(GetHostReg16(result.host_reg), - m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]); - } - } - break; - - case RegSize_32: - { - if (actual_address->IsConstant()) - { - m_emit->mov(GetHostReg32(result.host_reg), - m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value]); - } - else - { - m_emit->mov(GetHostReg32(result.host_reg), - m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]); - } - } - break; - - default: - UnreachableCode(); - break; + if (actual_address->IsConstant()) + m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[membase + actual_address->constant_value]); + else + m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[membase + GetHostReg64(actual_address->host_reg)]); } - } - else - { - m_register_cache.InhibitAllocation(); + break; - // TODO: We could mask the LSBs here for unaligned protection. - EmitCopyValue(RARG1, address); - m_emit->mov(GetHostReg32(RARG2), GetHostReg32(RARG1)); - m_emit->shr(GetHostReg32(RARG1), Bus::FASTMEM_LUT_PAGE_SHIFT); - m_emit->and_(GetHostReg32(RARG2), Bus::FASTMEM_LUT_PAGE_MASK); - m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]); - host_pc = GetCurrentNearCodePointer(); - - switch (size) + case RegSize_32: { - case RegSize_8: - m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[GetHostReg64(RARG1) + GetHostReg64(RARG2)]); - break; - - case RegSize_16: - m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[GetHostReg64(RARG1) + GetHostReg64(RARG2)]); - break; - - case RegSize_32: - m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[GetHostReg64(RARG1) + GetHostReg64(RARG2)]); - break; - - default: - UnreachableCode(); - break; + if (actual_address->IsConstant()) + m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[membase + actual_address->constant_value]); + else + m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[membase + GetHostReg64(actual_address->host_reg)]); } + break; + + default: + UnreachableCode(); + break; } // insert nops, we need at least 5 bytes for a relative jump @@ -2398,168 +2315,93 @@ void CodeGenerator::EmitLoadGuestMemorySlowmem(Instruction instruction, const Co void CodeGenerator::EmitStoreGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info, const Value& address, RegSize size, const Value& value) { + if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) + { + EmitCopyValue(RARG1, address); + m_emit->shr(GetHostReg64(RARG1), Bus::FASTMEM_LUT_PAGE_SHIFT); + m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]); + } + + // can't store displacements > 0x80000000 in-line + const Value* actual_address = &address; + Value temp_address; + if (address.IsConstant() && address.constant_value >= 0x80000000) + { + temp_address.SetHostReg(&m_register_cache, RRETURN, RegSize_32); + actual_address = &temp_address; + m_emit->mov(GetHostReg32(temp_address), address.constant_value); + } + + const Xbyak::Reg64 membase = + (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg(); + // fastmem void* host_pc = GetCurrentNearCodePointer(); - if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap) + m_register_cache.InhibitAllocation(); + + switch (size) { - // can't store displacements > 0x80000000 in-line - const Value* actual_address = &address; - Value temp_address; - if (address.IsConstant() && address.constant_value >= 0x80000000) + case RegSize_8: { - temp_address.SetHostReg(&m_register_cache, RRETURN, RegSize_32); - actual_address = &temp_address; - m_emit->mov(GetHostReg32(temp_address), address.constant_value); - host_pc = GetCurrentNearCodePointer(); - } - - m_register_cache.InhibitAllocation(); - - switch (size) - { - case RegSize_8: - { - if (actual_address->IsConstant()) - { - if (value.IsConstant()) - { - m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value], - value.constant_value & 0xFFu); - } - else - { - m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value], - GetHostReg8(value.host_reg)); - } - } - else - { - if (value.IsConstant()) - { - m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)], - value.constant_value & 0xFFu); - } - else - { - m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)], - GetHostReg8(value.host_reg)); - } - } - } - break; - - case RegSize_16: - { - if (actual_address->IsConstant()) - { - if (value.IsConstant()) - { - m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value], - value.constant_value & 0xFFFFu); - } - else - { - m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value], - GetHostReg16(value.host_reg)); - } - } - else - { - if (value.IsConstant()) - { - m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)], - value.constant_value & 0xFFFFu); - } - else - { - m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)], - GetHostReg16(value.host_reg)); - } - } - } - break; - - case RegSize_32: - { - if (actual_address->IsConstant()) - { - if (value.IsConstant()) - { - m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value], value.constant_value); - } - else - { - m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value], - GetHostReg32(value.host_reg)); - } - } - else - { - if (value.IsConstant()) - { - m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)], - value.constant_value); - } - else - { - m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)], - GetHostReg32(value.host_reg)); - } - } - } - break; - - default: - UnreachableCode(); - break; - } - } - else - { - m_register_cache.InhibitAllocation(); - - // TODO: We could mask the LSBs here for unaligned protection. - EmitCopyValue(RARG1, address); - m_emit->mov(GetHostReg32(RARG2), GetHostReg32(RARG1)); - m_emit->shr(GetHostReg32(RARG1), Bus::FASTMEM_LUT_PAGE_SHIFT); - m_emit->and_(GetHostReg32(RARG2), Bus::FASTMEM_LUT_PAGE_MASK); - m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]); - host_pc = GetCurrentNearCodePointer(); - - switch (size) - { - case RegSize_8: + if (actual_address->IsConstant()) { if (value.IsConstant()) - m_emit->mov(m_emit->byte[GetHostReg64(RARG1) + GetHostReg64(RARG2)], value.constant_value & 0xFFu); + m_emit->mov(m_emit->byte[membase + actual_address->constant_value], value.constant_value & 0xFFu); else - m_emit->mov(m_emit->byte[GetHostReg64(RARG1) + GetHostReg64(RARG2)], GetHostReg8(value.host_reg)); + m_emit->mov(m_emit->byte[membase + actual_address->constant_value], GetHostReg8(value.host_reg)); } - break; - - case RegSize_16: + else { if (value.IsConstant()) - m_emit->mov(m_emit->word[GetHostReg64(RARG1) + GetHostReg64(RARG2)], value.constant_value & 0xFFFFu); + m_emit->mov(m_emit->byte[membase + GetHostReg64(actual_address->host_reg)], value.constant_value & 0xFFu); else - m_emit->mov(m_emit->word[GetHostReg64(RARG1) + GetHostReg64(RARG2)], GetHostReg16(value.host_reg)); + m_emit->mov(m_emit->byte[membase + GetHostReg64(actual_address->host_reg)], GetHostReg8(value.host_reg)); } - break; - - case RegSize_32: - { - if (value.IsConstant()) - m_emit->mov(m_emit->dword[GetHostReg64(RARG1) + GetHostReg64(RARG2)], value.constant_value); - else - m_emit->mov(m_emit->dword[GetHostReg64(RARG1) + GetHostReg64(RARG2)], GetHostReg32(value.host_reg)); - } - break; - - default: - UnreachableCode(); - break; } + break; + + case RegSize_16: + { + if (actual_address->IsConstant()) + { + if (value.IsConstant()) + m_emit->mov(m_emit->word[membase + actual_address->constant_value], value.constant_value & 0xFFFFu); + else + m_emit->mov(m_emit->word[membase + actual_address->constant_value], GetHostReg16(value.host_reg)); + } + else + { + if (value.IsConstant()) + m_emit->mov(m_emit->word[membase + GetHostReg64(actual_address->host_reg)], value.constant_value & 0xFFFFu); + else + m_emit->mov(m_emit->word[membase + GetHostReg64(actual_address->host_reg)], GetHostReg16(value.host_reg)); + } + } + break; + + case RegSize_32: + { + if (actual_address->IsConstant()) + { + if (value.IsConstant()) + m_emit->mov(m_emit->dword[membase + actual_address->constant_value], value.constant_value); + else + m_emit->mov(m_emit->dword[membase + actual_address->constant_value], GetHostReg32(value.host_reg)); + } + else + { + if (value.IsConstant()) + m_emit->mov(m_emit->dword[membase + GetHostReg64(actual_address->host_reg)], value.constant_value); + else + m_emit->mov(m_emit->dword[membase + GetHostReg64(actual_address->host_reg)], GetHostReg32(value.host_reg)); + } + } + break; + + default: + UnreachableCode(); + break; } // insert nops, we need at least 5 bytes for a relative jump