mirror of
https://github.com/WinampDesktop/winamp.git
synced 2025-06-21 07:35:41 -04:00
System: Refactor main loop
Reduces JIT exits. Improves runahead performance.
This commit is contained in:
@ -12,6 +12,8 @@ add_library(common
|
||||
dimensional_array.h
|
||||
error.cpp
|
||||
error.h
|
||||
fastjmp.cpp
|
||||
fastjmp.h
|
||||
fifo_queue.h
|
||||
file_system.cpp
|
||||
file_system.h
|
||||
@ -97,6 +99,14 @@ if(WIN32)
|
||||
windows_headers.h
|
||||
)
|
||||
target_link_libraries(common PRIVATE d3dcompiler.lib)
|
||||
|
||||
# Assemble the hand-written fastjmp_set()/fastjmp_jmp() routines with the
# Microsoft assembler that matches the target CPU.
# CPU_ARCH is expanded inside quotes so that an unset or empty value simply
# compares false instead of producing a malformed if() expression.
if("${CPU_ARCH}" STREQUAL "x64")
  enable_language(ASM_MASM)
  target_sources(common PRIVATE fastjmp_x86.asm)
  # fastjmp_x86.asm guards each implementation with IFDEF _M_X86_32 /
  # IFDEF _M_X86_64, and those symbols are supplied by the build (the MSBuild
  # project passes them as preprocessor definitions). Select the 64-bit body
  # here, otherwise the file assembles to an object without either routine.
  set_source_files_properties(fastjmp_x86.asm PROPERTIES COMPILE_FLAGS "/D_M_X86_64")
elseif("${CPU_ARCH}" STREQUAL "aarch32" OR "${CPU_ARCH}" STREQUAL "aarch64")
  enable_language(ASM_MARMASM)
  target_sources(common PRIVATE fastjmp_arm.asm)
endif()
|
||||
endif()
|
||||
|
||||
if(NOT WIN32 AND NOT ANDROID)
|
||||
|
@ -23,6 +23,7 @@
|
||||
<ClInclude Include="dimensional_array.h" />
|
||||
<ClInclude Include="easing.h" />
|
||||
<ClInclude Include="error.h" />
|
||||
<ClInclude Include="fastjmp.h" />
|
||||
<ClInclude Include="fifo_queue.h" />
|
||||
<ClInclude Include="file_system.h" />
|
||||
<ClInclude Include="gl\context.h">
|
||||
@ -123,6 +124,7 @@
|
||||
<ClCompile Include="d3d12\stream_buffer.cpp" />
|
||||
<ClCompile Include="d3d12\texture.cpp" />
|
||||
<ClCompile Include="d3d12\util.cpp" />
|
||||
<ClCompile Include="fastjmp.cpp" />
|
||||
<ClCompile Include="file_system.cpp" />
|
||||
<ClCompile Include="gl\context.cpp">
|
||||
<ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
|
||||
@ -192,6 +194,16 @@
|
||||
<Natvis Include="bitfield.natvis" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<MARMASM Include="fastjmp_arm.asm">
|
||||
<FileType>Document</FileType>
|
||||
<ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
|
||||
</MARMASM>
|
||||
<MASM Include="fastjmp_x86.asm">
|
||||
<FileType>Document</FileType>
|
||||
<ExcludedFromBuild Condition="'$(Platform)'!='Win32' And '$(Platform)'!='x64'">true</ExcludedFromBuild>
|
||||
<PreprocessorDefinitions Condition="'$(Platform)'=='Win32'">_M_X86_32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<PreprocessorDefinitions Condition="'$(Platform)'=='x64'">_M_X86_64;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</MASM>
|
||||
<None Include="vulkan\entry_points.inl">
|
||||
<ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
|
||||
</None>
|
||||
@ -219,9 +231,17 @@
|
||||
<Project>{73ee0c55-6ffe-44e7-9c12-baa52434a797}</Project>
|
||||
</ProjectReference>
|
||||
</ItemGroup>
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\marmasm.targets" />
|
||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\masm.targets" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{EE054E08-3799-4A59-A422-18259C105FFD}</ProjectGuid>
|
||||
</PropertyGroup>
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\marmasm.props" />
|
||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\masm.props" />
|
||||
</ImportGroup>
|
||||
<Import Project="..\..\dep\msvc\vsprops\StaticLibrary.props" />
|
||||
<Import Project="common.props" />
|
||||
<ItemDefinitionGroup>
|
||||
|
@ -129,6 +129,7 @@
|
||||
<ClInclude Include="build_timestamp.h" />
|
||||
<ClInclude Include="sha1_digest.h" />
|
||||
<ClInclude Include="gpu_texture.h" />
|
||||
<ClInclude Include="fastjmp.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="gl\program.cpp">
|
||||
@ -234,6 +235,7 @@
|
||||
<ClCompile Include="threading.cpp" />
|
||||
<ClCompile Include="sha1_digest.cpp" />
|
||||
<ClCompile Include="gpu_texture.cpp" />
|
||||
<ClCompile Include="fastjmp.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Natvis Include="bitfield.natvis" />
|
||||
@ -260,4 +262,10 @@
|
||||
<Filter>vulkan</Filter>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<MASM Include="fastjmp_x86.asm" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<MARMASM Include="fastjmp_arm.asm" />
|
||||
</ItemGroup>
|
||||
</Project>
|
166
src/common/fastjmp.cpp
Normal file
166
src/common/fastjmp.cpp
Normal file
@ -0,0 +1,166 @@
|
||||
// SPDX-FileCopyrightText: 2021 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||
|
||||
#ifndef _WIN32
|
||||
|
||||
#include "fastjmp.h"
|
||||
|
||||
#if defined(__APPLE__)
|
||||
#define PREFIX "_"
|
||||
#else
|
||||
#define PREFIX ""
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__)
|
||||
|
||||
asm("\t.global " PREFIX "fastjmp_set\n"
|
||||
"\t.global " PREFIX "fastjmp_jmp\n"
|
||||
"\t.text\n"
|
||||
"\t" PREFIX "fastjmp_set:"
|
||||
R"(
|
||||
movq 0(%rsp), %rax
|
||||
movq %rsp, %rdx # fixup stack pointer, so it doesn't include the call to fastjmp_set
|
||||
addq $8, %rdx
|
||||
movq %rax, 0(%rdi) # actually rip
|
||||
movq %rbx, 8(%rdi)
|
||||
movq %rdx, 16(%rdi) # actually rsp
|
||||
movq %rbp, 24(%rdi)
|
||||
movq %r12, 32(%rdi)
|
||||
movq %r13, 40(%rdi)
|
||||
movq %r14, 48(%rdi)
|
||||
movq %r15, 56(%rdi)
|
||||
xorl %eax, %eax
|
||||
ret
|
||||
)"
|
||||
"\t" PREFIX "fastjmp_jmp:"
|
||||
R"(
|
||||
movl %esi, %eax
|
||||
movq 0(%rdi), %rdx # actually rip
|
||||
movq 8(%rdi), %rbx
|
||||
movq 16(%rdi), %rsp # actually rsp
|
||||
movq 24(%rdi), %rbp
|
||||
movq 32(%rdi), %r12
|
||||
movq 40(%rdi), %r13
|
||||
movq 48(%rdi), %r14
|
||||
movq 56(%rdi), %r15
|
||||
jmp *%rdx
|
||||
)");
|
||||
|
||||
#elif defined(__aarch64__)
|
||||
|
||||
asm(
|
||||
"\t.global " PREFIX "fastjmp_set\n"
|
||||
"\t.global " PREFIX "fastjmp_jmp\n"
|
||||
"\t.text\n"
|
||||
"\t.align 16\n"
|
||||
"\t" PREFIX "fastjmp_set:" R"(
|
||||
mov x16, sp
|
||||
stp x16, x30, [x0]
|
||||
stp x19, x20, [x0, #16]
|
||||
stp x21, x22, [x0, #32]
|
||||
stp x23, x24, [x0, #48]
|
||||
stp x25, x26, [x0, #64]
|
||||
stp x27, x28, [x0, #80]
|
||||
str x29, [x0, #96]
|
||||
stp d8, d9, [x0, #112]
|
||||
stp d10, d11, [x0, #128]
|
||||
stp d12, d13, [x0, #144]
|
||||
stp d14, d15, [x0, #160]
|
||||
mov w0, wzr
|
||||
br x30
|
||||
)"
|
||||
".align 16\n"
|
||||
"\t" PREFIX "fastjmp_jmp:" R"(
|
||||
ldp x16, x30, [x0]
|
||||
mov sp, x16
|
||||
ldp x19, x20, [x0, #16]
|
||||
ldp x21, x22, [x0, #32]
|
||||
ldp x23, x24, [x0, #48]
|
||||
ldp x25, x26, [x0, #64]
|
||||
ldp x27, x28, [x0, #80]
|
||||
ldr x29, [x0, #96]
|
||||
ldp d8, d9, [x0, #112]
|
||||
ldp d10, d11, [x0, #128]
|
||||
ldp d12, d13, [x0, #144]
|
||||
ldp d14, d15, [x0, #160]
|
||||
mov w0, w1
|
||||
br x30
|
||||
)");
|
||||
|
||||
#elif defined(__riscv) && __riscv_xlen == 64
|
||||
|
||||
asm(
|
||||
"\t.global " PREFIX "fastjmp_set\n"
|
||||
"\t.global " PREFIX "fastjmp_jmp\n"
|
||||
"\t.text\n"
|
||||
"\t.align 16\n"
|
||||
"\t" PREFIX "fastjmp_set:" R"(
|
||||
sd sp, 0(a0)
|
||||
sd s0, 8(a0)
|
||||
sd s1, 16(a0)
|
||||
sd s2, 24(a0)
|
||||
sd s3, 32(a0)
|
||||
sd s4, 40(a0)
|
||||
sd s5, 48(a0)
|
||||
sd s6, 56(a0)
|
||||
sd s7, 64(a0)
|
||||
sd s8, 72(a0)
|
||||
sd s9, 80(a0)
|
||||
sd s10, 88(a0)
|
||||
sd s11, 96(a0)
|
||||
fsd fs0, 104(a0)
|
||||
fsd fs1, 112(a0)
|
||||
fsd fs2, 120(a0)
|
||||
fsd fs3, 128(a0)
|
||||
fsd fs4, 136(a0)
|
||||
fsd fs5, 144(a0)
|
||||
fsd fs6, 152(a0)
|
||||
fsd fs7, 160(a0)
|
||||
fsd fs8, 168(a0)
|
||||
fsd fs9, 176(a0)
|
||||
fsd fs10, 184(a0)
|
||||
fsd fs11, 192(a0)
|
||||
sd ra, 208(a0)
|
||||
li a0, 0
|
||||
jr ra
|
||||
)"
|
||||
".align 16\n"
|
||||
"\t" PREFIX "fastjmp_jmp:" R"(
|
||||
ld ra, 208(a0)
|
||||
fld fs11, 192(a0)
|
||||
fld fs10, 184(a0)
|
||||
fld fs9, 176(a0)
|
||||
fld fs8, 168(a0)
|
||||
fld fs7, 160(a0)
|
||||
fld fs6, 152(a0)
|
||||
fld fs5, 144(a0)
|
||||
fld fs4, 136(a0)
|
||||
fld fs3, 128(a0)
|
||||
fld fs2, 120(a0)
|
||||
fld fs1, 112(a0)
|
||||
fld fs0, 104(a0)
|
||||
ld s11, 96(a0)
|
||||
ld s10, 88(a0)
|
||||
ld s9, 80(a0)
|
||||
ld s8, 72(a0)
|
||||
ld s7, 64(a0)
|
||||
ld s6, 56(a0)
|
||||
ld s5, 48(a0)
|
||||
ld s4, 40(a0)
|
||||
ld s3, 32(a0)
|
||||
ld s2, 24(a0)
|
||||
ld s1, 16(a0)
|
||||
ld s0, 8(a0)
|
||||
ld sp, 0(a0)
|
||||
mv a0, a1
|
||||
jr ra
|
||||
)");
|
||||
|
||||
|
||||
#else
|
||||
|
||||
#error Unknown platform.
|
||||
|
||||
#endif
|
||||
|
||||
#endif // !_WIN32
|
33
src/common/fastjmp.h
Normal file
33
src/common/fastjmp.h
Normal file
@ -0,0 +1,33 @@
|
||||
// SPDX-FileCopyrightText: 2021 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "types.h"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
// Register save area filled by fastjmp_set() and consumed by fastjmp_jmp().
// BUF_SIZE must cover the highest byte written by the assembly routine for
// the same target; the per-target notes below name what determines it.
struct fastjmp_buf
{
#if defined(_WIN32) && defined(_M_AMD64)
  // rip, rbx, rsp, rbp, rsi, rdi, r12-r15 (80 bytes), then xmm6-xmm15 (160 bytes).
  static constexpr std::size_t BUF_SIZE = 240;
#elif defined(_M_ARM64) || defined(__aarch64__)
  // The last store is "stp d14, d15, [x0, #160]", which ends at byte 176.
  static constexpr std::size_t BUF_SIZE = 176;
#elif defined(__x86_64__)
  // rip, rbx, rsp, rbp, r12-r15.
  static constexpr std::size_t BUF_SIZE = 64;
#elif defined(_M_IX86) || defined(__i386__)
  // eip, ebx, esp, ebp, esi, edi.
  static constexpr std::size_t BUF_SIZE = 24;
#elif defined(__riscv) && __riscv_xlen == 64
  // The last store is "sd ra, 208(a0)", which ends at byte 216.
  static constexpr std::size_t BUF_SIZE = 216;
#else
#error Unknown architecture.
#endif

  // movaps in the Win64 routine needs its memory operand 16-byte aligned.
  alignas(16) std::uint8_t buf[BUF_SIZE];
};
|
||||
|
||||
extern "C" {
|
||||
int fastjmp_set(fastjmp_buf* buf);
|
||||
[[noreturn]] void fastjmp_jmp(const fastjmp_buf* buf, int ret);
|
||||
}
|
47
src/common/fastjmp_arm.asm
Normal file
47
src/common/fastjmp_arm.asm
Normal file
@ -0,0 +1,47 @@
|
||||
; SPDX-FileCopyrightText: 2021 Connor McLaughlin <stenzek@gmail.com>
|
||||
; SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||
|
||||
#include "ksarm64.h"
|
||||
|
||||
EXPORT fastjmp_set
|
||||
EXPORT fastjmp_jmp
|
||||
|
||||
TEXTAREA
|
||||
|
||||
; int fastjmp_set(fastjmp_buf*)
|
||||
LEAF_ENTRY fastjmp_set
|
||||
mov x16, sp
|
||||
stp x16, x30, [x0]
|
||||
stp x19, x20, [x0, #16]
|
||||
stp x21, x22, [x0, #32]
|
||||
stp x23, x24, [x0, #48]
|
||||
stp x25, x26, [x0, #64]
|
||||
stp x27, x28, [x0, #80]
|
||||
str x29, [x0, #96]
|
||||
stp d8, d9, [x0, #112]
|
||||
stp d10, d11, [x0, #128]
|
||||
stp d12, d13, [x0, #144]
|
||||
stp d14, d15, [x0, #160]
|
||||
mov w0, wzr
|
||||
br x30
|
||||
LEAF_END
|
||||
|
||||
; void fastjmp_jmp(const fastjmp_buf*, int)
|
||||
LEAF_ENTRY fastjmp_jmp
|
||||
ldp x16, x30, [x0]
|
||||
mov sp, x16
|
||||
ldp x19, x20, [x0, #16]
|
||||
ldp x21, x22, [x0, #32]
|
||||
ldp x23, x24, [x0, #48]
|
||||
ldp x25, x26, [x0, #64]
|
||||
ldp x27, x28, [x0, #80]
|
||||
ldr x29, [x0, #96]
|
||||
ldp d8, d9, [x0, #112]
|
||||
ldp d10, d11, [x0, #128]
|
||||
ldp d12, d13, [x0, #144]
|
||||
ldp d14, d15, [x0, #160]
|
||||
mov w0, w1
|
||||
br x30
|
||||
LEAF_END
|
||||
|
||||
END
|
119
src/common/fastjmp_x86.asm
Normal file
119
src/common/fastjmp_x86.asm
Normal file
@ -0,0 +1,119 @@
|
||||
; SPDX-FileCopyrightText: 2021 Connor McLaughlin <stenzek@gmail.com>
|
||||
; SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||
|
||||
IFDEF _M_X86_32
|
||||
|
||||
; -----------------------------------------
|
||||
; 32-bit X86
|
||||
; -----------------------------------------
|
||||
.386
|
||||
.model flat
|
||||
|
||||
_TEXT SEGMENT
|
||||
|
||||
PUBLIC @fastjmp_set@4
|
||||
PUBLIC @fastjmp_jmp@8
|
||||
|
||||
; int __fastcall fastjmp_set(fastjmp_buf*)
|
||||
@fastjmp_set@4 PROC
|
||||
mov eax, dword ptr [esp]
|
||||
mov edx, esp ; fixup stack pointer, so it doesn't include the call to fastjmp_set
|
||||
add edx, 4
|
||||
mov dword ptr [ecx], eax ; actually eip
|
||||
mov dword ptr [ecx + 4], ebx
|
||||
mov dword ptr [ecx + 8], edx ; actually esp
|
||||
mov dword ptr [ecx + 12], ebp
|
||||
mov dword ptr [ecx + 16], esi
|
||||
mov dword ptr [ecx + 20], edi
|
||||
xor eax, eax
|
||||
ret
|
||||
@fastjmp_set@4 ENDP
|
||||
|
||||
; void __fastcall fastjmp_jmp(fastjmp_buf*, int)
|
||||
@fastjmp_jmp@8 PROC
|
||||
mov eax, edx ; return code
|
||||
mov edx, dword ptr [ecx + 0]
|
||||
mov ebx, dword ptr [ecx + 4]
|
||||
mov esp, dword ptr [ecx + 8]
|
||||
mov ebp, dword ptr [ecx + 12]
|
||||
mov esi, dword ptr [ecx + 16]
|
||||
mov edi, dword ptr [ecx + 20]
|
||||
jmp edx
|
||||
@fastjmp_jmp@8 ENDP
|
||||
|
||||
_TEXT ENDS
|
||||
|
||||
ENDIF ; _M_X86_32
|
||||
|
||||
IFDEF _M_X86_64
|
||||
|
||||
; -----------------------------------------
|
||||
; 64-bit X86
|
||||
; -----------------------------------------
|
||||
_TEXT SEGMENT
|
||||
|
||||
PUBLIC fastjmp_set
|
||||
PUBLIC fastjmp_jmp
|
||||
|
||||
; int fastjmp_set(fastjmp_buf*)
|
||||
fastjmp_set PROC
|
||||
mov rax, qword ptr [rsp]
|
||||
mov rdx, rsp ; fixup stack pointer, so it doesn't include the call to fastjmp_set
|
||||
add rdx, 8
|
||||
mov qword ptr [rcx], rax ; actually rip
|
||||
mov qword ptr [rcx + 8], rbx
|
||||
mov qword ptr [rcx + 16], rdx ; actually rsp
|
||||
mov qword ptr [rcx + 24], rbp
|
||||
mov qword ptr [rcx + 32], rsi
|
||||
mov qword ptr [rcx + 40], rdi
|
||||
mov qword ptr [rcx + 48], r12
|
||||
mov qword ptr [rcx + 56], r13
|
||||
mov qword ptr [rcx + 64], r14
|
||||
mov qword ptr [rcx + 72], r15
|
||||
movaps xmmword ptr [rcx + 80], xmm6
|
||||
movaps xmmword ptr [rcx + 96], xmm7
|
||||
movaps xmmword ptr [rcx + 112], xmm8
|
||||
add rcx, 112 ; split to two batches to fit displacement in a single byte
|
||||
movaps xmmword ptr [rcx + 16], xmm9
|
||||
movaps xmmword ptr [rcx + 32], xmm10
|
||||
movaps xmmword ptr [rcx + 48], xmm11
|
||||
movaps xmmword ptr [rcx + 64], xmm12
|
||||
movaps xmmword ptr [rcx + 80], xmm13
|
||||
movaps xmmword ptr [rcx + 96], xmm14
|
||||
movaps xmmword ptr [rcx + 112], xmm15
|
||||
xor eax, eax
|
||||
ret
|
||||
fastjmp_set ENDP
|
||||
|
||||
; void fastjmp_jmp(const fastjmp_buf*, int)
|
||||
fastjmp_jmp PROC
|
||||
mov eax, edx ; return code
|
||||
mov rdx, qword ptr [rcx + 0] ; actually rip
|
||||
mov rbx, qword ptr [rcx + 8]
|
||||
mov rsp, qword ptr [rcx + 16]
|
||||
mov rbp, qword ptr [rcx + 24]
|
||||
mov rsi, qword ptr [rcx + 32]
|
||||
mov rdi, qword ptr [rcx + 40]
|
||||
mov r12, qword ptr [rcx + 48]
|
||||
mov r13, qword ptr [rcx + 56]
|
||||
mov r14, qword ptr [rcx + 64]
|
||||
mov r15, qword ptr [rcx + 72]
|
||||
movaps xmm6, xmmword ptr [rcx + 80]
|
||||
movaps xmm7, xmmword ptr [rcx + 96]
|
||||
movaps xmm8, xmmword ptr [rcx + 112]
|
||||
add rcx, 112 ; split to two batches to fit displacement in a single byte
|
||||
movaps xmm9, xmmword ptr [rcx + 16]
|
||||
movaps xmm10, xmmword ptr [rcx + 32]
|
||||
movaps xmm11, xmmword ptr [rcx + 48]
|
||||
movaps xmm12, xmmword ptr [rcx + 64]
|
||||
movaps xmm13, xmmword ptr [rcx + 80]
|
||||
movaps xmm14, xmmword ptr [rcx + 96]
|
||||
movaps xmm15, xmmword ptr [rcx + 112]
|
||||
jmp rdx
|
||||
fastjmp_jmp ENDP
|
||||
|
||||
_TEXT ENDS
|
||||
|
||||
ENDIF ; _M_X86_64
|
||||
|
||||
END
|
@ -27,6 +27,8 @@
|
||||
#define CPU_AARCH64 1
|
||||
#elif defined(__arm__)
|
||||
#define CPU_AARCH32 1
|
||||
#elif defined(__riscv) && __riscv_xlen == 64
|
||||
#define CPU_RISCV64 1
|
||||
#else
|
||||
#error Unknown architecture.
|
||||
#endif
|
||||
|
@ -65,6 +65,13 @@ char (&__countof_ArraySizeHelper(T (&array)[N]))[N];
|
||||
#define UNLIKELY(x) __builtin_expect(!!(x), 0)
|
||||
#endif
|
||||
|
||||
// [[noreturn]] which can be used on function pointers.
|
||||
#ifdef _MSC_VER
|
||||
// __declspec(noreturn) produces error C3829.
|
||||
#define NORETURN_FUNCTION_POINTER
|
||||
#else
|
||||
#define NORETURN_FUNCTION_POINTER __attribute__((noreturn))
|
||||
#endif
|
||||
|
||||
// disable warnings that show up at warning level 4
|
||||
// TODO: Move to build system instead
|
||||
|
Reference in New Issue
Block a user